mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
fix: correct field data offset calculation in rerank functions for bulk search (#45444)
Related to #45338. When using bulk vector search in hybrid search with rerank functions, the output field values for different queries were all equal to the values returned by the first query, instead of the correct values belonging to each document ID. The document IDs were correct, but the entity field values were wrong. In rerank functions (RRF, weighted, decay, model), when processing multiple queries in a batch, the `idLocations` map stored only the relative offset within each result set (`idx`), not accounting for the absolute position within the entire batch. This caused `FillFieldData` to retrieve field data from the wrong positions, always using offsets relative to the first query. This fix ensures that when processing bulk searches with rerank functions, each result correctly retrieves its corresponding field data based on the absolute offset within the entire batch, resolving the issue where all queries returned the first query's field values. Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
parent
dcf490663c
commit
382b1d7de6
@ -187,7 +187,7 @@ func (decay *DecayFunction[T, R]) processOneSearchData(ctx context.Context, sear
|
||||
ids := col.ids.([]T)
|
||||
for idx, id := range ids {
|
||||
if _, ok := decayScores[id]; !ok {
|
||||
idLocations[id] = IDLoc{batchIdx: i, offset: idx}
|
||||
idLocations[id] = IDLoc{batchIdx: i, offset: idx + int(col.nqOffset)}
|
||||
decayScores[id] = float32(decay.reScorer(decay.origin, decay.scale, decay.decay, decay.offset, float64(nums[idx])))
|
||||
}
|
||||
}
|
||||
|
||||
@ -151,7 +151,7 @@ func (model *ModelFunction[T]) processOneSearchData(ctx context.Context, searchP
|
||||
ids := col.ids.([]T)
|
||||
for idx, id := range ids {
|
||||
if _, ok := uniqueData[id]; !ok {
|
||||
idLocations[id] = IDLoc{batchIdx: i, offset: idx}
|
||||
idLocations[id] = IDLoc{batchIdx: i, offset: idx + int(col.nqOffset)}
|
||||
uniqueData[id] = texts[idx]
|
||||
}
|
||||
}
|
||||
|
||||
@ -77,7 +77,7 @@ func (rrf *RRFFunction[T]) processOneSearchData(ctx context.Context, searchParam
|
||||
ids := col.ids.([]T)
|
||||
for idx, id := range ids {
|
||||
if score, ok := rrfScores[id]; !ok {
|
||||
idLocations[id] = IDLoc{batchIdx: i, offset: idx}
|
||||
idLocations[id] = IDLoc{batchIdx: i, offset: idx + int(col.nqOffset)}
|
||||
rrfScores[id] = 1 / (rrf.k + float32(idx+1))
|
||||
} else {
|
||||
rrfScores[id] = score + 1/(rrf.k+float32(idx+1))
|
||||
|
||||
@ -41,6 +41,8 @@ type columns struct {
|
||||
size int64
|
||||
ids any
|
||||
scores []float32
|
||||
|
||||
nqOffset int64
|
||||
}
|
||||
|
||||
type rerankInputs struct {
|
||||
@ -101,6 +103,7 @@ func newRerankInputs(multipSearchResultData []*schemapb.SearchResultData, inputF
|
||||
cols[i][retIdx].size = size
|
||||
cols[i][retIdx].ids = getIds(searchResult.Ids, start, size)
|
||||
cols[i][retIdx].scores = searchResult.Scores[start : start+size]
|
||||
cols[i][retIdx].nqOffset = start
|
||||
}
|
||||
for _, fieldId := range inputFieldIds {
|
||||
fieldData, exist := multipIdField[retIdx][fieldId]
|
||||
|
||||
@ -97,7 +97,7 @@ func (weighted *WeightedFunction[T]) processOneSearchData(ctx context.Context, s
|
||||
ids := col.ids.([]T)
|
||||
for j, id := range ids {
|
||||
if score, ok := weightedScores[id]; !ok {
|
||||
idLocations[id] = IDLoc{batchIdx: i, offset: j}
|
||||
idLocations[id] = IDLoc{batchIdx: i, offset: j + int(col.nqOffset)}
|
||||
weightedScores[id] = weighted.weight[i] * normFunc(col.scores[j])
|
||||
} else {
|
||||
weightedScores[id] = score + weighted.weight[i]*normFunc(col.scores[j])
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user