fix: correct field data offset calculation in rerank functions for bulk search (#45444)

Related to #45338

When using bulk vector search in hybrid search with rerank functions,
the output field values for every query were identical to the values
returned by the first query, rather than the correct values associated
with each document ID. The document IDs themselves were correct, but
the entity field values were wrong.

In rerank functions (RRF, weighted, decay, model), when processing
multiple queries in a batch, the `idLocations` map stored only the
relative offset within each query's result set (`idx`), without
accounting for the result set's absolute position within the entire
batch. As a result, `FillFieldData` retrieved field data from the wrong
positions, always using offsets relative to the first query.

This fix ensures that when processing bulk searches with rerank
functions, each result correctly retrieves its corresponding field data
based on the absolute offset within the entire batch, resolving the
issue where all queries returned the first query's field values.

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
congqixia 2025-11-11 14:39:41 +08:00 committed by GitHub
parent dcf490663c
commit 382b1d7de6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 7 additions and 4 deletions

View File

@ -187,7 +187,7 @@ func (decay *DecayFunction[T, R]) processOneSearchData(ctx context.Context, sear
ids := col.ids.([]T)
for idx, id := range ids {
if _, ok := decayScores[id]; !ok {
idLocations[id] = IDLoc{batchIdx: i, offset: idx}
idLocations[id] = IDLoc{batchIdx: i, offset: idx + int(col.nqOffset)}
decayScores[id] = float32(decay.reScorer(decay.origin, decay.scale, decay.decay, decay.offset, float64(nums[idx])))
}
}

View File

@ -151,7 +151,7 @@ func (model *ModelFunction[T]) processOneSearchData(ctx context.Context, searchP
ids := col.ids.([]T)
for idx, id := range ids {
if _, ok := uniqueData[id]; !ok {
idLocations[id] = IDLoc{batchIdx: i, offset: idx}
idLocations[id] = IDLoc{batchIdx: i, offset: idx + int(col.nqOffset)}
uniqueData[id] = texts[idx]
}
}

View File

@ -77,7 +77,7 @@ func (rrf *RRFFunction[T]) processOneSearchData(ctx context.Context, searchParam
ids := col.ids.([]T)
for idx, id := range ids {
if score, ok := rrfScores[id]; !ok {
idLocations[id] = IDLoc{batchIdx: i, offset: idx}
idLocations[id] = IDLoc{batchIdx: i, offset: idx + int(col.nqOffset)}
rrfScores[id] = 1 / (rrf.k + float32(idx+1))
} else {
rrfScores[id] = score + 1/(rrf.k+float32(idx+1))

View File

@ -41,6 +41,8 @@ type columns struct {
size int64
ids any
scores []float32
nqOffset int64
}
type rerankInputs struct {
@ -101,6 +103,7 @@ func newRerankInputs(multipSearchResultData []*schemapb.SearchResultData, inputF
cols[i][retIdx].size = size
cols[i][retIdx].ids = getIds(searchResult.Ids, start, size)
cols[i][retIdx].scores = searchResult.Scores[start : start+size]
cols[i][retIdx].nqOffset = start
}
for _, fieldId := range inputFieldIds {
fieldData, exist := multipIdField[retIdx][fieldId]

View File

@ -97,7 +97,7 @@ func (weighted *WeightedFunction[T]) processOneSearchData(ctx context.Context, s
ids := col.ids.([]T)
for j, id := range ids {
if score, ok := weightedScores[id]; !ok {
idLocations[id] = IDLoc{batchIdx: i, offset: j}
idLocations[id] = IDLoc{batchIdx: i, offset: j + int(col.nqOffset)}
weightedScores[id] = weighted.weight[i] * normFunc(col.scores[j])
} else {
weightedScores[id] = score + weighted.weight[i]*normFunc(col.scores[j])