fix: [hotfix] Handle empty FieldsData in reduce/rerank for requery scenario (#45137) (#45389)

issue: #44909
pr: #44917

When requery optimization is enabled, search results contain IDs but
empty FieldsData. During reduce/rerank operations, if the first shard
has empty FieldsData while others have data, PrepareResultFieldData
initializes an empty array, causing AppendFieldData to panic when
accessing array indices.

Changes:
- Find first non-empty FieldsData as template in 3 functions:
reduceAdvanceGroupBy, reduceSearchResultDataWithGroupBy,
reduceSearchResultDataNoGroupBy
- Add a length check before the two AppendFieldData calls in the reduce functions to
prevent a panic
- Improve newRerankOutputs to find first non-empty fieldData using
len(FieldsData) check instead of GetSizeOfIDs
- Add length check in appendResult before AppendFieldData
- Add comprehensive unit tests for empty and partial empty FieldsData
scenarios in both reduce and rerank functions

This fix handles both pure requery (all empty) and mixed scenarios (some
empty, some with data) without breaking normal search flow. The key
improvement is checking FieldsData length directly rather than IDs, as
requery may have IDs but empty FieldsData.

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
Co-authored-by: wei liu <wei.liu@zilliz.com>
This commit is contained in:
congqixia 2025-11-07 14:41:34 +08:00 committed by GitHub
parent 1654b4cdc3
commit 81c2fd46a5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 334 additions and 8 deletions

View File

@ -101,7 +101,13 @@ func reduceAdvanceGroupBy(ctx context.Context, subSearchResultData []*schemapb.S
} else {
ret.GetResults().AllSearchCount = allSearchCount
limit = int64(hitNum)
ret.GetResults().FieldsData = typeutil.PrepareResultFieldData(subSearchResultData[0].GetFieldsData(), limit)
// Find the first non-empty FieldsData as template
for _, result := range subSearchResultData {
if len(result.GetFieldsData()) > 0 {
ret.GetResults().FieldsData = typeutil.PrepareResultFieldData(result.GetFieldsData(), limit)
break
}
}
}
if err := setupIdListForSearchResult(ret, pkType, limit); err != nil {
@ -193,7 +199,7 @@ func reduceSearchResultDataWithGroupBy(ctx context.Context, subSearchResultData
Results: &schemapb.SearchResultData{
NumQueries: nq,
TopK: topk,
FieldsData: typeutil.PrepareResultFieldData(subSearchResultData[0].GetFieldsData(), limit),
FieldsData: []*schemapb.FieldData{},
Scores: []float32{},
Ids: &schemapb.IDs{},
Topks: []int64{},
@ -211,6 +217,14 @@ func reduceSearchResultDataWithGroupBy(ctx context.Context, subSearchResultData
ret.GetResults().AllSearchCount = allSearchCount
}
// Find the first non-empty FieldsData as template
for _, result := range subSearchResultData {
if len(result.GetFieldsData()) > 0 {
ret.GetResults().FieldsData = typeutil.PrepareResultFieldData(result.GetFieldsData(), limit)
break
}
}
var (
subSearchNum = len(subSearchResultData)
// for results of each subSearchResultData, storing the start offset of each query of nq queries
@ -289,7 +303,9 @@ func reduceSearchResultDataWithGroupBy(ctx context.Context, subSearchResultData
groupEntities := groupByValMap[groupVal]
for _, groupEntity := range groupEntities {
subResData := subSearchResultData[groupEntity.subSearchIdx]
if len(ret.Results.FieldsData) > 0 {
retSize += typeutil.AppendFieldData(ret.Results.FieldsData, subResData.FieldsData, groupEntity.resultIdx)
}
typeutil.AppendPKs(ret.Results.Ids, groupEntity.id)
ret.Results.Scores = append(ret.Results.Scores, groupEntity.score)
gpFieldBuilder.Add(groupVal)
@ -336,7 +352,7 @@ func reduceSearchResultDataNoGroupBy(ctx context.Context, subSearchResultData []
Results: &schemapb.SearchResultData{
NumQueries: nq,
TopK: topk,
FieldsData: typeutil.PrepareResultFieldData(subSearchResultData[0].GetFieldsData(), limit),
FieldsData: []*schemapb.FieldData{},
Scores: []float32{},
Ids: &schemapb.IDs{},
Topks: []int64{},
@ -354,6 +370,14 @@ func reduceSearchResultDataNoGroupBy(ctx context.Context, subSearchResultData []
ret.GetResults().AllSearchCount = allSearchCount
}
// Find the first non-empty FieldsData as template
for _, result := range subSearchResultData {
if len(result.GetFieldsData()) > 0 {
ret.GetResults().FieldsData = typeutil.PrepareResultFieldData(result.GetFieldsData(), limit)
break
}
}
subSearchNum := len(subSearchResultData)
if subSearchNum == 1 && offset == 0 {
// sorting is not needed if there is only one shard and no offset, assigning the result directly.
@ -407,7 +431,9 @@ func reduceSearchResultDataNoGroupBy(ctx context.Context, subSearchResultData []
}
score := subSearchResultData[subSearchIdx].Scores[resultDataIdx]
if len(ret.Results.FieldsData) > 0 {
retSize += typeutil.AppendFieldData(ret.Results.FieldsData, subSearchResultData[subSearchIdx].FieldsData, resultDataIdx)
}
typeutil.CopyPk(ret.Results.Ids, subSearchResultData[subSearchIdx].GetIds(), int(resultDataIdx))
ret.Results.Scores = append(ret.Results.Scores, score)
cursors[subSearchIdx]++

View File

@ -84,6 +84,166 @@ func (struts *SearchReduceUtilTestSuite) TestReduceSearchResultWithEmtpyGroupDat
struts.Nil(results.Results.GetGroupByFieldValue())
}
// TestReduceWithEmptyFieldsData runs the three reduce entry points against two
// sub-results that carry IDs and scores but an empty FieldsData slice (the
// requery scenario) and checks that each call succeeds and yields no FieldsData.
func (struts *SearchReduceUtilTestSuite) TestReduceWithEmptyFieldsData() {
	var (
		ctx    = context.Background()
		nq     = int64(1)
		topK   = int64(5)
		offset = int64(0)
	)

	// emptyFieldsResult builds one sub-result with int64 primary keys, the given
	// scores, and an empty (non-nil) FieldsData slice.
	emptyFieldsResult := func(pks []int64, scores []float32) *schemapb.SearchResultData {
		return &schemapb.SearchResultData{
			Ids: &schemapb.IDs{
				IdField: &schemapb.IDs_IntId{
					IntId: &schemapb.LongArray{Data: pks},
				},
			},
			Scores:     scores,
			Topks:      []int64{5},
			NumQueries: nq,
			TopK:       topK,
			FieldsData: []*schemapb.FieldData{},
		}
	}

	// groupColumn builds the VarChar group-by column attached for the group-by reducers.
	groupColumn := func(values []string) *schemapb.FieldData {
		return &schemapb.FieldData{
			Type:      schemapb.DataType_VarChar,
			FieldName: "group",
			FieldId:   101,
			Field: &schemapb.FieldData_Scalars{
				Scalars: &schemapb.ScalarField{
					Data: &schemapb.ScalarField_StringData{
						StringData: &schemapb.StringArray{Data: values},
					},
				},
			},
		}
	}

	searchResultData1 := emptyFieldsResult([]int64{1, 2, 3, 4, 5}, []float32{0.9, 0.8, 0.7, 0.6, 0.5})
	searchResultData2 := emptyFieldsResult([]int64{6, 7, 8, 9, 10}, []float32{0.85, 0.75, 0.65, 0.55, 0.45})

	// Plain reduce: the merge must complete and report five hits for the single query.
	plain, err := reduceSearchResultDataNoGroupBy(ctx, []*schemapb.SearchResultData{searchResultData1, searchResultData2}, nq, topK, "L2", schemapb.DataType_Int64, offset)
	struts.NoError(err)
	struts.NotNil(plain)
	struts.Equal(int64(5), plain.Results.Topks[0])
	// No input carried FieldsData, so the output carries none either.
	struts.Equal(0, len(plain.Results.FieldsData))

	// The group-by reducers need a group-by column; attach it only after the
	// plain reduce above has run, so that call sees results without one.
	searchResultData1.GroupByFieldValue = groupColumn([]string{"a", "b", "c", "a", "b"})
	searchResultData2.GroupByFieldValue = groupColumn([]string{"c", "a", "b", "c", "a"})

	// Group-by reduce with a group size of 2.
	grouped, err := reduceSearchResultDataWithGroupBy(ctx, []*schemapb.SearchResultData{searchResultData1, searchResultData2}, nq, topK, "L2", schemapb.DataType_Int64, offset, int64(2))
	struts.NoError(err)
	struts.NotNil(grouped)
	struts.Equal(0, len(grouped.Results.FieldsData))

	// Advanced group-by reduce.
	advanced, err := reduceAdvanceGroupBy(ctx, []*schemapb.SearchResultData{searchResultData1, searchResultData2}, nq, topK, schemapb.DataType_Int64, "L2")
	struts.NoError(err)
	struts.NotNil(advanced)
	struts.Equal(0, len(advanced.Results.FieldsData))
}
// TestReduceWithPartialEmptyFieldsData checks reduceSearchResultDataNoGroupBy when
// the first sub-result has an empty FieldsData slice and the second one carries a
// field column: the reduced result is expected to end up with non-empty FieldsData.
func (struts *SearchReduceUtilTestSuite) TestReduceWithPartialEmptyFieldsData() {
	var (
		ctx    = context.Background()
		nq     = int64(1)
		topK   = int64(3)
		offset = int64(0)
	)

	// int64IDs wraps raw primary keys in the IDs message used by both sub-results.
	int64IDs := func(pks ...int64) *schemapb.IDs {
		return &schemapb.IDs{
			IdField: &schemapb.IDs_IntId{
				IntId: &schemapb.LongArray{Data: pks},
			},
		}
	}

	// Sub-result without any field columns.
	withoutFields := &schemapb.SearchResultData{
		Ids:        int64IDs(1, 2, 3),
		Scores:     []float32{0.9, 0.8, 0.7},
		Topks:      []int64{3},
		NumQueries: nq,
		TopK:       topK,
		FieldsData: []*schemapb.FieldData{},
	}

	// Sub-result carrying a single Int64 column named "field1".
	field1 := &schemapb.FieldData{
		Type:      schemapb.DataType_Int64,
		FieldName: "field1",
		FieldId:   100,
		Field: &schemapb.FieldData_Scalars{
			Scalars: &schemapb.ScalarField{
				Data: &schemapb.ScalarField_LongData{
					LongData: &schemapb.LongArray{Data: []int64{40, 50, 60}},
				},
			},
		},
	}
	withFields := &schemapb.SearchResultData{
		Ids:        int64IDs(4, 5, 6),
		Scores:     []float32{0.85, 0.75, 0.65},
		Topks:      []int64{3},
		NumQueries: nq,
		TopK:       topK,
		FieldsData: []*schemapb.FieldData{field1},
	}

	reduced, err := reduceSearchResultDataNoGroupBy(ctx, []*schemapb.SearchResultData{withoutFields, withFields}, nq, topK, "L2", schemapb.DataType_Int64, offset)
	struts.NoError(err)
	struts.NotNil(reduced)
	// The output FieldsData must have been initialized from the second sub-result.
	struts.Greater(len(reduced.Results.FieldsData), 0)
}
func TestSearchReduceUtilTestSuite(t *testing.T) {
suite.Run(t, new(SearchReduceUtilTestSuite))
}

View File

@ -146,8 +146,12 @@ func newRerankOutputs(inputs *rerankInputs, searchParams *SearchParams) *rerankO
Ids: &schemapb.IDs{},
Topks: []int64{},
}
if len(inputs.fieldData) > 0 {
ret.FieldsData = typeutil.PrepareResultFieldData(inputs.fieldData[0].GetFieldsData(), searchParams.limit)
// Find the first non-empty fieldData and prepare result fields
for _, fieldData := range inputs.fieldData {
if fieldData != nil && len(fieldData.GetFieldsData()) > 0 {
ret.FieldsData = typeutil.PrepareResultFieldData(fieldData.GetFieldsData(), searchParams.limit)
break
}
}
return &rerankOutputs{ret}
}
@ -157,7 +161,7 @@ func appendResult[T PKType](inputs *rerankInputs, outputs *rerankOutputs, idScor
scores := idScores.scores
outputs.searchResultData.Topks = append(outputs.searchResultData.Topks, int64(len(ids)))
outputs.searchResultData.Scores = append(outputs.searchResultData.Scores, scores...)
if len(inputs.fieldData) > 0 {
if len(inputs.fieldData) > 0 && len(outputs.searchResultData.FieldsData) > 0 {
for idx := range ids {
loc := idScores.locations[idx]
typeutil.AppendFieldData(outputs.searchResultData.FieldsData, inputs.fieldData[loc.batchIdx].GetFieldsData(), int64(loc.offset))

View File

@ -202,3 +202,139 @@ func (s *UtilSuite) TestIsCrossMetrics() {
s.True(descending)
}
}
// TestNewRerankOutputsWithEmptyFieldsData checks how newRerankOutputs prepares
// the output FieldsData for three input shapes: every input has empty FieldsData,
// only the first input is empty, and every input is nil.
func (s *UtilSuite) TestNewRerankOutputsWithEmptyFieldsData() {
	// resultWith builds an input holding the given int64 primary keys and field columns.
	resultWith := func(pks []int64, fields []*schemapb.FieldData) *schemapb.SearchResultData {
		return &schemapb.SearchResultData{
			Ids: &schemapb.IDs{
				IdField: &schemapb.IDs_IntId{
					IntId: &schemapb.LongArray{Data: pks},
				},
			},
			FieldsData: fields,
		}
	}

	// Single Int64 column used by the "partially empty" scenario.
	field1 := &schemapb.FieldData{
		Type:      schemapb.DataType_Int64,
		FieldName: "field1",
		FieldId:   100,
		Field: &schemapb.FieldData_Scalars{
			Scalars: &schemapb.ScalarField{
				Data: &schemapb.ScalarField_LongData{
					LongData: &schemapb.LongArray{Data: []int64{40, 50, 60}},
				},
			},
		},
	}

	scenarios := []struct {
		fieldData  []*schemapb.SearchResultData
		wantFields bool // true when the prepared FieldsData must be non-empty
	}{
		{
			// Every input has empty FieldsData.
			fieldData: []*schemapb.SearchResultData{
				resultWith([]int64{1, 2, 3}, []*schemapb.FieldData{}),
				resultWith([]int64{4, 5, 6}, []*schemapb.FieldData{}),
			},
			wantFields: false,
		},
		{
			// First input is empty, second one carries a column.
			fieldData: []*schemapb.SearchResultData{
				resultWith([]int64{1, 2, 3}, []*schemapb.FieldData{}),
				resultWith([]int64{4, 5, 6}, []*schemapb.FieldData{field1}),
			},
			wantFields: true,
		},
		{
			// Every input is nil.
			fieldData:  []*schemapb.SearchResultData{nil, nil},
			wantFields: false,
		},
	}

	for _, sc := range scenarios {
		inputs := &rerankInputs{fieldData: sc.fieldData}
		searchParams := &SearchParams{limit: 10}

		outputs := newRerankOutputs(inputs, searchParams)
		s.NotNil(outputs)
		if sc.wantFields {
			// The non-empty input must have been used to prepare the output fields.
			s.Greater(len(outputs.searchResultData.FieldsData), 0)
		} else {
			// Nothing to prepare from, so the output fields stay empty.
			s.Equal(0, len(outputs.searchResultData.FieldsData))
		}
	}
}
// TestAppendResultWithEmptyFieldsData verifies that appendResult does not panic
// when the only input has an empty FieldsData slice, and that Topks, scores and
// IDs are still appended while the output FieldsData stays empty.
func (s *UtilSuite) TestAppendResultWithEmptyFieldsData() {
	// One input batch with three int64 primary keys and no field columns.
	onlyBatch := &schemapb.SearchResultData{
		Ids: &schemapb.IDs{
			IdField: &schemapb.IDs_IntId{
				IntId: &schemapb.LongArray{Data: []int64{1, 2, 3}},
			},
		},
		FieldsData: []*schemapb.FieldData{},
	}
	inputs := &rerankInputs{fieldData: []*schemapb.SearchResultData{onlyBatch}}
	outputs := newRerankOutputs(inputs, &SearchParams{limit: 10})

	// Two hits, both located in batch 0 at offsets 0 and 1.
	hits := &IDScores[int64]{
		ids:    []int64{1, 2},
		scores: []float32{0.9, 0.8},
		locations: []IDLoc{
			{batchIdx: 0, offset: 0},
			{batchIdx: 0, offset: 1},
		},
	}

	// Appending must not panic even though there are no field columns to copy.
	s.NotPanics(func() {
		appendResult(inputs, outputs, hits)
	})

	merged := outputs.searchResultData
	// IDs and scores are appended as usual.
	s.Equal(int64(2), merged.Topks[0])
	s.Equal([]float32{0.9, 0.8}, merged.Scores)
	s.Equal([]int64{1, 2}, merged.Ids.GetIntId().Data)
	// FieldsData is still empty after the append.
	s.Equal(0, len(merged.FieldsData))
}