enhance: [2.5] Add param item for hybrid search requery policy (#44467)

Cherry-pick from master
pr: #44466
related to #39757

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
congqixia 2025-09-24 17:52:07 +08:00 committed by GitHub
parent cdcad7b1c7
commit d251e102b6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 53 additions and 5 deletions

View File

@ -515,6 +515,9 @@ func rankSearchResultDataByGroup(ctx context.Context,
return ret, nil
}
// init FieldsData
ret.Results.FieldsData = typeutil.PrepareResultFieldData(searchResults[0].GetResults().GetFieldsData(), limit)
totalCount := limit * groupSize
if err := setupIdListForSearchResult(ret, pkType, totalCount); err != nil {
return ret, err
@ -526,11 +529,18 @@ func rankSearchResultDataByGroup(ctx context.Context,
}
accumulatedScores := make([]map[interface{}]*accumulateIDGroupVal, nq)
type dataLoc struct {
resultIdx int
offset int
}
pk2DataOffset := make([]map[any]dataLoc, nq)
for i := int64(0); i < nq; i++ {
accumulatedScores[i] = make(map[interface{}]*accumulateIDGroupVal)
pk2DataOffset[i] = make(map[any]dataLoc)
}
groupByDataType := searchResults[0].GetResults().GetGroupByFieldValue().GetType()
for _, result := range searchResults {
for ri, result := range searchResults {
scores := result.GetResults().GetScores()
start := 0
// milvus has limits for the value range of nq and limit
@ -540,6 +550,7 @@ func rankSearchResultDataByGroup(ctx context.Context,
for j := start; j < start+realTopK; j++ {
id := typeutil.GetPK(result.GetResults().GetIds(), int64(j))
groupByVal := typeutil.GetData(result.GetResults().GetGroupByFieldValue(), j)
pk2DataOffset[i][id] = dataLoc{resultIdx: ri, offset: j}
if accumulatedScores[i][id] != nil {
accumulatedScores[i][id].accumulatedScore += scores[j]
} else {
@ -623,14 +634,16 @@ func rankSearchResultDataByGroup(ctx context.Context,
returnedRowNum := 0
for index := int(offset); index < len(groupList); index++ {
group := groupList[index]
for i, score := range group.scoreList {
for idx, score := range group.scoreList {
// idList and scoreList must have the same length
typeutil.AppendPKs(ret.Results.Ids, group.idList[i])
typeutil.AppendPKs(ret.Results.Ids, group.idList[idx])
if roundDecimal != -1 {
multiplier := math.Pow(10.0, float64(roundDecimal))
score = float32(math.Floor(float64(score)*multiplier+0.5) / multiplier)
}
ret.Results.Scores = append(ret.Results.Scores, score)
loc := pk2DataOffset[i][group.idList[idx]]
typeutil.AppendFieldData(ret.Results.FieldsData, searchResults[loc.resultIdx].GetResults().GetFieldsData(), int64(loc.offset))
typeutil.AppendGroupByValue(ret.Results, group.groupVal, groupByDataType)
}
returnedRowNum += len(group.idList)
@ -699,23 +712,34 @@ func rankSearchResultDataByPk(ctx context.Context,
return ret, nil
}
// init FieldsData
ret.Results.FieldsData = typeutil.PrepareResultFieldData(searchResults[0].GetResults().GetFieldsData(), limit)
if err := setupIdListForSearchResult(ret, pkType, limit); err != nil {
return ret, nil
}
// []map[id]score
accumulatedScores := make([]map[interface{}]float32, nq)
type dataLoc struct {
resultIdx int
offset int64
}
pk2DataOffset := make([]map[any]dataLoc, nq)
for i := int64(0); i < nq; i++ {
accumulatedScores[i] = make(map[interface{}]float32)
pk2DataOffset[i] = make(map[any]dataLoc)
}
for _, result := range searchResults {
for ri, result := range searchResults {
scores := result.GetResults().GetScores()
start := int64(0)
for i := int64(0); i < nq; i++ {
realTopk := result.GetResults().Topks[i]
for j := start; j < start+realTopk; j++ {
id := typeutil.GetPK(result.GetResults().GetIds(), j)
pk2DataOffset[i][id] = dataLoc{resultIdx: ri, offset: j}
accumulatedScores[i][id] += scores[j]
}
start += realTopk
@ -758,6 +782,8 @@ func rankSearchResultDataByPk(ctx context.Context,
score = float32(math.Floor(float64(score)*multiplier+0.5) / multiplier)
}
ret.Results.Scores = append(ret.Results.Scores, score)
loc := pk2DataOffset[i][keys[index]]
typeutil.AppendFieldData(ret.Results.FieldsData, searchResults[loc.resultIdx].GetResults().GetFieldsData(), loc.offset)
}
}

View File

@ -5,6 +5,7 @@ import (
"fmt"
"math"
"strconv"
"strings"
"time"
"github.com/cockroachdb/errors"
@ -215,7 +216,16 @@ func (t *searchTask) PreExecute(ctx context.Context) error {
})
if t.SearchRequest.GetIsAdvanced() {
t.requery = len(t.translatedOutputFields) > 0
switch strings.ToLower(paramtable.Get().CommonCfg.HybridSearchRequeryPolicy.GetValue()) {
case "always":
t.requery = true
case "outputvector":
t.requery = len(vectorOutputFields) > 0
case "outputfields":
fallthrough
default:
t.requery = len(t.request.GetOutputFields()) > 0
}
err = t.initAdvancedSearchRequest(ctx)
} else {
t.requery = len(vectorOutputFields) > 0

View File

@ -317,6 +317,8 @@ type commonConfig struct {
EnableConfigParamTypeCheck ParamItem `refreshable:"true"`
ClusterID ParamItem `refreshable:"false"`
HybridSearchRequeryPolicy ParamItem `refreshable:"true"`
}
func (p *commonConfig) init(base *BaseTable) {
@ -1155,6 +1157,7 @@ This helps Milvus-CDC synchronize incremental data`,
Export: true,
}
p.EnableConfigParamTypeCheck.Init(base.mgr)
p.ClusterID = ParamItem{
Key: "common.clusterID",
Version: "2.6.3",
@ -1174,6 +1177,15 @@ This helps Milvus-CDC synchronize incremental data`,
},
}
p.ClusterID.Init(base.mgr)
p.HybridSearchRequeryPolicy = ParamItem{
Key: "common.requery.hybridSearchPolicy",
Version: "2.5.18",
DefaultValue: "OutputVector",
Doc: `the policy to decide when to do requery in hybrid search, support "always", "outputvector" and "outputfields"`,
Export: false,
}
p.HybridSearchRequeryPolicy.Init(base.mgr)
}
type gpuConfig struct {