fix: Init analyzer at delegator for all fields with analyzer enabled (#46361)

This is needed to support text match highlighting.
related: https://github.com/milvus-io/milvus/issues/46308

Signed-off-by: aoiasd <zhicheng.yue@zilliz.com>
This commit is contained in:
aoiasd 2025-12-19 10:23:18 +08:00 committed by GitHub
parent 80fff56364
commit 7e4f87e351
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 25 additions and 0 deletions

View File

@ -1207,6 +1207,17 @@ func NewShardDelegator(ctx context.Context, collectionID UniqueID, replicaID Uni
}
}
for _, field := range collection.Schema().GetFields() {
helper := typeutil.CreateFieldSchemaHelper(field)
if helper.EnableAnalyzer() && sd.analyzerRunners[field.GetFieldID()] == nil {
analyzerRunner, err := function.NewAnalyzerRunner(field)
if err != nil {
return nil, err
}
sd.analyzerRunners[field.GetFieldID()] = analyzerRunner
}
}
if len(sd.isBM25Field) > 0 {
sd.idfOracle = NewIDFOracle(sd.vchannelName, collection.Schema().GetFunctions())
sd.distribution.SetIDFOracle(sd.idfOracle)

View File

@ -62,6 +62,20 @@ func getAnalyzerParams(field *schemapb.FieldSchema) string {
return "{}"
}
// NewAnalyzerRunner builds an Analyzer for a single field.
//
// The analyzer parameters are resolved from the field via getAnalyzerParams
// and passed to analyzer.NewAnalyzer to construct the tokenizer. The result is
// wrapped in a BM25FunctionRunner that has only inputField, tokenizer and
// concurrency populated; concurrency is fixed at 8.
//
// If the tokenizer cannot be created, the error from analyzer.NewAnalyzer is
// returned unchanged together with a nil Analyzer.
func NewAnalyzerRunner(field *schemapb.FieldSchema) (Analyzer, error) {
	tk, err := analyzer.NewAnalyzer(getAnalyzerParams(field))
	if err != nil {
		return nil, err
	}

	runner := &BM25FunctionRunner{
		inputField:  field,
		tokenizer:   tk,
		concurrency: 8,
	}
	return runner, nil
}
func NewBM25FunctionRunner(coll *schemapb.CollectionSchema, schema *schemapb.FunctionSchema) (FunctionRunner, error) {
if len(schema.GetOutputFieldIds()) != 1 {
return nil, fmt.Errorf("bm25 function should only have one output field, but now %d", len(schema.GetOutputFieldIds()))