From 7e4f87e35148ab4fcaf967c934b52cbcf4fef1e9 Mon Sep 17 00:00:00 2001
From: aoiasd <45024769+aoiasd@users.noreply.github.com>
Date: Fri, 19 Dec 2025 10:23:18 +0800
Subject: [PATCH] fix: Init analyzer at delegator for all field with enable analyzer (#46361)

To support text match highlight
relate: https://github.com/milvus-io/milvus/issues/46308

Signed-off-by: aoiasd
---
// Review notes (free-form text after the separator; not part of the commit):
//
// delegator.go: before the idfOracle setup, NewShardDelegator now walks every
// field in collection.Schema().GetFields(). For each field whose schema helper
// reports EnableAnalyzer() and that has no entry yet in sd.analyzerRunners, it
// calls function.NewAnalyzerRunner(field) and stores the result under the
// field ID. An error from that call is returned unchanged, together with a
// nil delegator.
//
// bm25_function.go: NewAnalyzerRunner builds an analyzer from
// getAnalyzerParams(field) via analyzer.NewAnalyzer and returns a
// *BM25FunctionRunner with only inputField, tokenizer and concurrency (fixed
// at 8) set; the remaining struct fields keep their zero values.
//
// NOTE(review): presumably callers use only the Analyzer-facing methods of the
// returned runner, since nothing else on it is populated -- confirm.
// NOTE(review): NewAnalyzerRunner is exported but has no doc comment, and the
// error from analyzer.NewAnalyzer carries no field name -- consider adding both.
// NOTE(review): on the error path this hunk does not release runners created
// in earlier iterations; check whether they hold resources that need closing.

 internal/querynodev2/delegator/delegator.go | 11 +++++++++++
 internal/util/function/bm25_function.go     | 14 ++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/internal/querynodev2/delegator/delegator.go b/internal/querynodev2/delegator/delegator.go
index 5e68107cc7..c1da5b9fe2 100644
--- a/internal/querynodev2/delegator/delegator.go
+++ b/internal/querynodev2/delegator/delegator.go
@@ -1207,6 +1207,17 @@ func NewShardDelegator(ctx context.Context, collectionID UniqueID, replicaID Uni
 		}
 	}
 
+	for _, field := range collection.Schema().GetFields() {
+		helper := typeutil.CreateFieldSchemaHelper(field)
+		if helper.EnableAnalyzer() && sd.analyzerRunners[field.GetFieldID()] == nil {
+			analyzerRunner, err := function.NewAnalyzerRunner(field)
+			if err != nil {
+				return nil, err
+			}
+			sd.analyzerRunners[field.GetFieldID()] = analyzerRunner
+		}
+	}
+
 	if len(sd.isBM25Field) > 0 {
 		sd.idfOracle = NewIDFOracle(sd.vchannelName, collection.Schema().GetFunctions())
 		sd.distribution.SetIDFOracle(sd.idfOracle)
diff --git a/internal/util/function/bm25_function.go b/internal/util/function/bm25_function.go
index 37c5f9186d..71c4440ce0 100644
--- a/internal/util/function/bm25_function.go
+++ b/internal/util/function/bm25_function.go
@@ -62,6 +62,20 @@ func getAnalyzerParams(field *schemapb.FieldSchema) string {
 	return "{}"
 }
 
+func NewAnalyzerRunner(field *schemapb.FieldSchema) (Analyzer, error) {
+	params := getAnalyzerParams(field)
+	tokenizer, err := analyzer.NewAnalyzer(params)
+	if err != nil {
+		return nil, err
+	}
+
+	return &BM25FunctionRunner{
+		inputField:  field,
+		tokenizer:   tokenizer,
+		concurrency: 8,
+	}, nil
+}
+
 func NewBM25FunctionRunner(coll *schemapb.CollectionSchema, schema *schemapb.FunctionSchema) (FunctionRunner, error) {
 	if len(schema.GetOutputFieldIds()) != 1 {
 		return nil, fmt.Errorf("bm25 function should only have one output field, but now %d", len(schema.GetOutputFieldIds()))