From 8efe9ccac6aaa30ebd45412d1ecdefae1734b858 Mon Sep 17 00:00:00 2001
From: aoiasd <45024769+aoiasd@users.noreply.github.com>
Date: Thu, 4 Dec 2025 10:35:11 +0800
Subject: [PATCH] feat: Add support for using highlight without returning the field as the output field. (#45984)

relate: https://github.com/milvus-io/milvus/issues/42589

Signed-off-by: aoiasd
---
 internal/proxy/highlighter.go | 13 +++++++++++--
 internal/proxy/task_search.go | 31 ++++++++++++++++++++-----------
 2 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/internal/proxy/highlighter.go b/internal/proxy/highlighter.go
index 60959d447d..38f9b830ae 100644
--- a/internal/proxy/highlighter.go
+++ b/internal/proxy/highlighter.go
@@ -36,6 +36,7 @@ const (
 
 type Highlighter interface {
 	AsSearchPipelineOperator(t *searchTask) (operator, error)
+	FieldIDs() []int64
 }
 
 // highlight task for one field
@@ -114,18 +115,26 @@ func (h *LexicalHighlighter) addTaskWithQuery(fieldID int64, query *highlightQue
 	})
 }
 
-func (h *LexicalHighlighter) AsSearchPipelineOperator(t *searchTask) (operator, error) {
+func (h *LexicalHighlighter) initHighlightQueries(t *searchTask) error {
 	// add query to highlight tasks
 	for _, query := range h.queries {
 		fieldID, ok := t.schema.MapFieldID(query.fieldName)
 		if !ok {
-			return nil, merr.WrapErrParameterInvalidMsg("highlight field not found in schema: %s", query.fieldName)
+			return merr.WrapErrParameterInvalidMsg("highlight field not found in schema: %s", query.fieldName)
 		}
 		h.addTaskWithQuery(fieldID, query)
 	}
+	return nil
+}
+
+func (h *LexicalHighlighter) AsSearchPipelineOperator(t *searchTask) (operator, error) {
 	return newLexicalHighlightOperator(t, lo.Values(h.tasks))
 }
 
+func (h *LexicalHighlighter) FieldIDs() []int64 {
+	return lo.Keys(h.tasks)
+}
+
 func NewLexicalHighlighter(highlighter *commonpb.Highlighter) (*LexicalHighlighter, error) {
 	params := funcutil.KeyValuePair2Map(highlighter.GetParams())
 	h := &LexicalHighlighter{
diff --git a/internal/proxy/task_search.go b/internal/proxy/task_search.go
index 14fb6e25ce..4d2332733e 100644
--- a/internal/proxy/task_search.go
+++ b/internal/proxy/task_search.go
@@ -602,7 +602,12 @@ func (t *searchTask) createLexicalHighlighter(highlighter *commonpb.Highlighter,
 		if err != nil {
 			return err
 		}
-		return h.addTaskWithSearchText(fieldId, fieldName, analyzerName, texts)
+		err = h.addTaskWithSearchText(fieldId, fieldName, analyzerName, texts)
+		if err != nil {
+			return err
+		}
+
+		return h.initHighlightQueries(t)
 	}
 	return nil
 }
@@ -642,10 +647,24 @@ func (t *searchTask) initSearchRequest(ctx context.Context) error {
 		}
 	}
 
+	analyzer, err := funcutil.GetAttrByKeyFromRepeatedKV(AnalyzerKey, t.request.GetSearchParams())
+	if err == nil {
+		t.SearchRequest.AnalyzerName = analyzer
+	}
+
 	t.isIterator = isIterator
 	t.SearchRequest.Offset = offset
 	t.SearchRequest.FieldId = queryInfo.GetQueryFieldId()
 
+	if err := t.addHighlightTask(t.request.GetHighlighter(), queryInfo.GetMetricType(), queryInfo.GetQueryFieldId(), t.request.GetPlaceholderGroup(), t.SearchRequest.GetAnalyzerName()); err != nil {
+		return err
+	}
+
+	// add highlight field ids to output fields id
+	if t.highlighter != nil {
+		t.SearchRequest.OutputFieldsId = append(t.SearchRequest.OutputFieldsId, t.highlighter.FieldIDs()...)
+	}
+
 	if t.partitionKeyMode {
 		// isolation has tighter constraint, check first
 		mvErr := setQueryInfoIfMvEnable(queryInfo, t, plan)
@@ -696,16 +715,6 @@ func (t *searchTask) initSearchRequest(ctx context.Context) error {
 	t.SearchRequest.GroupByFieldId = queryInfo.GroupByFieldId
 	t.SearchRequest.GroupSize = queryInfo.GroupSize
 
-	if t.SearchRequest.MetricType == metric.BM25 {
-		analyzer, err := funcutil.GetAttrByKeyFromRepeatedKV(AnalyzerKey, t.request.GetSearchParams())
-		if err == nil {
-			t.SearchRequest.AnalyzerName = analyzer
-		}
-	}
-	if err := t.addHighlightTask(t.request.GetHighlighter(), t.SearchRequest.MetricType, t.SearchRequest.FieldId, t.request.GetPlaceholderGroup(), t.SearchRequest.GetAnalyzerName()); err != nil {
-		return err
-	}
-
 	if embedding.HasNonBM25Functions(t.schema.CollectionSchema.Functions, []int64{queryInfo.GetQueryFieldId()}) {
 		ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-Search-call-function-udf")
 		defer sp.End()