mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
feat: [2.6] Support search with highlighter (#46052)
relate: https://github.com/milvus-io/milvus/issues/42589 pr: https://github.com/milvus-io/milvus/pull/45736 https://github.com/milvus-io/milvus/pull/45099 https://github.com/milvus-io/milvus/pull/44923 https://github.com/milvus-io/milvus/pull/45984 --------- Signed-off-by: aoiasd <zhicheng.yue@zilliz.com>
This commit is contained in:
parent
2029551c96
commit
8bdbc4379e
2
go.mod
2
go.mod
@ -21,7 +21,7 @@ require (
|
|||||||
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0
|
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0
|
||||||
github.com/klauspost/compress v1.18.0
|
github.com/klauspost/compress v1.18.0
|
||||||
github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d
|
github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d
|
||||||
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.7
|
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.8-0.20251203092105-e2557263fc4c
|
||||||
github.com/minio/minio-go/v7 v7.0.73
|
github.com/minio/minio-go/v7 v7.0.73
|
||||||
github.com/panjf2000/ants/v2 v2.11.3 // indirect
|
github.com/panjf2000/ants/v2 v2.11.3 // indirect
|
||||||
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 // indirect
|
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 // indirect
|
||||||
|
|||||||
4
go.sum
4
go.sum
@ -799,8 +799,8 @@ github.com/milvus-io/cgosymbolizer v0.0.0-20250318084424-114f4050c3a6 h1:YHMFI6L
|
|||||||
github.com/milvus-io/cgosymbolizer v0.0.0-20250318084424-114f4050c3a6/go.mod h1:DvXTE/K/RtHehxU8/GtDs4vFtfw64jJ3PaCnFri8CRg=
|
github.com/milvus-io/cgosymbolizer v0.0.0-20250318084424-114f4050c3a6/go.mod h1:DvXTE/K/RtHehxU8/GtDs4vFtfw64jJ3PaCnFri8CRg=
|
||||||
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b h1:TfeY0NxYxZzUfIfYe5qYDBzt4ZYRqzUjTR6CvUzjat8=
|
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b h1:TfeY0NxYxZzUfIfYe5qYDBzt4ZYRqzUjTR6CvUzjat8=
|
||||||
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b/go.mod h1:iwW+9cWfIzzDseEBCCeDSN5SD16Tidvy8cwQ7ZY8Qj4=
|
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b/go.mod h1:iwW+9cWfIzzDseEBCCeDSN5SD16Tidvy8cwQ7ZY8Qj4=
|
||||||
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.7 h1:RJtZbkS5zKNIXxsqjGBUZc2SbnI4MGq+TfOfc8tJsuM=
|
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.8-0.20251203092105-e2557263fc4c h1:Gh02wIJEI6RUbEXwZworPBfK9BYd1SVBIDHDL8GsrCY=
|
||||||
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.7/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs=
|
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.8-0.20251203092105-e2557263fc4c/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs=
|
||||||
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs=
|
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs=
|
||||||
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY=
|
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY=
|
||||||
github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI=
|
github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI=
|
||||||
|
|||||||
@ -394,3 +394,14 @@ func (c *Client) DropIndex(ctx context.Context, req *querypb.DropIndexRequest, _
|
|||||||
return client.DropIndex(ctx, req)
|
return client.DropIndex(ctx, req)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *Client) GetHighlight(ctx context.Context, req *querypb.GetHighlightRequest, _ ...grpc.CallOption) (*querypb.GetHighlightResponse, error) {
|
||||||
|
req = typeutil.Clone(req)
|
||||||
|
commonpbutil.UpdateMsgBase(
|
||||||
|
req.GetBase(),
|
||||||
|
commonpbutil.FillMsgBaseFromClient(c.nodeID),
|
||||||
|
)
|
||||||
|
return wrapGrpcCall(ctx, c, func(client querypb.QueryNodeClient) (*querypb.GetHighlightResponse, error) {
|
||||||
|
return client.GetHighlight(ctx, req)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|||||||
@ -111,6 +111,12 @@ func Test_NewClient(t *testing.T) {
|
|||||||
r21, err := client.DeleteBatch(ctx, nil)
|
r21, err := client.DeleteBatch(ctx, nil)
|
||||||
retCheck(retNotNil, r21, err)
|
retCheck(retNotNil, r21, err)
|
||||||
|
|
||||||
|
r22, err := client.RunAnalyzer(ctx, nil)
|
||||||
|
retCheck(retNotNil, r22, err)
|
||||||
|
|
||||||
|
r24, err := client.GetHighlight(ctx, nil)
|
||||||
|
retCheck(retNotNil, r24, err)
|
||||||
|
|
||||||
// stream rpc
|
// stream rpc
|
||||||
client, err := client.QueryStream(ctx, nil)
|
client, err := client.QueryStream(ctx, nil)
|
||||||
retCheck(retNotNil, client, err)
|
retCheck(retNotNil, client, err)
|
||||||
|
|||||||
@ -407,3 +407,7 @@ func (s *Server) RunAnalyzer(ctx context.Context, req *querypb.RunAnalyzerReques
|
|||||||
func (s *Server) DropIndex(ctx context.Context, req *querypb.DropIndexRequest) (*commonpb.Status, error) {
|
func (s *Server) DropIndex(ctx context.Context, req *querypb.DropIndexRequest) (*commonpb.Status, error) {
|
||||||
return s.querynode.DropIndex(ctx, req)
|
return s.querynode.DropIndex(ctx, req)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *Server) GetHighlight(ctx context.Context, req *querypb.GetHighlightRequest) (*querypb.GetHighlightResponse, error) {
|
||||||
|
return s.querynode.GetHighlight(ctx, req)
|
||||||
|
}
|
||||||
|
|||||||
@ -280,6 +280,28 @@ func Test_NewServer(t *testing.T) {
|
|||||||
assert.NoError(t, merr.CheckRPCCall(resp, err))
|
assert.NoError(t, merr.CheckRPCCall(resp, err))
|
||||||
})
|
})
|
||||||
|
|
||||||
|
t.Run("RunAnalyzer", func(t *testing.T) {
|
||||||
|
mockQN.EXPECT().RunAnalyzer(mock.Anything, mock.Anything).Return(&milvuspb.RunAnalyzerResponse{
|
||||||
|
Status: &commonpb.Status{ErrorCode: commonpb.ErrorCode_Success},
|
||||||
|
}, nil)
|
||||||
|
req := &querypb.RunAnalyzerRequest{}
|
||||||
|
resp, err := server.RunAnalyzer(ctx, req)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, commonpb.ErrorCode_Success, resp.GetStatus().GetErrorCode())
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("GetHighlight", func(t *testing.T) {
|
||||||
|
mockQN.EXPECT().GetHighlight(mock.Anything, mock.Anything).Return(&querypb.GetHighlightResponse{
|
||||||
|
Status: merr.Success(),
|
||||||
|
}, nil)
|
||||||
|
|
||||||
|
resp, err := server.GetHighlight(ctx, &querypb.GetHighlightRequest{
|
||||||
|
Channel: "test-channel",
|
||||||
|
})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, commonpb.ErrorCode_Success, resp.GetStatus().GetErrorCode())
|
||||||
|
})
|
||||||
|
|
||||||
err = server.Stop()
|
err = server.Stop()
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -370,6 +370,65 @@ func (_c *MockQueryNode_GetDataDistribution_Call) RunAndReturn(run func(context.
|
|||||||
return _c
|
return _c
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetHighlight provides a mock function with given fields: _a0, _a1
|
||||||
|
func (_m *MockQueryNode) GetHighlight(_a0 context.Context, _a1 *querypb.GetHighlightRequest) (*querypb.GetHighlightResponse, error) {
|
||||||
|
ret := _m.Called(_a0, _a1)
|
||||||
|
|
||||||
|
if len(ret) == 0 {
|
||||||
|
panic("no return value specified for GetHighlight")
|
||||||
|
}
|
||||||
|
|
||||||
|
var r0 *querypb.GetHighlightResponse
|
||||||
|
var r1 error
|
||||||
|
if rf, ok := ret.Get(0).(func(context.Context, *querypb.GetHighlightRequest) (*querypb.GetHighlightResponse, error)); ok {
|
||||||
|
return rf(_a0, _a1)
|
||||||
|
}
|
||||||
|
if rf, ok := ret.Get(0).(func(context.Context, *querypb.GetHighlightRequest) *querypb.GetHighlightResponse); ok {
|
||||||
|
r0 = rf(_a0, _a1)
|
||||||
|
} else {
|
||||||
|
if ret.Get(0) != nil {
|
||||||
|
r0 = ret.Get(0).(*querypb.GetHighlightResponse)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if rf, ok := ret.Get(1).(func(context.Context, *querypb.GetHighlightRequest) error); ok {
|
||||||
|
r1 = rf(_a0, _a1)
|
||||||
|
} else {
|
||||||
|
r1 = ret.Error(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
return r0, r1
|
||||||
|
}
|
||||||
|
|
||||||
|
// MockQueryNode_GetHighlight_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetHighlight'
|
||||||
|
type MockQueryNode_GetHighlight_Call struct {
|
||||||
|
*mock.Call
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetHighlight is a helper method to define mock.On call
|
||||||
|
// - _a0 context.Context
|
||||||
|
// - _a1 *querypb.GetHighlightRequest
|
||||||
|
func (_e *MockQueryNode_Expecter) GetHighlight(_a0 interface{}, _a1 interface{}) *MockQueryNode_GetHighlight_Call {
|
||||||
|
return &MockQueryNode_GetHighlight_Call{Call: _e.mock.On("GetHighlight", _a0, _a1)}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (_c *MockQueryNode_GetHighlight_Call) Run(run func(_a0 context.Context, _a1 *querypb.GetHighlightRequest)) *MockQueryNode_GetHighlight_Call {
|
||||||
|
_c.Call.Run(func(args mock.Arguments) {
|
||||||
|
run(args[0].(context.Context), args[1].(*querypb.GetHighlightRequest))
|
||||||
|
})
|
||||||
|
return _c
|
||||||
|
}
|
||||||
|
|
||||||
|
func (_c *MockQueryNode_GetHighlight_Call) Return(_a0 *querypb.GetHighlightResponse, _a1 error) *MockQueryNode_GetHighlight_Call {
|
||||||
|
_c.Call.Return(_a0, _a1)
|
||||||
|
return _c
|
||||||
|
}
|
||||||
|
|
||||||
|
func (_c *MockQueryNode_GetHighlight_Call) RunAndReturn(run func(context.Context, *querypb.GetHighlightRequest) (*querypb.GetHighlightResponse, error)) *MockQueryNode_GetHighlight_Call {
|
||||||
|
_c.Call.Return(run)
|
||||||
|
return _c
|
||||||
|
}
|
||||||
|
|
||||||
// GetMetrics provides a mock function with given fields: _a0, _a1
|
// GetMetrics provides a mock function with given fields: _a0, _a1
|
||||||
func (_m *MockQueryNode) GetMetrics(_a0 context.Context, _a1 *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
|
func (_m *MockQueryNode) GetMetrics(_a0 context.Context, _a1 *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
|
||||||
ret := _m.Called(_a0, _a1)
|
ret := _m.Called(_a0, _a1)
|
||||||
|
|||||||
@ -446,6 +446,80 @@ func (_c *MockQueryNodeClient_GetDataDistribution_Call) RunAndReturn(run func(co
|
|||||||
return _c
|
return _c
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetHighlight provides a mock function with given fields: ctx, in, opts
|
||||||
|
func (_m *MockQueryNodeClient) GetHighlight(ctx context.Context, in *querypb.GetHighlightRequest, opts ...grpc.CallOption) (*querypb.GetHighlightResponse, error) {
|
||||||
|
_va := make([]interface{}, len(opts))
|
||||||
|
for _i := range opts {
|
||||||
|
_va[_i] = opts[_i]
|
||||||
|
}
|
||||||
|
var _ca []interface{}
|
||||||
|
_ca = append(_ca, ctx, in)
|
||||||
|
_ca = append(_ca, _va...)
|
||||||
|
ret := _m.Called(_ca...)
|
||||||
|
|
||||||
|
if len(ret) == 0 {
|
||||||
|
panic("no return value specified for GetHighlight")
|
||||||
|
}
|
||||||
|
|
||||||
|
var r0 *querypb.GetHighlightResponse
|
||||||
|
var r1 error
|
||||||
|
if rf, ok := ret.Get(0).(func(context.Context, *querypb.GetHighlightRequest, ...grpc.CallOption) (*querypb.GetHighlightResponse, error)); ok {
|
||||||
|
return rf(ctx, in, opts...)
|
||||||
|
}
|
||||||
|
if rf, ok := ret.Get(0).(func(context.Context, *querypb.GetHighlightRequest, ...grpc.CallOption) *querypb.GetHighlightResponse); ok {
|
||||||
|
r0 = rf(ctx, in, opts...)
|
||||||
|
} else {
|
||||||
|
if ret.Get(0) != nil {
|
||||||
|
r0 = ret.Get(0).(*querypb.GetHighlightResponse)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if rf, ok := ret.Get(1).(func(context.Context, *querypb.GetHighlightRequest, ...grpc.CallOption) error); ok {
|
||||||
|
r1 = rf(ctx, in, opts...)
|
||||||
|
} else {
|
||||||
|
r1 = ret.Error(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
return r0, r1
|
||||||
|
}
|
||||||
|
|
||||||
|
// MockQueryNodeClient_GetHighlight_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetHighlight'
|
||||||
|
type MockQueryNodeClient_GetHighlight_Call struct {
|
||||||
|
*mock.Call
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetHighlight is a helper method to define mock.On call
|
||||||
|
// - ctx context.Context
|
||||||
|
// - in *querypb.GetHighlightRequest
|
||||||
|
// - opts ...grpc.CallOption
|
||||||
|
func (_e *MockQueryNodeClient_Expecter) GetHighlight(ctx interface{}, in interface{}, opts ...interface{}) *MockQueryNodeClient_GetHighlight_Call {
|
||||||
|
return &MockQueryNodeClient_GetHighlight_Call{Call: _e.mock.On("GetHighlight",
|
||||||
|
append([]interface{}{ctx, in}, opts...)...)}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (_c *MockQueryNodeClient_GetHighlight_Call) Run(run func(ctx context.Context, in *querypb.GetHighlightRequest, opts ...grpc.CallOption)) *MockQueryNodeClient_GetHighlight_Call {
|
||||||
|
_c.Call.Run(func(args mock.Arguments) {
|
||||||
|
variadicArgs := make([]grpc.CallOption, len(args)-2)
|
||||||
|
for i, a := range args[2:] {
|
||||||
|
if a != nil {
|
||||||
|
variadicArgs[i] = a.(grpc.CallOption)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
run(args[0].(context.Context), args[1].(*querypb.GetHighlightRequest), variadicArgs...)
|
||||||
|
})
|
||||||
|
return _c
|
||||||
|
}
|
||||||
|
|
||||||
|
func (_c *MockQueryNodeClient_GetHighlight_Call) Return(_a0 *querypb.GetHighlightResponse, _a1 error) *MockQueryNodeClient_GetHighlight_Call {
|
||||||
|
_c.Call.Return(_a0, _a1)
|
||||||
|
return _c
|
||||||
|
}
|
||||||
|
|
||||||
|
func (_c *MockQueryNodeClient_GetHighlight_Call) RunAndReturn(run func(context.Context, *querypb.GetHighlightRequest, ...grpc.CallOption) (*querypb.GetHighlightResponse, error)) *MockQueryNodeClient_GetHighlight_Call {
|
||||||
|
_c.Call.Return(run)
|
||||||
|
return _c
|
||||||
|
}
|
||||||
|
|
||||||
// GetMetrics provides a mock function with given fields: ctx, in, opts
|
// GetMetrics provides a mock function with given fields: ctx, in, opts
|
||||||
func (_m *MockQueryNodeClient) GetMetrics(ctx context.Context, in *milvuspb.GetMetricsRequest, opts ...grpc.CallOption) (*milvuspb.GetMetricsResponse, error) {
|
func (_m *MockQueryNodeClient) GetMetrics(ctx context.Context, in *milvuspb.GetMetricsRequest, opts ...grpc.CallOption) (*milvuspb.GetMetricsResponse, error) {
|
||||||
_va := make([]interface{}, len(opts))
|
_va := make([]interface{}, len(opts))
|
||||||
|
|||||||
403
internal/proxy/highlighter.go
Normal file
403
internal/proxy/highlighter.go
Normal file
@ -0,0 +1,403 @@
|
|||||||
|
package proxy
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/cockroachdb/errors"
|
||||||
|
"github.com/samber/lo"
|
||||||
|
"go.opentelemetry.io/otel/trace"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||||
|
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
|
||||||
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
|
"github.com/milvus-io/milvus/internal/proxy/shardclient"
|
||||||
|
"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
|
||||||
|
"github.com/milvus-io/milvus/pkg/v2/util/funcutil"
|
||||||
|
"github.com/milvus-io/milvus/pkg/v2/util/merr"
|
||||||
|
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
PreTagsKey = "pre_tags"
|
||||||
|
PostTagsKey = "post_tags"
|
||||||
|
HighlightSearchTextKey = "highlight_search_text"
|
||||||
|
HighlightQueryKey = "queries"
|
||||||
|
FragmentOffsetKey = "fragment_offset"
|
||||||
|
FragmentSizeKey = "fragment_size"
|
||||||
|
FragmentNumKey = "num_of_fragments"
|
||||||
|
DefaultFragmentSize = 100
|
||||||
|
DefaultFragmentNum = 5
|
||||||
|
DefaultPreTag = "<em>"
|
||||||
|
DefaultPostTag = "</em>"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Highlighter interface {
|
||||||
|
AsSearchPipelineOperator(t *searchTask) (operator, error)
|
||||||
|
FieldIDs() []int64
|
||||||
|
}
|
||||||
|
|
||||||
|
// highlight task for one field
|
||||||
|
type highlightTask struct {
|
||||||
|
*querypb.HighlightTask
|
||||||
|
preTags [][]byte
|
||||||
|
postTags [][]byte
|
||||||
|
}
|
||||||
|
|
||||||
|
type highlightQuery struct {
|
||||||
|
text string
|
||||||
|
fieldName string
|
||||||
|
highlightType querypb.HighlightQueryType
|
||||||
|
}
|
||||||
|
|
||||||
|
type LexicalHighlighter struct {
|
||||||
|
tasks map[int64]*highlightTask // fieldID -> highlightTask
|
||||||
|
// option for all highlight task
|
||||||
|
// TODO: support set option for each task
|
||||||
|
preTags [][]byte
|
||||||
|
postTags [][]byte
|
||||||
|
highlightSearch bool
|
||||||
|
options *querypb.HighlightOptions
|
||||||
|
queries []*highlightQuery
|
||||||
|
}
|
||||||
|
|
||||||
|
// add highlight task with search
|
||||||
|
// must used before addTaskWithQuery
|
||||||
|
func (h *LexicalHighlighter) addTaskWithSearchText(fieldID int64, fieldName string, analyzerName string, texts []string) error {
|
||||||
|
_, ok := h.tasks[fieldID]
|
||||||
|
if ok {
|
||||||
|
return merr.WrapErrParameterInvalidMsg("not support hybrid search with highlight now. fieldID: %d", fieldID)
|
||||||
|
}
|
||||||
|
|
||||||
|
task := &highlightTask{
|
||||||
|
preTags: h.preTags,
|
||||||
|
postTags: h.postTags,
|
||||||
|
HighlightTask: &querypb.HighlightTask{
|
||||||
|
FieldName: fieldName,
|
||||||
|
FieldId: fieldID,
|
||||||
|
Options: h.options,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
h.tasks[fieldID] = task
|
||||||
|
|
||||||
|
task.Texts = texts
|
||||||
|
task.SearchTextNum = int64(len(texts))
|
||||||
|
if analyzerName != "" {
|
||||||
|
task.AnalyzerNames = []string{}
|
||||||
|
for i := 0; i < len(texts); i++ {
|
||||||
|
task.AnalyzerNames = append(task.AnalyzerNames, analyzerName)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *LexicalHighlighter) addTaskWithQuery(fieldID int64, query *highlightQuery) {
|
||||||
|
task, ok := h.tasks[fieldID]
|
||||||
|
if !ok {
|
||||||
|
task = &highlightTask{
|
||||||
|
HighlightTask: &querypb.HighlightTask{
|
||||||
|
Texts: []string{},
|
||||||
|
FieldId: fieldID,
|
||||||
|
FieldName: query.fieldName,
|
||||||
|
Options: h.options,
|
||||||
|
},
|
||||||
|
preTags: h.preTags,
|
||||||
|
postTags: h.postTags,
|
||||||
|
}
|
||||||
|
h.tasks[fieldID] = task
|
||||||
|
}
|
||||||
|
|
||||||
|
task.Texts = append(task.Texts, query.text)
|
||||||
|
task.Queries = append(task.Queries, &querypb.HighlightQuery{
|
||||||
|
Type: query.highlightType,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *LexicalHighlighter) initHighlightQueries(t *searchTask) error {
|
||||||
|
// add query to highlight tasks
|
||||||
|
for _, query := range h.queries {
|
||||||
|
fieldID, ok := t.schema.MapFieldID(query.fieldName)
|
||||||
|
if !ok {
|
||||||
|
return merr.WrapErrParameterInvalidMsg("highlight field not found in schema: %s", query.fieldName)
|
||||||
|
}
|
||||||
|
h.addTaskWithQuery(fieldID, query)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *LexicalHighlighter) AsSearchPipelineOperator(t *searchTask) (operator, error) {
|
||||||
|
return newLexicalHighlightOperator(t, lo.Values(h.tasks))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *LexicalHighlighter) FieldIDs() []int64 {
|
||||||
|
return lo.Keys(h.tasks)
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewLexicalHighlighter(highlighter *commonpb.Highlighter) (*LexicalHighlighter, error) {
|
||||||
|
params := funcutil.KeyValuePair2Map(highlighter.GetParams())
|
||||||
|
h := &LexicalHighlighter{
|
||||||
|
tasks: make(map[int64]*highlightTask),
|
||||||
|
options: &querypb.HighlightOptions{},
|
||||||
|
}
|
||||||
|
|
||||||
|
// set pre_tags and post_tags
|
||||||
|
if value, ok := params[PreTagsKey]; ok {
|
||||||
|
tags := []string{}
|
||||||
|
if err := json.Unmarshal([]byte(value), &tags); err != nil {
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg("unmarshal pre_tags as string array failed: %v", err)
|
||||||
|
}
|
||||||
|
if len(tags) == 0 {
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg("pre_tags cannot be empty list")
|
||||||
|
}
|
||||||
|
|
||||||
|
h.preTags = make([][]byte, len(tags))
|
||||||
|
for i, tag := range tags {
|
||||||
|
h.preTags[i] = []byte(tag)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
h.preTags = [][]byte{[]byte(DefaultPreTag)}
|
||||||
|
}
|
||||||
|
|
||||||
|
if value, ok := params[PostTagsKey]; ok {
|
||||||
|
tags := []string{}
|
||||||
|
if err := json.Unmarshal([]byte(value), &tags); err != nil {
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg("unmarshal post_tags as string list failed: %v", err)
|
||||||
|
}
|
||||||
|
if len(tags) == 0 {
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg("post_tags cannot be empty list")
|
||||||
|
}
|
||||||
|
h.postTags = make([][]byte, len(tags))
|
||||||
|
for i, tag := range tags {
|
||||||
|
h.postTags[i] = []byte(tag)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
h.postTags = [][]byte{[]byte(DefaultPostTag)}
|
||||||
|
}
|
||||||
|
|
||||||
|
// set fragment config
|
||||||
|
if value, ok := params[FragmentSizeKey]; ok {
|
||||||
|
fragmentSize, err := strconv.ParseInt(value, 10, 64)
|
||||||
|
if err != nil || fragmentSize <= 0 {
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg("invalid fragment_size: %s", value)
|
||||||
|
}
|
||||||
|
h.options.FragmentSize = fragmentSize
|
||||||
|
} else {
|
||||||
|
h.options.FragmentSize = DefaultFragmentSize
|
||||||
|
}
|
||||||
|
|
||||||
|
if value, ok := params[FragmentNumKey]; ok {
|
||||||
|
fragmentNum, err := strconv.ParseInt(value, 10, 64)
|
||||||
|
if err != nil || fragmentNum < 0 {
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg("invalid num_of_fragments: %s", value)
|
||||||
|
}
|
||||||
|
h.options.NumOfFragments = fragmentNum
|
||||||
|
} else {
|
||||||
|
h.options.NumOfFragments = DefaultFragmentNum
|
||||||
|
}
|
||||||
|
|
||||||
|
if value, ok := params[FragmentOffsetKey]; ok {
|
||||||
|
fragmentOffset, err := strconv.ParseInt(value, 10, 64)
|
||||||
|
if err != nil || fragmentOffset < 0 {
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg("invalid fragment_offset: %s", value)
|
||||||
|
}
|
||||||
|
h.options.FragmentOffset = fragmentOffset
|
||||||
|
}
|
||||||
|
|
||||||
|
if value, ok := params[HighlightSearchTextKey]; ok {
|
||||||
|
enable, err := strconv.ParseBool(value)
|
||||||
|
if err != nil {
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg("unmarshal highlight_search_text as bool failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
h.highlightSearch = enable
|
||||||
|
}
|
||||||
|
|
||||||
|
if value, ok := params[HighlightQueryKey]; ok {
|
||||||
|
queries := []any{}
|
||||||
|
if err := json.Unmarshal([]byte(value), &queries); err != nil {
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg("unmarshal highlight queries as json array failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, query := range queries {
|
||||||
|
m, ok := query.(map[string]any)
|
||||||
|
if !ok {
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg("unmarshal highlight queries failed: item in array is not json object")
|
||||||
|
}
|
||||||
|
|
||||||
|
text, ok := m["text"]
|
||||||
|
if !ok {
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg("unmarshal highlight queries failed: must set `text` in query")
|
||||||
|
}
|
||||||
|
|
||||||
|
textStr, ok := text.(string)
|
||||||
|
if !ok {
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg("unmarshal highlight queries failed: `text` must be string")
|
||||||
|
}
|
||||||
|
|
||||||
|
t, ok := m["type"]
|
||||||
|
if !ok {
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg("unmarshal highlight queries failed: must set `type` in query")
|
||||||
|
}
|
||||||
|
|
||||||
|
typeStr, ok := t.(string)
|
||||||
|
if !ok {
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg("unmarshal highlight queries failed: `type` must be string")
|
||||||
|
}
|
||||||
|
|
||||||
|
typeEnum, ok := querypb.HighlightQueryType_value[typeStr]
|
||||||
|
if !ok {
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg("unmarshal highlight queries failed: invalid highlight query type: %s", typeStr)
|
||||||
|
}
|
||||||
|
|
||||||
|
f, ok := m["field"]
|
||||||
|
if !ok {
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg("unmarshal highlight queries failed: must set `field` in query")
|
||||||
|
}
|
||||||
|
fieldStr, ok := f.(string)
|
||||||
|
if !ok {
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg("unmarshal highlight queries failed: `field` must be string")
|
||||||
|
}
|
||||||
|
|
||||||
|
h.queries = append(h.queries, &highlightQuery{
|
||||||
|
text: textStr,
|
||||||
|
highlightType: querypb.HighlightQueryType(typeEnum),
|
||||||
|
fieldName: fieldStr,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return h, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type lexicalHighlightOperator struct {
|
||||||
|
tasks []*highlightTask
|
||||||
|
fieldSchemas []*schemapb.FieldSchema
|
||||||
|
lbPolicy shardclient.LBPolicy
|
||||||
|
scheduler *taskScheduler
|
||||||
|
|
||||||
|
collectionName string
|
||||||
|
collectionID int64
|
||||||
|
dbName string
|
||||||
|
}
|
||||||
|
|
||||||
|
func newLexicalHighlightOperator(t *searchTask, tasks []*highlightTask) (operator, error) {
|
||||||
|
return &lexicalHighlightOperator{
|
||||||
|
tasks: tasks,
|
||||||
|
lbPolicy: t.lb,
|
||||||
|
scheduler: t.node.(*Proxy).sched,
|
||||||
|
fieldSchemas: typeutil.GetAllFieldSchemas(t.schema.CollectionSchema),
|
||||||
|
collectionName: t.request.CollectionName,
|
||||||
|
collectionID: t.CollectionID,
|
||||||
|
dbName: t.request.DbName,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (op *lexicalHighlightOperator) run(ctx context.Context, span trace.Span, inputs ...any) ([]any, error) {
|
||||||
|
result := inputs[0].(*milvuspb.SearchResults)
|
||||||
|
datas := result.Results.GetFieldsData()
|
||||||
|
req := &querypb.GetHighlightRequest{
|
||||||
|
Topks: result.GetResults().GetTopks(),
|
||||||
|
Tasks: lo.Map(op.tasks, func(task *highlightTask, _ int) *querypb.HighlightTask { return task.HighlightTask }),
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, task := range req.GetTasks() {
|
||||||
|
textFieldDatas, ok := lo.Find(datas, func(data *schemapb.FieldData) bool { return data.FieldId == task.GetFieldId() })
|
||||||
|
if !ok {
|
||||||
|
return nil, errors.Errorf("get highlight failed, text field not in output field %s: %d", task.GetFieldName(), task.GetFieldId())
|
||||||
|
}
|
||||||
|
texts := textFieldDatas.GetScalars().GetStringData().GetData()
|
||||||
|
task.Texts = append(task.Texts, texts...)
|
||||||
|
task.CorpusTextNum = int64(len(texts))
|
||||||
|
field, ok := lo.Find(op.fieldSchemas, func(schema *schemapb.FieldSchema) bool {
|
||||||
|
return schema.GetFieldID() == task.GetFieldId()
|
||||||
|
})
|
||||||
|
if !ok {
|
||||||
|
return nil, errors.Errorf("get highlight failed, field not found in schema %s: %d", task.GetFieldName(), task.GetFieldId())
|
||||||
|
}
|
||||||
|
|
||||||
|
// if use multi analyzer
|
||||||
|
// get analyzer field data
|
||||||
|
helper := typeutil.CreateFieldSchemaHelper(field)
|
||||||
|
if v, ok := helper.GetMultiAnalyzerParams(); ok {
|
||||||
|
params := map[string]any{}
|
||||||
|
err := json.Unmarshal([]byte(v), ¶ms)
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.Errorf("get highlight failed, get invalid multi analyzer params-: %v", err)
|
||||||
|
}
|
||||||
|
analyzerField, ok := params["by_field"]
|
||||||
|
if !ok {
|
||||||
|
return nil, errors.Errorf("get highlight failed, get invalid multi analyzer params, no by_field")
|
||||||
|
}
|
||||||
|
|
||||||
|
analyzerFieldDatas, ok := lo.Find(datas, func(data *schemapb.FieldData) bool { return data.FieldName == analyzerField.(string) })
|
||||||
|
if !ok {
|
||||||
|
return nil, errors.Errorf("get highlight failed, analyzer field not in output field")
|
||||||
|
}
|
||||||
|
task.AnalyzerNames = append(task.AnalyzerNames, analyzerFieldDatas.GetScalars().GetStringData().GetData()...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
task := &HighlightTask{
|
||||||
|
ctx: ctx,
|
||||||
|
lb: op.lbPolicy,
|
||||||
|
Condition: NewTaskCondition(ctx),
|
||||||
|
GetHighlightRequest: req,
|
||||||
|
collectionName: op.collectionName,
|
||||||
|
collectionID: op.collectionID,
|
||||||
|
dbName: op.dbName,
|
||||||
|
}
|
||||||
|
if err := op.scheduler.dqQueue.Enqueue(task); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := task.WaitToFinish(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
rowNum := len(result.Results.GetScores())
|
||||||
|
HighlightResults := []*commonpb.HighlightResult{}
|
||||||
|
if rowNum != 0 {
|
||||||
|
rowDatas := lo.Map(task.result.Results, func(result *querypb.HighlightResult, i int) *commonpb.HighlightData {
|
||||||
|
return buildStringFragments(op.tasks[i/rowNum], i%rowNum, result.GetFragments())
|
||||||
|
})
|
||||||
|
|
||||||
|
for i, task := range req.GetTasks() {
|
||||||
|
HighlightResults = append(HighlightResults, &commonpb.HighlightResult{
|
||||||
|
FieldName: task.GetFieldName(),
|
||||||
|
Datas: rowDatas[i*rowNum : (i+1)*rowNum],
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result.Results.HighlightResults = HighlightResults
|
||||||
|
return []any{result}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildStringFragments(task *highlightTask, idx int, frags []*querypb.HighlightFragment) *commonpb.HighlightData {
|
||||||
|
startOffset := int(task.GetSearchTextNum()) + len(task.Queries)
|
||||||
|
text := []rune(task.Texts[startOffset+idx])
|
||||||
|
preTagsNum := len(task.preTags)
|
||||||
|
postTagsNum := len(task.postTags)
|
||||||
|
result := &commonpb.HighlightData{Fragments: make([]string, 0)}
|
||||||
|
for _, frag := range frags {
|
||||||
|
var fragString strings.Builder
|
||||||
|
cursor := int(frag.GetStartOffset())
|
||||||
|
for i := 0; i < len(frag.GetOffsets())/2; i++ {
|
||||||
|
startOffset := int(frag.Offsets[i<<1])
|
||||||
|
endOffset := int(frag.Offsets[(i<<1)+1])
|
||||||
|
if cursor < startOffset {
|
||||||
|
fragString.WriteString(string(text[cursor:startOffset]))
|
||||||
|
}
|
||||||
|
fragString.WriteString(string(task.preTags[i%preTagsNum]))
|
||||||
|
fragString.WriteString(string(text[startOffset:endOffset]))
|
||||||
|
fragString.WriteString(string(task.postTags[i%postTagsNum]))
|
||||||
|
cursor = endOffset
|
||||||
|
}
|
||||||
|
if cursor < int(frag.GetEndOffset()) {
|
||||||
|
fragString.WriteString(string(text[cursor:frag.GetEndOffset()]))
|
||||||
|
}
|
||||||
|
result.Fragments = append(result.Fragments, fragString.String())
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
@ -112,6 +112,7 @@ const (
|
|||||||
organizeOp = "organize"
|
organizeOp = "organize"
|
||||||
endOp = "end"
|
endOp = "end"
|
||||||
lambdaOp = "lambda"
|
lambdaOp = "lambda"
|
||||||
|
highlightOp = "highlight"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -128,6 +129,7 @@ var opFactory = map[string]func(t *searchTask, params map[string]any) (operator,
|
|||||||
requeryOp: newRequeryOperator,
|
requeryOp: newRequeryOperator,
|
||||||
lambdaOp: newLambdaOperator,
|
lambdaOp: newLambdaOperator,
|
||||||
endOp: newEndOperator,
|
endOp: newEndOperator,
|
||||||
|
highlightOp: newHighlightOperator,
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewNode(info *nodeDef, t *searchTask) (*Node, error) {
|
func NewNode(info *nodeDef, t *searchTask) (*Node, error) {
|
||||||
@ -596,6 +598,10 @@ func (op *endOperator) run(ctx context.Context, span trace.Span, inputs ...any)
|
|||||||
return []any{result}, nil
|
return []any{result}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func newHighlightOperator(t *searchTask, _ map[string]any) (operator, error) {
|
||||||
|
return t.highlighter.AsSearchPipelineOperator(t)
|
||||||
|
}
|
||||||
|
|
||||||
func mergeIDsFunc(ctx context.Context, span trace.Span, inputs ...any) ([]any, error) {
|
func mergeIDsFunc(ctx context.Context, span trace.Span, inputs ...any) ([]any, error) {
|
||||||
multipleMilvusResults := inputs[0].([]*milvuspb.SearchResults)
|
multipleMilvusResults := inputs[0].([]*milvuspb.SearchResults)
|
||||||
idInt64Type := false
|
idInt64Type := false
|
||||||
@ -652,6 +658,17 @@ func newPipeline(pipeDef *pipelineDef, t *searchTask) (*pipeline, error) {
|
|||||||
return &pipeline{name: pipeDef.name, nodes: nodes}, nil
|
return &pipeline{name: pipeDef.name, nodes: nodes}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p *pipeline) AddNodes(t *searchTask, nodes ...*nodeDef) error {
|
||||||
|
for _, def := range nodes {
|
||||||
|
node, err := NewNode(def, t)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
p.nodes = append(p.nodes, node)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (p *pipeline) Run(ctx context.Context, span trace.Span, toReduceResults []*internalpb.SearchResults, storageCost segcore.StorageCost) (*milvuspb.SearchResults, segcore.StorageCost, error) {
|
func (p *pipeline) Run(ctx context.Context, span trace.Span, toReduceResults []*internalpb.SearchResults, storageCost segcore.StorageCost) (*milvuspb.SearchResults, segcore.StorageCost, error) {
|
||||||
log.Ctx(ctx).Debug("SearchPipeline run", zap.String("pipeline", p.String()))
|
log.Ctx(ctx).Debug("SearchPipeline run", zap.String("pipeline", p.String()))
|
||||||
msg := opMsg{}
|
msg := opMsg{}
|
||||||
@ -682,6 +699,20 @@ type pipelineDef struct {
|
|||||||
nodes []*nodeDef
|
nodes []*nodeDef
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var filterFieldNode = &nodeDef{
|
||||||
|
name: "filter_field",
|
||||||
|
inputs: []string{"result", "reduced"},
|
||||||
|
outputs: []string{"output"},
|
||||||
|
opName: endOp,
|
||||||
|
}
|
||||||
|
|
||||||
|
var highlightNode = &nodeDef{
|
||||||
|
name: "highlight",
|
||||||
|
inputs: []string{"result"},
|
||||||
|
outputs: []string{"output"},
|
||||||
|
opName: highlightOp,
|
||||||
|
}
|
||||||
|
|
||||||
var searchPipe = &pipelineDef{
|
var searchPipe = &pipelineDef{
|
||||||
name: "search",
|
name: "search",
|
||||||
nodes: []*nodeDef{
|
nodes: []*nodeDef{
|
||||||
@ -703,12 +734,6 @@ var searchPipe = &pipelineDef{
|
|||||||
},
|
},
|
||||||
opName: lambdaOp,
|
opName: lambdaOp,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "filter_field",
|
|
||||||
inputs: []string{"result", "reduced"},
|
|
||||||
outputs: []string{pipelineOutput},
|
|
||||||
opName: endOp,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -767,12 +792,6 @@ var searchWithRequeryPipe = &pipelineDef{
|
|||||||
},
|
},
|
||||||
opName: lambdaOp,
|
opName: lambdaOp,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "filter_field",
|
|
||||||
inputs: []string{"result", "reduced"},
|
|
||||||
outputs: []string{pipelineOutput},
|
|
||||||
opName: endOp,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -825,12 +844,6 @@ var searchWithRerankPipe = &pipelineDef{
|
|||||||
},
|
},
|
||||||
opName: lambdaOp,
|
opName: lambdaOp,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "filter_field",
|
|
||||||
inputs: []string{"result", "reduced"},
|
|
||||||
outputs: []string{pipelineOutput},
|
|
||||||
opName: endOp,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -899,12 +912,6 @@ var searchWithRerankRequeryPipe = &pipelineDef{
|
|||||||
},
|
},
|
||||||
opName: lambdaOp,
|
opName: lambdaOp,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "filter_field",
|
|
||||||
inputs: []string{"result", "reduced"},
|
|
||||||
outputs: []string{pipelineOutput},
|
|
||||||
opName: endOp,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -923,12 +930,6 @@ var hybridSearchPipe = &pipelineDef{
|
|||||||
outputs: []string{"result"},
|
outputs: []string{"result"},
|
||||||
opName: rerankOp,
|
opName: rerankOp,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "filter_field",
|
|
||||||
inputs: []string{"result", "reduced"},
|
|
||||||
outputs: []string{pipelineOutput},
|
|
||||||
opName: endOp,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1030,12 +1031,6 @@ var hybridSearchWithRequeryAndRerankByFieldDataPipe = &pipelineDef{
|
|||||||
},
|
},
|
||||||
opName: lambdaOp,
|
opName: lambdaOp,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "filter_field",
|
|
||||||
inputs: []string{"result", "reduced"},
|
|
||||||
outputs: []string{pipelineOutput},
|
|
||||||
opName: endOp,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1093,12 +1088,6 @@ var hybridSearchWithRequeryPipe = &pipelineDef{
|
|||||||
},
|
},
|
||||||
opName: lambdaOp,
|
opName: lambdaOp,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "filter_field",
|
|
||||||
inputs: []string{"result", "reduced"},
|
|
||||||
outputs: []string{pipelineOutput},
|
|
||||||
opName: endOp,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1142,3 +1131,23 @@ func aggregatedAllSearchCount(searchResults []*milvuspb.SearchResults) int64 {
|
|||||||
}
|
}
|
||||||
return allSearchCount
|
return allSearchCount
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func newSearchPipeline(t *searchTask) (*pipeline, error) {
|
||||||
|
p, err := newBuiltInPipeline(t)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if t.highlighter != nil {
|
||||||
|
err := p.AddNodes(t, highlightNode, filterFieldNode)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
err := p.AddNodes(t, filterFieldNode)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return p, nil
|
||||||
|
}
|
||||||
|
|||||||
@ -24,6 +24,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/bytedance/mockey"
|
"github.com/bytedance/mockey"
|
||||||
|
"github.com/stretchr/testify/mock"
|
||||||
"github.com/stretchr/testify/suite"
|
"github.com/stretchr/testify/suite"
|
||||||
"go.opentelemetry.io/otel"
|
"go.opentelemetry.io/otel"
|
||||||
"go.opentelemetry.io/otel/trace"
|
"go.opentelemetry.io/otel/trace"
|
||||||
@ -31,10 +32,14 @@ import (
|
|||||||
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
|
"github.com/milvus-io/milvus/internal/mocks"
|
||||||
|
"github.com/milvus-io/milvus/internal/proxy/shardclient"
|
||||||
"github.com/milvus-io/milvus/internal/util/function/rerank"
|
"github.com/milvus-io/milvus/internal/util/function/rerank"
|
||||||
"github.com/milvus-io/milvus/internal/util/segcore"
|
"github.com/milvus-io/milvus/internal/util/segcore"
|
||||||
"github.com/milvus-io/milvus/pkg/v2/proto/internalpb"
|
"github.com/milvus-io/milvus/pkg/v2/proto/internalpb"
|
||||||
"github.com/milvus-io/milvus/pkg/v2/proto/planpb"
|
"github.com/milvus-io/milvus/pkg/v2/proto/planpb"
|
||||||
|
"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
|
||||||
|
"github.com/milvus-io/milvus/pkg/v2/util/merr"
|
||||||
"github.com/milvus-io/milvus/pkg/v2/util/testutils"
|
"github.com/milvus-io/milvus/pkg/v2/util/testutils"
|
||||||
"github.com/milvus-io/milvus/pkg/v2/util/timerecord"
|
"github.com/milvus-io/milvus/pkg/v2/util/timerecord"
|
||||||
)
|
)
|
||||||
@ -266,6 +271,96 @@ func (s *SearchPipelineSuite) TestOrganizeOp() {
|
|||||||
fmt.Println(ret)
|
fmt.Println(ret)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *SearchPipelineSuite) TestHighlightOp() {
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
proxy := &Proxy{}
|
||||||
|
proxy.tsoAllocator = ×tampAllocator{
|
||||||
|
tso: newMockTimestampAllocatorInterface(),
|
||||||
|
}
|
||||||
|
sched, err := newTaskScheduler(ctx, proxy.tsoAllocator)
|
||||||
|
s.Require().NoError(err)
|
||||||
|
|
||||||
|
err = sched.Start()
|
||||||
|
s.Require().NoError(err)
|
||||||
|
defer sched.Close()
|
||||||
|
proxy.sched = sched
|
||||||
|
|
||||||
|
collName := "test_coll_highlight"
|
||||||
|
fieldName2Types := map[string]schemapb.DataType{
|
||||||
|
testVarCharField: schemapb.DataType_VarChar,
|
||||||
|
}
|
||||||
|
schema := constructCollectionSchemaByDataType(collName, fieldName2Types, testVarCharField, false)
|
||||||
|
|
||||||
|
req := &milvuspb.SearchRequest{
|
||||||
|
CollectionName: collName,
|
||||||
|
DbName: "default",
|
||||||
|
}
|
||||||
|
|
||||||
|
highlightTasks := map[int64]*highlightTask{
|
||||||
|
100: {
|
||||||
|
HighlightTask: &querypb.HighlightTask{
|
||||||
|
Texts: []string{"target text"},
|
||||||
|
FieldName: testVarCharField,
|
||||||
|
FieldId: 100,
|
||||||
|
},
|
||||||
|
preTags: [][]byte{[]byte(DefaultPreTag)},
|
||||||
|
postTags: [][]byte{[]byte(DefaultPostTag)},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
mockLb := shardclient.NewMockLBPolicy(s.T())
|
||||||
|
searchTask := &searchTask{
|
||||||
|
node: proxy,
|
||||||
|
highlighter: &LexicalHighlighter{
|
||||||
|
tasks: highlightTasks,
|
||||||
|
},
|
||||||
|
lb: mockLb,
|
||||||
|
schema: newSchemaInfo(schema),
|
||||||
|
request: req,
|
||||||
|
collectionName: collName,
|
||||||
|
SearchRequest: &internalpb.SearchRequest{
|
||||||
|
CollectionID: 0,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
op, err := opFactory[highlightOp](searchTask, map[string]any{})
|
||||||
|
s.Require().NoError(err)
|
||||||
|
|
||||||
|
// mockery
|
||||||
|
mockLb.EXPECT().ExecuteOneChannel(mock.Anything, mock.Anything).Run(func(ctx context.Context, workload shardclient.CollectionWorkLoad) {
|
||||||
|
qn := mocks.NewMockQueryNodeClient(s.T())
|
||||||
|
qn.EXPECT().GetHighlight(mock.Anything, mock.Anything).Return(
|
||||||
|
&querypb.GetHighlightResponse{
|
||||||
|
Status: merr.Success(),
|
||||||
|
Results: []*querypb.HighlightResult{},
|
||||||
|
}, nil)
|
||||||
|
workload.Exec(ctx, 0, qn, "test_chan")
|
||||||
|
}).Return(nil)
|
||||||
|
|
||||||
|
_, err = op.run(ctx, s.span, &milvuspb.SearchResults{
|
||||||
|
Results: &schemapb.SearchResultData{
|
||||||
|
TopK: 3,
|
||||||
|
Topks: []int64{1},
|
||||||
|
FieldsData: []*schemapb.FieldData{{
|
||||||
|
FieldName: testVarCharField,
|
||||||
|
FieldId: 100,
|
||||||
|
Field: &schemapb.FieldData_Scalars{
|
||||||
|
Scalars: &schemapb.ScalarField{
|
||||||
|
Data: &schemapb.ScalarField_StringData{
|
||||||
|
StringData: &schemapb.StringArray{
|
||||||
|
Data: []string{"match text"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
s.NoError(err)
|
||||||
|
}
|
||||||
|
|
||||||
func (s *SearchPipelineSuite) TestSearchPipeline() {
|
func (s *SearchPipelineSuite) TestSearchPipeline() {
|
||||||
collectionName := "test"
|
collectionName := "test"
|
||||||
task := &searchTask{
|
task := &searchTask{
|
||||||
@ -295,8 +390,11 @@ func (s *SearchPipelineSuite) TestSearchPipeline() {
|
|||||||
queryInfos: []*planpb.QueryInfo{{}},
|
queryInfos: []*planpb.QueryInfo{{}},
|
||||||
translatedOutputFields: []string{"intField"},
|
translatedOutputFields: []string{"intField"},
|
||||||
}
|
}
|
||||||
|
|
||||||
pipeline, err := newPipeline(searchPipe, task)
|
pipeline, err := newPipeline(searchPipe, task)
|
||||||
s.NoError(err)
|
s.NoError(err)
|
||||||
|
pipeline.AddNodes(task, filterFieldNode)
|
||||||
|
|
||||||
sr := genTestSearchResultData(2, 10, schemapb.DataType_Int64, "intField", 101, false)
|
sr := genTestSearchResultData(2, 10, schemapb.DataType_Int64, "intField", 101, false)
|
||||||
results, storageCost, err := pipeline.Run(context.Background(), s.span, []*internalpb.SearchResults{sr}, segcore.StorageCost{ScannedRemoteBytes: 100, ScannedTotalBytes: 250})
|
results, storageCost, err := pipeline.Run(context.Background(), s.span, []*internalpb.SearchResults{sr}, segcore.StorageCost{ScannedRemoteBytes: 100, ScannedTotalBytes: 250})
|
||||||
s.NoError(err)
|
s.NoError(err)
|
||||||
@ -363,6 +461,8 @@ func (s *SearchPipelineSuite) TestSearchPipelineWithRequery() {
|
|||||||
|
|
||||||
pipeline, err := newPipeline(searchWithRequeryPipe, task)
|
pipeline, err := newPipeline(searchWithRequeryPipe, task)
|
||||||
s.NoError(err)
|
s.NoError(err)
|
||||||
|
pipeline.AddNodes(task, filterFieldNode)
|
||||||
|
|
||||||
results, storageCost, err := pipeline.Run(context.Background(), s.span, []*internalpb.SearchResults{
|
results, storageCost, err := pipeline.Run(context.Background(), s.span, []*internalpb.SearchResults{
|
||||||
genTestSearchResultData(2, 10, schemapb.DataType_Int64, "intField", 101, false),
|
genTestSearchResultData(2, 10, schemapb.DataType_Int64, "intField", 101, false),
|
||||||
}, segcore.StorageCost{ScannedRemoteBytes: 100, ScannedTotalBytes: 200})
|
}, segcore.StorageCost{ScannedRemoteBytes: 100, ScannedTotalBytes: 200})
|
||||||
@ -435,6 +535,7 @@ func (s *SearchPipelineSuite) TestSearchWithRerankPipe() {
|
|||||||
|
|
||||||
pipeline, err := newPipeline(searchWithRerankPipe, task)
|
pipeline, err := newPipeline(searchWithRerankPipe, task)
|
||||||
s.NoError(err)
|
s.NoError(err)
|
||||||
|
pipeline.AddNodes(task, filterFieldNode)
|
||||||
|
|
||||||
searchResults := genTestSearchResultData(2, 10, schemapb.DataType_Int64, "intField", 101, false)
|
searchResults := genTestSearchResultData(2, 10, schemapb.DataType_Int64, "intField", 101, false)
|
||||||
results, _, err := pipeline.Run(context.Background(), s.span, []*internalpb.SearchResults{searchResults}, segcore.StorageCost{})
|
results, _, err := pipeline.Run(context.Background(), s.span, []*internalpb.SearchResults{searchResults}, segcore.StorageCost{})
|
||||||
@ -518,6 +619,7 @@ func (s *SearchPipelineSuite) TestSearchWithRerankRequeryPipe() {
|
|||||||
|
|
||||||
pipeline, err := newPipeline(searchWithRerankRequeryPipe, task)
|
pipeline, err := newPipeline(searchWithRerankRequeryPipe, task)
|
||||||
s.NoError(err)
|
s.NoError(err)
|
||||||
|
pipeline.AddNodes(task, filterFieldNode)
|
||||||
|
|
||||||
searchResults := genTestSearchResultData(2, 10, schemapb.DataType_Int64, "intField", 101, false)
|
searchResults := genTestSearchResultData(2, 10, schemapb.DataType_Int64, "intField", 101, false)
|
||||||
results, storageCost, err := pipeline.Run(context.Background(), s.span, []*internalpb.SearchResults{searchResults}, segcore.StorageCost{})
|
results, storageCost, err := pipeline.Run(context.Background(), s.span, []*internalpb.SearchResults{searchResults}, segcore.StorageCost{})
|
||||||
@ -552,6 +654,7 @@ func (s *SearchPipelineSuite) TestHybridSearchPipe() {
|
|||||||
|
|
||||||
pipeline, err := newPipeline(hybridSearchPipe, task)
|
pipeline, err := newPipeline(hybridSearchPipe, task)
|
||||||
s.NoError(err)
|
s.NoError(err)
|
||||||
|
pipeline.AddNodes(task, filterFieldNode)
|
||||||
|
|
||||||
f1 := genTestSearchResultData(2, 10, schemapb.DataType_Int64, "intField", 101, true)
|
f1 := genTestSearchResultData(2, 10, schemapb.DataType_Int64, "intField", 101, true)
|
||||||
f2 := genTestSearchResultData(2, 10, schemapb.DataType_Int64, "intField", 101, true)
|
f2 := genTestSearchResultData(2, 10, schemapb.DataType_Int64, "intField", 101, true)
|
||||||
@ -665,6 +768,7 @@ func (s *SearchPipelineSuite) TestHybridSearchWithRequeryAndRerankByDataPipe() {
|
|||||||
|
|
||||||
pipeline, err := newPipeline(hybridSearchWithRequeryAndRerankByFieldDataPipe, task)
|
pipeline, err := newPipeline(hybridSearchWithRequeryAndRerankByFieldDataPipe, task)
|
||||||
s.NoError(err)
|
s.NoError(err)
|
||||||
|
pipeline.AddNodes(task, filterFieldNode)
|
||||||
|
|
||||||
d1 := genTestSearchResultData(2, 10, schemapb.DataType_Int64, "intField", 101, true)
|
d1 := genTestSearchResultData(2, 10, schemapb.DataType_Int64, "intField", 101, true)
|
||||||
d2 := genTestSearchResultData(2, 10, schemapb.DataType_Int64, "intField", 101, true)
|
d2 := genTestSearchResultData(2, 10, schemapb.DataType_Int64, "intField", 101, true)
|
||||||
@ -707,6 +811,7 @@ func (s *SearchPipelineSuite) TestHybridSearchWithRequeryPipe() {
|
|||||||
|
|
||||||
pipeline, err := newPipeline(hybridSearchWithRequeryPipe, task)
|
pipeline, err := newPipeline(hybridSearchWithRequeryPipe, task)
|
||||||
s.NoError(err)
|
s.NoError(err)
|
||||||
|
pipeline.AddNodes(task, filterFieldNode)
|
||||||
|
|
||||||
d1 := genTestSearchResultData(2, 10, schemapb.DataType_Int64, "intField", 101, true)
|
d1 := genTestSearchResultData(2, 10, schemapb.DataType_Int64, "intField", 101, true)
|
||||||
d2 := genTestSearchResultData(2, 10, schemapb.DataType_Int64, "intField", 101, true)
|
d2 := genTestSearchResultData(2, 10, schemapb.DataType_Int64, "intField", 101, true)
|
||||||
|
|||||||
@ -65,6 +65,7 @@ const (
|
|||||||
StrictCastKey = "strict_cast"
|
StrictCastKey = "strict_cast"
|
||||||
RankGroupScorer = "rank_group_scorer"
|
RankGroupScorer = "rank_group_scorer"
|
||||||
AnnsFieldKey = "anns_field"
|
AnnsFieldKey = "anns_field"
|
||||||
|
AnalyzerKey = "analyzer_name"
|
||||||
TopKKey = "topk"
|
TopKKey = "topk"
|
||||||
NQKey = "nq"
|
NQKey = "nq"
|
||||||
MetricTypeKey = common.MetricTypeKey
|
MetricTypeKey = common.MetricTypeKey
|
||||||
@ -114,6 +115,7 @@ const (
|
|||||||
ListResourceGroupsTaskName = "ListResourceGroupsTask"
|
ListResourceGroupsTaskName = "ListResourceGroupsTask"
|
||||||
DescribeResourceGroupTaskName = "DescribeResourceGroupTask"
|
DescribeResourceGroupTaskName = "DescribeResourceGroupTask"
|
||||||
RunAnalyzerTaskName = "RunAnalyzer"
|
RunAnalyzerTaskName = "RunAnalyzer"
|
||||||
|
HighlightTaskName = "Highlight"
|
||||||
|
|
||||||
CreateDatabaseTaskName = "CreateCollectionTask"
|
CreateDatabaseTaskName = "CreateCollectionTask"
|
||||||
DropDatabaseTaskName = "DropDatabaseTaskName"
|
DropDatabaseTaskName = "DropDatabaseTaskName"
|
||||||
@ -3144,6 +3146,95 @@ func (t *RunAnalyzerTask) PostExecute(ctx context.Context) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// git highlight after search
|
||||||
|
type HighlightTask struct {
|
||||||
|
baseTask
|
||||||
|
Condition
|
||||||
|
*querypb.GetHighlightRequest
|
||||||
|
ctx context.Context
|
||||||
|
collectionName string
|
||||||
|
collectionID typeutil.UniqueID
|
||||||
|
dbName string
|
||||||
|
lb shardclient.LBPolicy
|
||||||
|
|
||||||
|
result *querypb.GetHighlightResponse
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *HighlightTask) TraceCtx() context.Context {
|
||||||
|
return t.ctx
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *HighlightTask) ID() UniqueID {
|
||||||
|
return t.Base.MsgID
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *HighlightTask) SetID(uid UniqueID) {
|
||||||
|
t.Base.MsgID = uid
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *HighlightTask) Name() string {
|
||||||
|
return HighlightTaskName
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *HighlightTask) Type() commonpb.MsgType {
|
||||||
|
return t.Base.MsgType
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *HighlightTask) BeginTs() Timestamp {
|
||||||
|
return t.Base.Timestamp
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *HighlightTask) EndTs() Timestamp {
|
||||||
|
return t.Base.Timestamp
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *HighlightTask) SetTs(ts Timestamp) {
|
||||||
|
t.Base.Timestamp = ts
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *HighlightTask) OnEnqueue() error {
|
||||||
|
if t.Base == nil {
|
||||||
|
t.Base = commonpbutil.NewMsgBase()
|
||||||
|
}
|
||||||
|
t.Base.MsgType = commonpb.MsgType_Undefined
|
||||||
|
t.Base.SourceID = paramtable.GetNodeID()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *HighlightTask) PreExecute(ctx context.Context) error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *HighlightTask) getHighlightOnShardleader(ctx context.Context, nodeID int64, qn types.QueryNodeClient, channel string) error {
|
||||||
|
t.GetHighlightRequest.Channel = channel
|
||||||
|
resp, err := qn.GetHighlight(ctx, t.GetHighlightRequest)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := merr.Error(resp.GetStatus()); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
t.result = resp
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *HighlightTask) Execute(ctx context.Context) error {
|
||||||
|
err := t.lb.ExecuteOneChannel(ctx, shardclient.CollectionWorkLoad{
|
||||||
|
Db: t.dbName,
|
||||||
|
CollectionName: t.collectionName,
|
||||||
|
CollectionID: t.collectionID,
|
||||||
|
Nq: int64(len(t.GetTopks()) * len(t.GetTasks())),
|
||||||
|
Exec: t.getHighlightOnShardleader,
|
||||||
|
})
|
||||||
|
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *HighlightTask) PostExecute(ctx context.Context) error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// isIgnoreGrowing is used to check if the request should ignore growing
|
// isIgnoreGrowing is used to check if the request should ignore growing
|
||||||
func isIgnoreGrowing(params []*commonpb.KeyValuePair) (bool, error) {
|
func isIgnoreGrowing(params []*commonpb.KeyValuePair) (bool, error) {
|
||||||
for _, kv := range params {
|
for _, kv := range params {
|
||||||
|
|||||||
@ -80,7 +80,7 @@ type searchTask struct {
|
|||||||
translatedOutputFields []string
|
translatedOutputFields []string
|
||||||
userOutputFields []string
|
userOutputFields []string
|
||||||
userDynamicFields []string
|
userDynamicFields []string
|
||||||
|
highlighter Highlighter
|
||||||
resultBuf *typeutil.ConcurrentSet[*internalpb.SearchResults]
|
resultBuf *typeutil.ConcurrentSet[*internalpb.SearchResults]
|
||||||
|
|
||||||
partitionIDsSet *typeutil.ConcurrentSet[UniqueID]
|
partitionIDsSet *typeutil.ConcurrentSet[UniqueID]
|
||||||
@ -466,12 +466,13 @@ func (t *searchTask) initAdvancedSearchRequest(ctx context.Context) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// set analyzer name for sub search
|
// set analyzer name for sub search
|
||||||
analyzer, err := funcutil.GetAttrByKeyFromRepeatedKV("analyzer_name", subReq.GetSearchParams())
|
analyzer, err := funcutil.GetAttrByKeyFromRepeatedKV(AnalyzerKey, subReq.GetSearchParams())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
internalSubReq.AnalyzerName = analyzer
|
internalSubReq.AnalyzerName = analyzer
|
||||||
}
|
}
|
||||||
|
|
||||||
internalSubReq.FieldId = queryInfo.GetQueryFieldId()
|
internalSubReq.FieldId = queryInfo.GetQueryFieldId()
|
||||||
|
|
||||||
queryFieldIDs = append(queryFieldIDs, internalSubReq.FieldId)
|
queryFieldIDs = append(queryFieldIDs, internalSubReq.FieldId)
|
||||||
// set PartitionIDs for sub search
|
// set PartitionIDs for sub search
|
||||||
if t.partitionKeyMode {
|
if t.partitionKeyMode {
|
||||||
@ -557,6 +558,67 @@ func (t *searchTask) fillResult() {
|
|||||||
t.result.CollectionName = t.collectionName
|
t.result.CollectionName = t.collectionName
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (t *searchTask) getBM25SearchTexts(placeholder []byte) ([]string, error) {
|
||||||
|
pb := &commonpb.PlaceholderGroup{}
|
||||||
|
proto.Unmarshal(placeholder, pb)
|
||||||
|
|
||||||
|
if len(pb.Placeholders) != 1 || len(pb.Placeholders[0].Values) == 0 {
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg("please provide varchar/text for BM25 Function based search")
|
||||||
|
}
|
||||||
|
|
||||||
|
holder := pb.Placeholders[0]
|
||||||
|
if holder.Type != commonpb.PlaceholderType_VarChar {
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("please provide varchar/text for BM25 Function based search, got %s", holder.Type.String()))
|
||||||
|
}
|
||||||
|
|
||||||
|
texts := funcutil.GetVarCharFromPlaceholder(holder)
|
||||||
|
return texts, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *searchTask) createLexicalHighlighter(highlighter *commonpb.Highlighter, metricType string, annsField int64, placeholder []byte, analyzerName string) error {
|
||||||
|
h, err := NewLexicalHighlighter(highlighter)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
t.highlighter = h
|
||||||
|
if h.highlightSearch {
|
||||||
|
if metricType != metric.BM25 {
|
||||||
|
return merr.WrapErrParameterInvalidMsg(`Search highlight only support with metric type "BM25" but was: %s`, t.SearchRequest.GetMetricType())
|
||||||
|
}
|
||||||
|
function, ok := getBM25FunctionOfAnnsField(annsField, t.schema.GetFunctions())
|
||||||
|
if !ok {
|
||||||
|
return merr.WrapErrServiceInternal(`Search with highlight failed, input field of BM25 annsField not found`)
|
||||||
|
}
|
||||||
|
fieldId := function.InputFieldIds[0]
|
||||||
|
fieldName := function.InputFieldNames[0]
|
||||||
|
// set bm25 search text as highlight search texts
|
||||||
|
texts, err := t.getBM25SearchTexts(placeholder)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = h.addTaskWithSearchText(fieldId, fieldName, analyzerName, texts)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return h.initHighlightQueries(t)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *searchTask) addHighlightTask(highlighter *commonpb.Highlighter, metricType string, annsField int64, placeholder []byte, analyzerName string) error {
|
||||||
|
if highlighter == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
switch highlighter.GetType() {
|
||||||
|
case commonpb.HighlightType_Lexical:
|
||||||
|
return t.createLexicalHighlighter(highlighter, metricType, annsField, placeholder, analyzerName)
|
||||||
|
default:
|
||||||
|
return merr.WrapErrParameterInvalidMsg("unsupported highlight type: %v", highlighter.GetType())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (t *searchTask) initSearchRequest(ctx context.Context) error {
|
func (t *searchTask) initSearchRequest(ctx context.Context) error {
|
||||||
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "init search request")
|
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "init search request")
|
||||||
defer sp.End()
|
defer sp.End()
|
||||||
@ -579,10 +641,24 @@ func (t *searchTask) initSearchRequest(ctx context.Context) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
analyzer, err := funcutil.GetAttrByKeyFromRepeatedKV(AnalyzerKey, t.request.GetSearchParams())
|
||||||
|
if err == nil {
|
||||||
|
t.SearchRequest.AnalyzerName = analyzer
|
||||||
|
}
|
||||||
|
|
||||||
t.isIterator = isIterator
|
t.isIterator = isIterator
|
||||||
t.SearchRequest.Offset = offset
|
t.SearchRequest.Offset = offset
|
||||||
t.SearchRequest.FieldId = queryInfo.GetQueryFieldId()
|
t.SearchRequest.FieldId = queryInfo.GetQueryFieldId()
|
||||||
|
|
||||||
|
if err := t.addHighlightTask(t.request.GetHighlighter(), queryInfo.GetMetricType(), queryInfo.GetQueryFieldId(), t.request.GetPlaceholderGroup(), t.SearchRequest.GetAnalyzerName()); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// add highlight field ids to output fields id
|
||||||
|
if t.highlighter != nil {
|
||||||
|
t.SearchRequest.OutputFieldsId = append(t.SearchRequest.OutputFieldsId, t.highlighter.FieldIDs()...)
|
||||||
|
}
|
||||||
|
|
||||||
if t.partitionKeyMode {
|
if t.partitionKeyMode {
|
||||||
// isolation has tighter constraint, check first
|
// isolation has tighter constraint, check first
|
||||||
mvErr := setQueryInfoIfMvEnable(queryInfo, t, plan)
|
mvErr := setQueryInfoIfMvEnable(queryInfo, t, plan)
|
||||||
@ -632,13 +708,6 @@ func (t *searchTask) initSearchRequest(ctx context.Context) error {
|
|||||||
t.SearchRequest.GroupByFieldId = queryInfo.GroupByFieldId
|
t.SearchRequest.GroupByFieldId = queryInfo.GroupByFieldId
|
||||||
t.SearchRequest.GroupSize = queryInfo.GroupSize
|
t.SearchRequest.GroupSize = queryInfo.GroupSize
|
||||||
|
|
||||||
if t.SearchRequest.MetricType == metric.BM25 {
|
|
||||||
analyzer, err := funcutil.GetAttrByKeyFromRepeatedKV("analyzer_name", t.request.GetSearchParams())
|
|
||||||
if err == nil {
|
|
||||||
t.SearchRequest.AnalyzerName = analyzer
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if embedding.HasNonBM25Functions(t.schema.CollectionSchema.Functions, []int64{queryInfo.GetQueryFieldId()}) {
|
if embedding.HasNonBM25Functions(t.schema.CollectionSchema.Functions, []int64{queryInfo.GetQueryFieldId()}) {
|
||||||
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-Search-call-function-udf")
|
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-Search-call-function-udf")
|
||||||
defer sp.End()
|
defer sp.End()
|
||||||
@ -813,7 +882,7 @@ func (t *searchTask) PostExecute(ctx context.Context) error {
|
|||||||
t.isRecallEvaluation = isRecallEvaluation
|
t.isRecallEvaluation = isRecallEvaluation
|
||||||
|
|
||||||
// call pipeline
|
// call pipeline
|
||||||
pipeline, err := newBuiltInPipeline(t)
|
pipeline, err := newSearchPipeline(t)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Faild to create post process pipeline")
|
log.Warn("Faild to create post process pipeline")
|
||||||
return err
|
return err
|
||||||
|
|||||||
@ -4885,3 +4885,198 @@ func TestSearchTask_InitSearchRequestWithStructArrayFields(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSearchTask_AddHighlightTask(t *testing.T) {
|
||||||
|
paramtable.Init()
|
||||||
|
|
||||||
|
// Create a schema with BM25 function
|
||||||
|
schema := &schemapb.CollectionSchema{
|
||||||
|
Name: "test_highlight_collection",
|
||||||
|
Fields: []*schemapb.FieldSchema{
|
||||||
|
{
|
||||||
|
FieldID: 100,
|
||||||
|
Name: "text_field",
|
||||||
|
DataType: schemapb.DataType_VarChar,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
FieldID: 101,
|
||||||
|
Name: "sparse_field",
|
||||||
|
DataType: schemapb.DataType_SparseFloatVector,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Functions: []*schemapb.FunctionSchema{
|
||||||
|
{
|
||||||
|
Name: "bm25_func",
|
||||||
|
Type: schemapb.FunctionType_BM25,
|
||||||
|
InputFieldNames: []string{"text_field"},
|
||||||
|
InputFieldIds: []int64{100},
|
||||||
|
OutputFieldNames: []string{"sparse_field"},
|
||||||
|
OutputFieldIds: []int64{101},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
placeholder := &commonpb.PlaceholderGroup{
|
||||||
|
Placeholders: []*commonpb.PlaceholderValue{{
|
||||||
|
Type: commonpb.PlaceholderType_VarChar,
|
||||||
|
Values: [][]byte{[]byte("test_str")},
|
||||||
|
}},
|
||||||
|
}
|
||||||
|
|
||||||
|
placeholderBytes, err := proto.Marshal(placeholder)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
t.Run("lexical highlight success", func(t *testing.T) {
|
||||||
|
task := &searchTask{
|
||||||
|
schema: &schemaInfo{
|
||||||
|
CollectionSchema: schema,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
highlighter := &commonpb.Highlighter{
|
||||||
|
Type: commonpb.HighlightType_Lexical,
|
||||||
|
Params: []*commonpb.KeyValuePair{{Key: HighlightSearchTextKey, Value: "true"}},
|
||||||
|
}
|
||||||
|
|
||||||
|
err := task.addHighlightTask(highlighter, metric.BM25, 101, placeholderBytes, "")
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
h, ok := task.highlighter.(*LexicalHighlighter)
|
||||||
|
require.True(t, ok)
|
||||||
|
require.Equal(t, 1, len(h.tasks))
|
||||||
|
assert.Equal(t, int64(100), h.tasks[100].FieldId)
|
||||||
|
assert.Equal(t, "text_field", h.tasks[100].FieldName)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Lexical highlight with custom tags", func(t *testing.T) {
|
||||||
|
task := &searchTask{
|
||||||
|
schema: &schemaInfo{
|
||||||
|
CollectionSchema: schema,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
highlighter := &commonpb.Highlighter{
|
||||||
|
Type: commonpb.HighlightType_Lexical,
|
||||||
|
Params: []*commonpb.KeyValuePair{{Key: HighlightSearchTextKey, Value: "true"}, {Key: "pre_tags", Value: `["<b>"]`}, {Key: "post_tags", Value: `["</b>"]`}},
|
||||||
|
}
|
||||||
|
|
||||||
|
err := task.addHighlightTask(highlighter, metric.BM25, 101, placeholderBytes, "")
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
h, ok := task.highlighter.(*LexicalHighlighter)
|
||||||
|
require.True(t, ok)
|
||||||
|
assert.Equal(t, 1, len(h.preTags))
|
||||||
|
assert.Equal(t, []byte("<b>"), h.preTags[0])
|
||||||
|
assert.Equal(t, 1, len(h.postTags))
|
||||||
|
assert.Equal(t, []byte("</b>"), h.postTags[0])
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("lexical highlight with wrong metric type", func(t *testing.T) {
|
||||||
|
task := &searchTask{
|
||||||
|
schema: &schemaInfo{
|
||||||
|
CollectionSchema: schema,
|
||||||
|
},
|
||||||
|
SearchRequest: &internalpb.SearchRequest{},
|
||||||
|
request: &milvuspb.SearchRequest{},
|
||||||
|
}
|
||||||
|
|
||||||
|
highlighter := &commonpb.Highlighter{
|
||||||
|
Type: commonpb.HighlightType_Lexical,
|
||||||
|
Params: []*commonpb.KeyValuePair{{Key: HighlightSearchTextKey, Value: "true"}},
|
||||||
|
}
|
||||||
|
|
||||||
|
err := task.addHighlightTask(highlighter, metric.L2, 101, placeholderBytes, "")
|
||||||
|
assert.Error(t, err)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("lexical highlight with invalid pre_tags type", func(t *testing.T) {
|
||||||
|
task := &searchTask{
|
||||||
|
schema: &schemaInfo{
|
||||||
|
CollectionSchema: schema,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
highlighter := &commonpb.Highlighter{
|
||||||
|
Type: commonpb.HighlightType_Lexical,
|
||||||
|
Params: []*commonpb.KeyValuePair{{Key: HighlightSearchTextKey, Value: "true"}, {Key: "pre_tags", Value: "not_a_list"}},
|
||||||
|
}
|
||||||
|
|
||||||
|
err := task.addHighlightTask(highlighter, metric.BM25, 101, placeholderBytes, "")
|
||||||
|
assert.Error(t, err)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("default lexical highlight but not BM25 field", func(t *testing.T) {
|
||||||
|
schemaWithoutBM25 := &schemapb.CollectionSchema{
|
||||||
|
Name: "test_collection",
|
||||||
|
Fields: []*schemapb.FieldSchema{
|
||||||
|
{
|
||||||
|
FieldID: 100,
|
||||||
|
Name: "vector_field",
|
||||||
|
DataType: schemapb.DataType_FloatVector,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
task := &searchTask{
|
||||||
|
schema: &schemaInfo{
|
||||||
|
CollectionSchema: schemaWithoutBM25,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
highlighter := &commonpb.Highlighter{
|
||||||
|
Type: commonpb.HighlightType_Lexical,
|
||||||
|
Params: []*commonpb.KeyValuePair{{Key: HighlightSearchTextKey, Value: "true"}},
|
||||||
|
}
|
||||||
|
|
||||||
|
err := task.addHighlightTask(highlighter, metric.BM25, 100, placeholderBytes, "")
|
||||||
|
assert.Error(t, err)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("highlight without highlight search text", func(t *testing.T) {
|
||||||
|
task := &searchTask{
|
||||||
|
schema: &schemaInfo{
|
||||||
|
CollectionSchema: schema,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
highlighter := &commonpb.Highlighter{
|
||||||
|
Type: commonpb.HighlightType_Lexical,
|
||||||
|
Params: []*commonpb.KeyValuePair{{Key: HighlightSearchTextKey, Value: "false"}},
|
||||||
|
}
|
||||||
|
|
||||||
|
err := task.addHighlightTask(highlighter, metric.BM25, 101, placeholderBytes, "")
|
||||||
|
assert.NoError(t, err)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("highlight with invalid highlight search key", func(t *testing.T) {
|
||||||
|
task := &searchTask{
|
||||||
|
schema: &schemaInfo{
|
||||||
|
CollectionSchema: schema,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
highlighter := &commonpb.Highlighter{
|
||||||
|
Type: commonpb.HighlightType_Lexical,
|
||||||
|
Params: []*commonpb.KeyValuePair{{Key: HighlightSearchTextKey, Value: "invalid"}},
|
||||||
|
}
|
||||||
|
|
||||||
|
err := task.addHighlightTask(highlighter, metric.BM25, 101, placeholderBytes, "")
|
||||||
|
assert.Error(t, err)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("highlight with unknown type", func(t *testing.T) {
|
||||||
|
task := &searchTask{
|
||||||
|
schema: &schemaInfo{
|
||||||
|
CollectionSchema: schema,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
highlighter := &commonpb.Highlighter{
|
||||||
|
Type: 4,
|
||||||
|
Params: []*commonpb.KeyValuePair{{Key: HighlightSearchTextKey, Value: "true"}},
|
||||||
|
}
|
||||||
|
|
||||||
|
err := task.addHighlightTask(highlighter, metric.BM25, 101, placeholderBytes, "")
|
||||||
|
assert.Error(t, err)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|||||||
@ -39,6 +39,7 @@ import (
|
|||||||
"github.com/milvus-io/milvus/internal/allocator"
|
"github.com/milvus-io/milvus/internal/allocator"
|
||||||
"github.com/milvus-io/milvus/internal/json"
|
"github.com/milvus-io/milvus/internal/json"
|
||||||
"github.com/milvus-io/milvus/internal/mocks"
|
"github.com/milvus-io/milvus/internal/mocks"
|
||||||
|
"github.com/milvus-io/milvus/internal/proxy/shardclient"
|
||||||
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
|
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
|
||||||
"github.com/milvus-io/milvus/internal/util/function/embedding"
|
"github.com/milvus-io/milvus/internal/util/function/embedding"
|
||||||
"github.com/milvus-io/milvus/pkg/v2/common"
|
"github.com/milvus-io/milvus/pkg/v2/common"
|
||||||
@ -5497,3 +5498,237 @@ func TestAlterCollection_AllowInsertAutoID_AutoIDFalse(t *testing.T) {
|
|||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
assert.Equal(t, merr.Code(merr.ErrParameterInvalid), merr.Code(err))
|
assert.Equal(t, merr.Code(merr.ErrParameterInvalid), merr.Code(err))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestHighlightTask(t *testing.T) {
|
||||||
|
paramtable.Init()
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
t.Run("basic methods", func(t *testing.T) {
|
||||||
|
task := &HighlightTask{
|
||||||
|
ctx: ctx,
|
||||||
|
GetHighlightRequest: &querypb.GetHighlightRequest{
|
||||||
|
Base: commonpbutil.NewMsgBase(),
|
||||||
|
},
|
||||||
|
Condition: NewTaskCondition(ctx),
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Run("traceCtx", func(t *testing.T) {
|
||||||
|
traceCtx := task.TraceCtx()
|
||||||
|
assert.NotNil(t, traceCtx)
|
||||||
|
assert.Equal(t, ctx, traceCtx)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("id", func(t *testing.T) {
|
||||||
|
id := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
|
||||||
|
task.SetID(id)
|
||||||
|
assert.Equal(t, id, task.ID())
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("name", func(t *testing.T) {
|
||||||
|
assert.Equal(t, HighlightTaskName, task.Name())
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("type", func(t *testing.T) {
|
||||||
|
assert.Equal(t, commonpb.MsgType_Undefined, task.Type())
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("ts", func(t *testing.T) {
|
||||||
|
ts := Timestamp(time.Now().UnixNano())
|
||||||
|
task.SetTs(ts)
|
||||||
|
assert.Equal(t, ts, task.BeginTs())
|
||||||
|
assert.Equal(t, ts, task.EndTs())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("OnEnqueue", func(t *testing.T) {
|
||||||
|
task := &HighlightTask{
|
||||||
|
ctx: ctx,
|
||||||
|
GetHighlightRequest: &querypb.GetHighlightRequest{},
|
||||||
|
Condition: NewTaskCondition(ctx),
|
||||||
|
}
|
||||||
|
|
||||||
|
err := task.OnEnqueue()
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.NotNil(t, task.Base)
|
||||||
|
assert.Equal(t, commonpb.MsgType_Undefined, task.Base.MsgType)
|
||||||
|
assert.Equal(t, paramtable.GetNodeID(), task.Base.SourceID)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("PreExecute", func(t *testing.T) {
|
||||||
|
task := &HighlightTask{
|
||||||
|
ctx: ctx,
|
||||||
|
GetHighlightRequest: &querypb.GetHighlightRequest{
|
||||||
|
Base: commonpbutil.NewMsgBase(),
|
||||||
|
},
|
||||||
|
Condition: NewTaskCondition(ctx),
|
||||||
|
}
|
||||||
|
|
||||||
|
err := task.PreExecute(ctx)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("getHighlightOnShardleader success", func(t *testing.T) {
|
||||||
|
task := &HighlightTask{
|
||||||
|
ctx: ctx,
|
||||||
|
GetHighlightRequest: &querypb.GetHighlightRequest{
|
||||||
|
Base: commonpbutil.NewMsgBase(),
|
||||||
|
Topks: []int64{10},
|
||||||
|
Tasks: []*querypb.HighlightTask{
|
||||||
|
{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Condition: NewTaskCondition(ctx),
|
||||||
|
}
|
||||||
|
|
||||||
|
mockQN := mocks.NewMockQueryNodeClient(t)
|
||||||
|
expectedResp := &querypb.GetHighlightResponse{
|
||||||
|
Status: merr.Success(),
|
||||||
|
Results: []*querypb.HighlightResult{
|
||||||
|
{
|
||||||
|
Fragments: []*querypb.HighlightFragment{
|
||||||
|
{
|
||||||
|
StartOffset: 0,
|
||||||
|
EndOffset: 10,
|
||||||
|
Offsets: []int64{0, 5, 5, 10},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
mockQN.EXPECT().GetHighlight(mock.Anything, mock.Anything).Return(expectedResp, nil)
|
||||||
|
|
||||||
|
err := task.getHighlightOnShardleader(ctx, 1, mockQN, "test_channel")
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.NotNil(t, task.result)
|
||||||
|
assert.Equal(t, expectedResp, task.result)
|
||||||
|
assert.Equal(t, "test_channel", task.GetHighlightRequest.Channel)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("getHighlightOnShardleader rpc error", func(t *testing.T) {
|
||||||
|
task := &HighlightTask{
|
||||||
|
ctx: ctx,
|
||||||
|
GetHighlightRequest: &querypb.GetHighlightRequest{
|
||||||
|
Base: commonpbutil.NewMsgBase(),
|
||||||
|
},
|
||||||
|
Condition: NewTaskCondition(ctx),
|
||||||
|
}
|
||||||
|
|
||||||
|
mockQN := mocks.NewMockQueryNodeClient(t)
|
||||||
|
mockQN.EXPECT().GetHighlight(mock.Anything, mock.Anything).Return(nil, errors.New("rpc error"))
|
||||||
|
|
||||||
|
err := task.getHighlightOnShardleader(ctx, 1, mockQN, "test_channel")
|
||||||
|
assert.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "rpc error")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("getHighlightOnShardleader status error", func(t *testing.T) {
|
||||||
|
task := &HighlightTask{
|
||||||
|
ctx: ctx,
|
||||||
|
GetHighlightRequest: &querypb.GetHighlightRequest{
|
||||||
|
Base: commonpbutil.NewMsgBase(),
|
||||||
|
},
|
||||||
|
Condition: NewTaskCondition(ctx),
|
||||||
|
}
|
||||||
|
|
||||||
|
mockQN := mocks.NewMockQueryNodeClient(t)
|
||||||
|
expectedResp := &querypb.GetHighlightResponse{
|
||||||
|
Status: merr.Status(errors.New("status error")),
|
||||||
|
}
|
||||||
|
|
||||||
|
mockQN.EXPECT().GetHighlight(mock.Anything, mock.Anything).Return(expectedResp, nil)
|
||||||
|
|
||||||
|
err := task.getHighlightOnShardleader(ctx, 1, mockQN, "test_channel")
|
||||||
|
assert.Error(t, err)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Execute success", func(t *testing.T) {
|
||||||
|
task := &HighlightTask{
|
||||||
|
ctx: ctx,
|
||||||
|
collectionName: "test_collection",
|
||||||
|
collectionID: 100,
|
||||||
|
dbName: "default",
|
||||||
|
GetHighlightRequest: &querypb.GetHighlightRequest{
|
||||||
|
Base: commonpbutil.NewMsgBase(),
|
||||||
|
Topks: []int64{10, 20},
|
||||||
|
Tasks: []*querypb.HighlightTask{
|
||||||
|
{},
|
||||||
|
{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Condition: NewTaskCondition(ctx),
|
||||||
|
}
|
||||||
|
|
||||||
|
mockLB := shardclient.NewMockLBPolicy(t)
|
||||||
|
mockQN := mocks.NewMockQueryNodeClient(t)
|
||||||
|
|
||||||
|
expectedResp := &querypb.GetHighlightResponse{
|
||||||
|
Status: merr.Success(),
|
||||||
|
Results: []*querypb.HighlightResult{
|
||||||
|
{
|
||||||
|
Fragments: []*querypb.HighlightFragment{
|
||||||
|
{
|
||||||
|
StartOffset: 0,
|
||||||
|
EndOffset: 10,
|
||||||
|
Offsets: []int64{0, 5, 5, 10},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
mockQN.EXPECT().GetHighlight(mock.Anything, mock.Anything).Return(expectedResp, nil)
|
||||||
|
|
||||||
|
mockLB.EXPECT().ExecuteOneChannel(mock.Anything, mock.Anything).Run(
|
||||||
|
func(ctx context.Context, workload shardclient.CollectionWorkLoad) {
|
||||||
|
assert.Equal(t, "default", workload.Db)
|
||||||
|
assert.Equal(t, "test_collection", workload.CollectionName)
|
||||||
|
assert.Equal(t, int64(100), workload.CollectionID)
|
||||||
|
assert.Equal(t, int64(4), workload.Nq) // len(topks) * len(tasks) = 2 * 2 = 4
|
||||||
|
err := workload.Exec(ctx, 1, mockQN, "test_channel")
|
||||||
|
assert.NoError(t, err)
|
||||||
|
},
|
||||||
|
).Return(nil)
|
||||||
|
|
||||||
|
task.lb = mockLB
|
||||||
|
|
||||||
|
err := task.Execute(ctx)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.NotNil(t, task.result)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Execute lb error", func(t *testing.T) {
|
||||||
|
task := &HighlightTask{
|
||||||
|
ctx: ctx,
|
||||||
|
collectionName: "test_collection",
|
||||||
|
collectionID: 100,
|
||||||
|
dbName: "default",
|
||||||
|
GetHighlightRequest: &querypb.GetHighlightRequest{
|
||||||
|
Base: commonpbutil.NewMsgBase(),
|
||||||
|
},
|
||||||
|
Condition: NewTaskCondition(ctx),
|
||||||
|
}
|
||||||
|
|
||||||
|
mockLB := shardclient.NewMockLBPolicy(t)
|
||||||
|
mockLB.EXPECT().ExecuteOneChannel(mock.Anything, mock.Anything).Return(errors.New("lb error"))
|
||||||
|
|
||||||
|
task.lb = mockLB
|
||||||
|
|
||||||
|
err := task.Execute(ctx)
|
||||||
|
assert.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "lb error")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("PostExecute", func(t *testing.T) {
|
||||||
|
task := &HighlightTask{
|
||||||
|
ctx: ctx,
|
||||||
|
GetHighlightRequest: &querypb.GetHighlightRequest{
|
||||||
|
Base: commonpbutil.NewMsgBase(),
|
||||||
|
},
|
||||||
|
Condition: NewTaskCondition(ctx),
|
||||||
|
}
|
||||||
|
|
||||||
|
err := task.PostExecute(ctx)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|||||||
@ -3040,3 +3040,9 @@ func genFunctionFields(ctx context.Context, insertMsg *msgstream.InsertMsg, sche
|
|||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getBM25FunctionOfAnnsField(fieldID int64, functions []*schemapb.FunctionSchema) (*schemapb.FunctionSchema, bool) {
|
||||||
|
return lo.Find(functions, func(function *schemapb.FunctionSchema) bool {
|
||||||
|
return function.GetType() == schemapb.FunctionType_BM25 && function.OutputFieldIds[0] == fieldID
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|||||||
@ -324,6 +324,65 @@ func (_c *MockQueryNodeServer_GetDataDistribution_Call) RunAndReturn(run func(co
|
|||||||
return _c
|
return _c
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetHighlight provides a mock function with given fields: _a0, _a1
|
||||||
|
func (_m *MockQueryNodeServer) GetHighlight(_a0 context.Context, _a1 *querypb.GetHighlightRequest) (*querypb.GetHighlightResponse, error) {
|
||||||
|
ret := _m.Called(_a0, _a1)
|
||||||
|
|
||||||
|
if len(ret) == 0 {
|
||||||
|
panic("no return value specified for GetHighlight")
|
||||||
|
}
|
||||||
|
|
||||||
|
var r0 *querypb.GetHighlightResponse
|
||||||
|
var r1 error
|
||||||
|
if rf, ok := ret.Get(0).(func(context.Context, *querypb.GetHighlightRequest) (*querypb.GetHighlightResponse, error)); ok {
|
||||||
|
return rf(_a0, _a1)
|
||||||
|
}
|
||||||
|
if rf, ok := ret.Get(0).(func(context.Context, *querypb.GetHighlightRequest) *querypb.GetHighlightResponse); ok {
|
||||||
|
r0 = rf(_a0, _a1)
|
||||||
|
} else {
|
||||||
|
if ret.Get(0) != nil {
|
||||||
|
r0 = ret.Get(0).(*querypb.GetHighlightResponse)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if rf, ok := ret.Get(1).(func(context.Context, *querypb.GetHighlightRequest) error); ok {
|
||||||
|
r1 = rf(_a0, _a1)
|
||||||
|
} else {
|
||||||
|
r1 = ret.Error(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
return r0, r1
|
||||||
|
}
|
||||||
|
|
||||||
|
// MockQueryNodeServer_GetHighlight_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetHighlight'
|
||||||
|
type MockQueryNodeServer_GetHighlight_Call struct {
|
||||||
|
*mock.Call
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetHighlight is a helper method to define mock.On call
|
||||||
|
// - _a0 context.Context
|
||||||
|
// - _a1 *querypb.GetHighlightRequest
|
||||||
|
func (_e *MockQueryNodeServer_Expecter) GetHighlight(_a0 interface{}, _a1 interface{}) *MockQueryNodeServer_GetHighlight_Call {
|
||||||
|
return &MockQueryNodeServer_GetHighlight_Call{Call: _e.mock.On("GetHighlight", _a0, _a1)}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (_c *MockQueryNodeServer_GetHighlight_Call) Run(run func(_a0 context.Context, _a1 *querypb.GetHighlightRequest)) *MockQueryNodeServer_GetHighlight_Call {
|
||||||
|
_c.Call.Run(func(args mock.Arguments) {
|
||||||
|
run(args[0].(context.Context), args[1].(*querypb.GetHighlightRequest))
|
||||||
|
})
|
||||||
|
return _c
|
||||||
|
}
|
||||||
|
|
||||||
|
func (_c *MockQueryNodeServer_GetHighlight_Call) Return(_a0 *querypb.GetHighlightResponse, _a1 error) *MockQueryNodeServer_GetHighlight_Call {
|
||||||
|
_c.Call.Return(_a0, _a1)
|
||||||
|
return _c
|
||||||
|
}
|
||||||
|
|
||||||
|
func (_c *MockQueryNodeServer_GetHighlight_Call) RunAndReturn(run func(context.Context, *querypb.GetHighlightRequest) (*querypb.GetHighlightResponse, error)) *MockQueryNodeServer_GetHighlight_Call {
|
||||||
|
_c.Call.Return(run)
|
||||||
|
return _c
|
||||||
|
}
|
||||||
|
|
||||||
// GetMetrics provides a mock function with given fields: _a0, _a1
|
// GetMetrics provides a mock function with given fields: _a0, _a1
|
||||||
func (_m *MockQueryNodeServer) GetMetrics(_a0 context.Context, _a1 *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
|
func (_m *MockQueryNodeServer) GetMetrics(_a0 context.Context, _a1 *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
|
||||||
ret := _m.Called(_a0, _a1)
|
ret := _m.Called(_a0, _a1)
|
||||||
|
|||||||
@ -104,6 +104,7 @@ type ShardDelegator interface {
|
|||||||
|
|
||||||
// analyzer
|
// analyzer
|
||||||
RunAnalyzer(ctx context.Context, req *querypb.RunAnalyzerRequest) ([]*milvuspb.AnalyzerResult, error)
|
RunAnalyzer(ctx context.Context, req *querypb.RunAnalyzerRequest) ([]*milvuspb.AnalyzerResult, error)
|
||||||
|
GetHighlight(ctx context.Context, req *querypb.GetHighlightRequest) ([]*querypb.HighlightResult, error)
|
||||||
|
|
||||||
// control
|
// control
|
||||||
Serviceable() bool
|
Serviceable() bool
|
||||||
|
|||||||
@ -29,6 +29,7 @@ import (
|
|||||||
"google.golang.org/protobuf/proto"
|
"google.golang.org/protobuf/proto"
|
||||||
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||||
|
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
"github.com/milvus-io/milvus/internal/querynodev2/cluster"
|
"github.com/milvus-io/milvus/internal/querynodev2/cluster"
|
||||||
@ -1001,3 +1002,87 @@ func (sd *shardDelegator) DropIndex(ctx context.Context, req *querypb.DropIndexR
|
|||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (sd *shardDelegator) GetHighlight(ctx context.Context, req *querypb.GetHighlightRequest) ([]*querypb.HighlightResult, error) {
|
||||||
|
result := []*querypb.HighlightResult{}
|
||||||
|
for _, task := range req.GetTasks() {
|
||||||
|
if len(task.GetTexts()) != int(task.GetSearchTextNum()+task.GetCorpusTextNum())+len(task.GetQueries()) {
|
||||||
|
return nil, errors.Errorf("package highlight texts error, num of texts not equal the expected num %d:%d", len(task.GetTexts()), int(task.GetSearchTextNum()+task.GetCorpusTextNum())+len(task.GetQueries()))
|
||||||
|
}
|
||||||
|
analyzer, ok := sd.analyzerRunners[task.GetFieldId()]
|
||||||
|
if !ok {
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg("get highlight failed, the highlight field not found, %s:%d", task.GetFieldName(), task.GetFieldId())
|
||||||
|
}
|
||||||
|
topks := req.GetTopks()
|
||||||
|
var results [][]*milvuspb.AnalyzerToken
|
||||||
|
var err error
|
||||||
|
|
||||||
|
if len(analyzer.GetInputFields()) == 1 {
|
||||||
|
results, err = analyzer.BatchAnalyze(true, false, task.GetTexts())
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
} else if len(analyzer.GetInputFields()) == 2 {
|
||||||
|
// use analyzer names if analyzer need two input field
|
||||||
|
results, err = analyzer.BatchAnalyze(true, false, task.GetTexts(), task.GetAnalyzerNames())
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// analyze result of search text
|
||||||
|
searchResults := results[0:task.SearchTextNum]
|
||||||
|
// analyze result of query text
|
||||||
|
queryResults := results[task.SearchTextNum : task.SearchTextNum+int64(len(task.Queries))]
|
||||||
|
// analyze result of corpus text
|
||||||
|
corpusStartOffset := int(task.SearchTextNum) + len(task.Queries)
|
||||||
|
corpusResults := results[corpusStartOffset:]
|
||||||
|
|
||||||
|
// query for all corpus texts
|
||||||
|
// only support text match now
|
||||||
|
// build match set for all analyze result of query text
|
||||||
|
// TODO: support more query types
|
||||||
|
queryTokenSet := typeutil.NewSet[string]()
|
||||||
|
for _, tokens := range queryResults {
|
||||||
|
for _, token := range tokens {
|
||||||
|
queryTokenSet.Insert(token.GetToken())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
corpusIdx := 0
|
||||||
|
for i := range len(topks) {
|
||||||
|
tokenSet := typeutil.NewSet[string]()
|
||||||
|
if len(searchResults) > i {
|
||||||
|
for _, token := range searchResults[i] {
|
||||||
|
tokenSet.Insert(token.GetToken())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for j := 0; j < int(topks[i]); j++ {
|
||||||
|
spans := SpanList{}
|
||||||
|
for _, token := range corpusResults[corpusIdx] {
|
||||||
|
if tokenSet.Contain(token.GetToken()) || queryTokenSet.Contain(token.GetToken()) {
|
||||||
|
spans = append(spans, Span{token.GetStartOffset(), token.GetEndOffset()})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
spans = mergeOffsets(spans)
|
||||||
|
|
||||||
|
// Convert byte offsets from analyzer to rune (character) offsets
|
||||||
|
corpusText := task.Texts[corpusStartOffset+corpusIdx]
|
||||||
|
err := bytesOffsetToRuneOffset(corpusText, spans)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
frags := fetchFragmentsFromOffsets(corpusText, spans,
|
||||||
|
task.GetOptions().GetFragmentOffset(),
|
||||||
|
task.GetOptions().GetFragmentSize(),
|
||||||
|
task.GetOptions().GetNumOfFragments())
|
||||||
|
result = append(result, &querypb.HighlightResult{Fragments: frags})
|
||||||
|
corpusIdx++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|||||||
@ -1596,6 +1596,159 @@ func (s *DelegatorSuite) TestRunAnalyzer() {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *DelegatorSuite) TestGetHighlight() {
|
||||||
|
ctx := context.Background()
|
||||||
|
s.TestCreateDelegatorWithFunction()
|
||||||
|
|
||||||
|
s.Run("field analyzer not exist", func() {
|
||||||
|
_, err := s.delegator.GetHighlight(ctx, &querypb.GetHighlightRequest{
|
||||||
|
Topks: []int64{1},
|
||||||
|
Tasks: []*querypb.HighlightTask{
|
||||||
|
{
|
||||||
|
FieldId: 999, // non-existent field
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
s.Require().Error(err)
|
||||||
|
})
|
||||||
|
|
||||||
|
s.Run("normal highlight with single analyzer", func() {
|
||||||
|
s.manager.Collection.PutOrRef(s.collectionID, &schemapb.CollectionSchema{
|
||||||
|
Fields: []*schemapb.FieldSchema{
|
||||||
|
{
|
||||||
|
FieldID: 100,
|
||||||
|
Name: "text",
|
||||||
|
DataType: schemapb.DataType_VarChar,
|
||||||
|
TypeParams: []*commonpb.KeyValuePair{{Key: "analyzer_params", Value: "{}"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
FieldID: 101,
|
||||||
|
Name: "sparse",
|
||||||
|
DataType: schemapb.DataType_SparseFloatVector,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Functions: []*schemapb.FunctionSchema{{
|
||||||
|
Type: schemapb.FunctionType_BM25,
|
||||||
|
InputFieldNames: []string{"text"},
|
||||||
|
InputFieldIds: []int64{100},
|
||||||
|
OutputFieldNames: []string{"sparse"},
|
||||||
|
OutputFieldIds: []int64{101},
|
||||||
|
}},
|
||||||
|
}, nil, &querypb.LoadMetaInfo{SchemaVersion: tsoutil.ComposeTSByTime(time.Now(), 0)})
|
||||||
|
s.ResetDelegator()
|
||||||
|
|
||||||
|
result, err := s.delegator.GetHighlight(ctx, &querypb.GetHighlightRequest{
|
||||||
|
Topks: []int64{2},
|
||||||
|
Tasks: []*querypb.HighlightTask{
|
||||||
|
{
|
||||||
|
FieldId: 100,
|
||||||
|
Texts: []string{"test", "this is a test document", "another test case"},
|
||||||
|
SearchTextNum: 1,
|
||||||
|
CorpusTextNum: 2,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
s.Require().NoError(err)
|
||||||
|
s.Require().Equal(2, len(result))
|
||||||
|
// Check that we got highlight results
|
||||||
|
s.Require().NotNil(result[0].Fragments)
|
||||||
|
s.Require().NotNil(result[1].Fragments)
|
||||||
|
})
|
||||||
|
|
||||||
|
s.Run("highlight with multi analyzer", func() {
|
||||||
|
s.manager.Collection.PutOrRef(s.collectionID, &schemapb.CollectionSchema{
|
||||||
|
Fields: []*schemapb.FieldSchema{
|
||||||
|
{
|
||||||
|
FieldID: 100,
|
||||||
|
Name: "text",
|
||||||
|
DataType: schemapb.DataType_VarChar,
|
||||||
|
TypeParams: []*commonpb.KeyValuePair{{Key: "multi_analyzer_params", Value: `{
|
||||||
|
"by_field": "analyzer",
|
||||||
|
"analyzers": {
|
||||||
|
"standard": {},
|
||||||
|
"default": {}
|
||||||
|
}
|
||||||
|
}`}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
FieldID: 101,
|
||||||
|
Name: "sparse",
|
||||||
|
DataType: schemapb.DataType_SparseFloatVector,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
FieldID: 102,
|
||||||
|
Name: "analyzer",
|
||||||
|
DataType: schemapb.DataType_VarChar,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Functions: []*schemapb.FunctionSchema{{
|
||||||
|
Type: schemapb.FunctionType_BM25,
|
||||||
|
InputFieldNames: []string{"text"},
|
||||||
|
InputFieldIds: []int64{100},
|
||||||
|
OutputFieldNames: []string{"sparse"},
|
||||||
|
OutputFieldIds: []int64{101},
|
||||||
|
}},
|
||||||
|
}, nil, &querypb.LoadMetaInfo{SchemaVersion: tsoutil.ComposeTSByTime(time.Now(), 0)})
|
||||||
|
s.ResetDelegator()
|
||||||
|
|
||||||
|
// two target with two analyzer
|
||||||
|
result, err := s.delegator.GetHighlight(ctx, &querypb.GetHighlightRequest{
|
||||||
|
Topks: []int64{1, 1},
|
||||||
|
Tasks: []*querypb.HighlightTask{
|
||||||
|
{
|
||||||
|
FieldId: 100,
|
||||||
|
Texts: []string{"test1", "test2", "this is a test1 document", "another test2 case"},
|
||||||
|
AnalyzerNames: []string{"default", "standard", "default", "default"},
|
||||||
|
SearchTextNum: 2,
|
||||||
|
CorpusTextNum: 2,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
s.Require().NoError(err)
|
||||||
|
s.Require().Equal(2, len(result))
|
||||||
|
})
|
||||||
|
|
||||||
|
s.Run("empty target texts", func() {
|
||||||
|
s.manager.Collection.PutOrRef(s.collectionID, &schemapb.CollectionSchema{
|
||||||
|
Fields: []*schemapb.FieldSchema{
|
||||||
|
{
|
||||||
|
FieldID: 100,
|
||||||
|
Name: "text",
|
||||||
|
DataType: schemapb.DataType_VarChar,
|
||||||
|
TypeParams: []*commonpb.KeyValuePair{{Key: "analyzer_params", Value: "{}"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
FieldID: 101,
|
||||||
|
Name: "sparse",
|
||||||
|
DataType: schemapb.DataType_SparseFloatVector,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Functions: []*schemapb.FunctionSchema{{
|
||||||
|
Type: schemapb.FunctionType_BM25,
|
||||||
|
InputFieldNames: []string{"text"},
|
||||||
|
InputFieldIds: []int64{100},
|
||||||
|
OutputFieldNames: []string{"sparse"},
|
||||||
|
OutputFieldIds: []int64{101},
|
||||||
|
}},
|
||||||
|
}, nil, &querypb.LoadMetaInfo{SchemaVersion: tsoutil.ComposeTSByTime(time.Now(), 0)})
|
||||||
|
s.ResetDelegator()
|
||||||
|
|
||||||
|
result, err := s.delegator.GetHighlight(ctx, &querypb.GetHighlightRequest{
|
||||||
|
Topks: []int64{1},
|
||||||
|
Tasks: []*querypb.HighlightTask{
|
||||||
|
{
|
||||||
|
FieldId: 100,
|
||||||
|
Texts: []string{"test document"},
|
||||||
|
SearchTextNum: 0,
|
||||||
|
CorpusTextNum: 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
s.Require().NoError(err)
|
||||||
|
s.Require().NotNil(result)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// TestDelegatorLifetimeIntegration tests the integration of lifetime state checks with main delegator methods
|
// TestDelegatorLifetimeIntegration tests the integration of lifetime state checks with main delegator methods
|
||||||
func (s *DelegatorSuite) TestDelegatorLifetimeIntegration() {
|
func (s *DelegatorSuite) TestDelegatorLifetimeIntegration() {
|
||||||
sd := s.delegator.(*shardDelegator)
|
sd := s.delegator.(*shardDelegator)
|
||||||
|
|||||||
@ -289,6 +289,65 @@ func (_c *MockShardDelegator_GetDeleteBufferSize_Call) RunAndReturn(run func() (
|
|||||||
return _c
|
return _c
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetHighlight provides a mock function with given fields: ctx, req
// NOTE: mockery-generated code; regenerate rather than hand-editing logic.
func (_m *MockShardDelegator) GetHighlight(ctx context.Context, req *querypb.GetHighlightRequest) ([]*querypb.HighlightResult, error) {
	ret := _m.Called(ctx, req)

	// mockery panics when the test set no expectation for this method.
	if len(ret) == 0 {
		panic("no return value specified for GetHighlight")
	}

	var r0 []*querypb.HighlightResult
	var r1 error
	// RunAndReturn case: a single function produces both return values.
	if rf, ok := ret.Get(0).(func(context.Context, *querypb.GetHighlightRequest) ([]*querypb.HighlightResult, error)); ok {
		return rf(ctx, req)
	}
	// First return value: either computed from the args or a fixed value.
	if rf, ok := ret.Get(0).(func(context.Context, *querypb.GetHighlightRequest) []*querypb.HighlightResult); ok {
		r0 = rf(ctx, req)
	} else {
		if ret.Get(0) != nil {
			r0 = ret.Get(0).([]*querypb.HighlightResult)
		}
	}

	// Second return value: either computed from the args or a fixed error.
	if rf, ok := ret.Get(1).(func(context.Context, *querypb.GetHighlightRequest) error); ok {
		r1 = rf(ctx, req)
	} else {
		r1 = ret.Error(1)
	}

	return r0, r1
}
|
||||||
|
|
||||||
|
// MockShardDelegator_GetHighlight_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetHighlight'
type MockShardDelegator_GetHighlight_Call struct {
	*mock.Call
}

// GetHighlight is a helper method to define mock.On call
//   - ctx context.Context
//   - req *querypb.GetHighlightRequest
func (_e *MockShardDelegator_Expecter) GetHighlight(ctx interface{}, req interface{}) *MockShardDelegator_GetHighlight_Call {
	return &MockShardDelegator_GetHighlight_Call{Call: _e.mock.On("GetHighlight", ctx, req)}
}

// Run registers a callback that observes the call's typed arguments.
func (_c *MockShardDelegator_GetHighlight_Call) Run(run func(ctx context.Context, req *querypb.GetHighlightRequest)) *MockShardDelegator_GetHighlight_Call {
	_c.Call.Run(func(args mock.Arguments) {
		run(args[0].(context.Context), args[1].(*querypb.GetHighlightRequest))
	})
	return _c
}

// Return fixes the values the mocked GetHighlight yields.
func (_c *MockShardDelegator_GetHighlight_Call) Return(_a0 []*querypb.HighlightResult, _a1 error) *MockShardDelegator_GetHighlight_Call {
	_c.Call.Return(_a0, _a1)
	return _c
}

// RunAndReturn computes both return values from the call's arguments.
func (_c *MockShardDelegator_GetHighlight_Call) RunAndReturn(run func(context.Context, *querypb.GetHighlightRequest) ([]*querypb.HighlightResult, error)) *MockShardDelegator_GetHighlight_Call {
	_c.Call.Return(run)
	return _c
}
|
||||||
|
|
||||||
// GetPartitionStatsVersions provides a mock function with given fields: ctx
|
// GetPartitionStatsVersions provides a mock function with given fields: ctx
|
||||||
func (_m *MockShardDelegator) GetPartitionStatsVersions(ctx context.Context) map[int64]int64 {
|
func (_m *MockShardDelegator) GetPartitionStatsVersions(ctx context.Context) map[int64]int64 {
|
||||||
ret := _m.Called(ctx)
|
ret := _m.Called(ctx)
|
||||||
|
|||||||
@ -2,7 +2,10 @@ package delegator
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"github.com/cockroachdb/errors"
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
"google.golang.org/protobuf/proto"
|
"google.golang.org/protobuf/proto"
|
||||||
|
|
||||||
@ -10,7 +13,9 @@ import (
|
|||||||
"github.com/milvus-io/milvus/pkg/v2/log"
|
"github.com/milvus-io/milvus/pkg/v2/log"
|
||||||
"github.com/milvus-io/milvus/pkg/v2/proto/internalpb"
|
"github.com/milvus-io/milvus/pkg/v2/proto/internalpb"
|
||||||
"github.com/milvus-io/milvus/pkg/v2/proto/planpb"
|
"github.com/milvus-io/milvus/pkg/v2/proto/planpb"
|
||||||
|
"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
|
||||||
"github.com/milvus-io/milvus/pkg/v2/util/merr"
|
"github.com/milvus-io/milvus/pkg/v2/util/merr"
|
||||||
|
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
|
||||||
)
|
)
|
||||||
|
|
||||||
func BuildSparseFieldData(field *schemapb.FieldSchema, sparseArray *schemapb.SparseFloatArray) *schemapb.FieldData {
|
func BuildSparseFieldData(field *schemapb.FieldSchema, sparseArray *schemapb.SparseFloatArray) *schemapb.FieldData {
|
||||||
@ -62,3 +67,117 @@ func SetBM25Params(req *internalpb.SearchRequest, avgdl float64) error {
|
|||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Span is a [start, end] offset pair within a text; SpanList is an
// ordered collection of such pairs.
type (
	Span     [2]int64
	SpanList []Span
)

// SpanList implements sort.Interface: spans are ordered by start offset,
// with ties broken by end offset.
func (a SpanList) Len() int      { return len(a) }
func (a SpanList) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a SpanList) Less(i, j int) bool {
	if a[i][0] != a[j][0] {
		return a[i][0] < a[j][0]
	}
	return a[i][1] < a[j][1]
}

// mergeOffsets sorts the input in place and coalesces every overlapping or
// touching pair of spans, returning a list of disjoint spans in ascending
// order.
func mergeOffsets(input SpanList) SpanList {
	sort.Sort(input)

	merged := SpanList{}
	maxEnd := int64(-1) // largest end offset emitted so far
	for _, span := range input {
		if span[1] <= maxEnd {
			// Entirely covered by what we already emitted; skip it.
			continue
		}
		if n := len(merged); n > 0 && span[0] <= merged[n-1][1] {
			// Overlaps (or touches) the last kept span: extend it.
			merged[n-1][1] = span[1]
		} else {
			// Disjoint from everything before: keep the whole span.
			merged = append(merged, span)
		}
		maxEnd = span[1]
	}
	return merged
}
|
||||||
|
|
||||||
|
func bytesOffsetToRuneOffset(text string, spans SpanList) error {
|
||||||
|
byteOffsetSet := typeutil.NewSet[int64]()
|
||||||
|
for _, span := range spans {
|
||||||
|
byteOffsetSet.Insert(span[0])
|
||||||
|
byteOffsetSet.Insert(span[1])
|
||||||
|
}
|
||||||
|
offsetMap := map[int64]int64{0: 0, int64(len(text)): int64(utf8.RuneCountInString(text))}
|
||||||
|
|
||||||
|
cnt := int64(0)
|
||||||
|
for i := range text {
|
||||||
|
if byteOffsetSet.Contain(int64(i)) {
|
||||||
|
offsetMap[int64(i)] = cnt
|
||||||
|
}
|
||||||
|
cnt++
|
||||||
|
}
|
||||||
|
|
||||||
|
// convert spans from byte offsets to rune offsets
|
||||||
|
for i, span := range spans {
|
||||||
|
startOffset, ok := offsetMap[span[0]]
|
||||||
|
if !ok {
|
||||||
|
return errors.Errorf("start offset: %d not found (text: %d bytes)", span[0], len(text))
|
||||||
|
}
|
||||||
|
endOffset, ok := offsetMap[span[1]]
|
||||||
|
if !ok {
|
||||||
|
return errors.Errorf("end offset: %d not found (text: %d bytes)", span[1], len(text))
|
||||||
|
}
|
||||||
|
spans[i][0] = startOffset
|
||||||
|
spans[i][1] = endOffset
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func fetchFragmentsFromOffsets(text string, spans SpanList, fragmentOffset int64, fragmentSize int64, numOfFragments int64) []*querypb.HighlightFragment {
|
||||||
|
result := make([]*querypb.HighlightFragment, 0)
|
||||||
|
textRuneLen := int64(utf8.RuneCountInString(text))
|
||||||
|
|
||||||
|
var frag *querypb.HighlightFragment = nil
|
||||||
|
next := func(span *Span) bool {
|
||||||
|
startOffset := max(0, span[0]-fragmentOffset)
|
||||||
|
endOffset := min(max(span[1], startOffset+fragmentSize), textRuneLen)
|
||||||
|
if frag != nil {
|
||||||
|
result = append(result, frag)
|
||||||
|
}
|
||||||
|
if len(result) >= int(numOfFragments) {
|
||||||
|
frag = nil
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
frag = &querypb.HighlightFragment{
|
||||||
|
StartOffset: startOffset,
|
||||||
|
EndOffset: endOffset,
|
||||||
|
Offsets: []int64{span[0], span[1]},
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, span := range spans {
|
||||||
|
if frag == nil || span[0] > frag.EndOffset {
|
||||||
|
if !next(&span) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// append rune offset to fragment
|
||||||
|
frag.Offsets = append(frag.Offsets, spans[i][0], spans[i][1])
|
||||||
|
// extend fragment end offset if this span goes beyond current boundary
|
||||||
|
if span[1] > frag.EndOffset {
|
||||||
|
frag.EndOffset = span[1]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if frag != nil {
|
||||||
|
result = append(result, frag)
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|||||||
38
internal/querynodev2/delegator/util_test.go
Normal file
38
internal/querynodev2/delegator/util_test.go
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
// Licensed to the LF AI & Data foundation under one
|
||||||
|
// or more contributor license agreements. See the NOTICE file
|
||||||
|
// distributed with this work for additional information
|
||||||
|
// regarding copyright ownership. The ASF licenses this file
|
||||||
|
// to you under the Apache License, Version 2.0 (the
|
||||||
|
// "License"); you may not use this file except in compliance
|
||||||
|
// with the License. You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
package delegator
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestBytesOffsetToRuneOffset(t *testing.T) {
|
||||||
|
// test with chinese
|
||||||
|
text := "你好世界" // 12 bytes, 4 runes
|
||||||
|
spans := SpanList{{0, 6}, {6, 12}}
|
||||||
|
err := bytesOffsetToRuneOffset(text, spans)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, SpanList{{0, 2}, {2, 4}}, spans)
|
||||||
|
|
||||||
|
// test with emoji
|
||||||
|
text = "Hello👋World" // 15 bytes, 11 runes
|
||||||
|
spans = SpanList{{0, 5}, {5, 9}, {9, 14}}
|
||||||
|
err = bytesOffsetToRuneOffset(text, spans)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, SpanList{{0, 5}, {5, 6}, {6, 11}}, spans)
|
||||||
|
}
|
||||||
@ -1667,3 +1667,36 @@ func (node *QueryNode) DropIndex(ctx context.Context, req *querypb.DropIndexRequ
|
|||||||
|
|
||||||
return merr.Success(), nil
|
return merr.Success(), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (node *QueryNode) GetHighlight(ctx context.Context, req *querypb.GetHighlightRequest) (*querypb.GetHighlightResponse, error) {
|
||||||
|
// check node healthy
|
||||||
|
if err := node.lifetime.Add(merr.IsHealthy); err != nil {
|
||||||
|
return &querypb.GetHighlightResponse{
|
||||||
|
Status: merr.Status(err),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
defer node.lifetime.Done()
|
||||||
|
|
||||||
|
// get delegator
|
||||||
|
sd, ok := node.delegators.Get(req.GetChannel())
|
||||||
|
if !ok {
|
||||||
|
err := merr.WrapErrChannelNotFound(req.GetChannel())
|
||||||
|
log.Warn("GetHighlight failed, failed to get shard delegator", zap.Error(err))
|
||||||
|
return &querypb.GetHighlightResponse{
|
||||||
|
Status: merr.Status(err),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
results, err := sd.GetHighlight(ctx, req)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("GetHighlight failed, delegator run failed", zap.Error(err))
|
||||||
|
return &querypb.GetHighlightResponse{
|
||||||
|
Status: merr.Status(err),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return &querypb.GetHighlightResponse{
|
||||||
|
Status: merr.Success(),
|
||||||
|
Results: results,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|||||||
@ -2417,6 +2417,49 @@ func (suite *ServiceSuite) TestRunAnalyzer() {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestGetHighlight covers the QueryNode.GetHighlight RPC: an unhealthy node
// must report an error status, and a healthy node must forward the request
// to the shard delegator and surface its results.
func (suite *ServiceSuite) TestGetHighlight() {
	ctx := context.Background()

	suite.Run("node not healthy", func() {
		suite.node.UpdateStateCode(commonpb.StateCode_Abnormal)
		defer suite.node.UpdateStateCode(commonpb.StateCode_Healthy)

		resp, err := suite.node.GetHighlight(ctx, &querypb.GetHighlightRequest{
			Channel: suite.vchannel,
			Topks:   []int64{10},
		})

		// Transport error stays nil; the failure is carried in the status.
		suite.NoError(err)
		suite.Error(merr.Error(resp.GetStatus()))
	})

	suite.Run("normal case", func() {
		// Install a mock delegator on the channel for the duration of the case.
		delegator := &delegator.MockShardDelegator{}
		suite.node.delegators.Insert(suite.vchannel, delegator)
		defer suite.node.delegators.GetAndRemove(suite.vchannel)
		delegator.EXPECT().GetHighlight(mock.Anything, mock.Anything).Return(
			[]*querypb.HighlightResult{}, nil)
		// Two search texts and two corpus texts: len(Topks) == SearchTextNum,
		// sum(Topks) == CorpusTextNum.
		resp, err := suite.node.GetHighlight(ctx, &querypb.GetHighlightRequest{
			Channel: suite.vchannel,
			Topks:   []int64{1, 1},
			Tasks: []*querypb.HighlightTask{
				{
					FieldName:     "text_field",
					FieldId:       100,
					Texts:         []string{"target text", "target text2", "text", "text2"},
					AnalyzerNames: []string{"standard", "standard", "standard", "standard"},
					SearchTextNum: 2,
					CorpusTextNum: 2,
				},
			},
		})

		suite.NoError(err)
		suite.NoError(merr.Error(resp.GetStatus()))
		suite.NotNil(resp.Results)
	})
}
|
||||||
|
|
||||||
func TestQueryNodeService(t *testing.T) {
|
func TestQueryNodeService(t *testing.T) {
|
||||||
wal := mock_streaming.NewMockWALAccesser(t)
|
wal := mock_streaming.NewMockWALAccesser(t)
|
||||||
local := mock_streaming.NewMockLocal(t)
|
local := mock_streaming.NewMockLocal(t)
|
||||||
|
|||||||
@ -142,6 +142,10 @@ func (m *GrpcQueryNodeClient) RunAnalyzer(ctx context.Context, in *querypb.RunAn
|
|||||||
return &milvuspb.RunAnalyzerResponse{}, m.Err
|
return &milvuspb.RunAnalyzerResponse{}, m.Err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetHighlight is a test stub: it returns an empty response together with
// the client's preconfigured error, mirroring the other stubbed RPCs.
func (m *GrpcQueryNodeClient) GetHighlight(ctx context.Context, in *querypb.GetHighlightRequest, opts ...grpc.CallOption) (*querypb.GetHighlightResponse, error) {
	return &querypb.GetHighlightResponse{}, m.Err
}
|
||||||
|
|
||||||
func (m *GrpcQueryNodeClient) Close() error {
|
func (m *GrpcQueryNodeClient) Close() error {
|
||||||
return m.Err
|
return m.Err
|
||||||
}
|
}
|
||||||
|
|||||||
@ -160,6 +160,10 @@ func (qn *qnServerWrapper) RunAnalyzer(ctx context.Context, in *querypb.RunAnaly
|
|||||||
return qn.QueryNode.RunAnalyzer(ctx, in)
|
return qn.QueryNode.RunAnalyzer(ctx, in)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetHighlight adapts the in-process QueryNode server to the gRPC client
// interface by delegating directly, dropping the (unused) call options.
func (qn *qnServerWrapper) GetHighlight(ctx context.Context, in *querypb.GetHighlightRequest, _ ...grpc.CallOption) (*querypb.GetHighlightResponse, error) {
	return qn.QueryNode.GetHighlight(ctx, in)
}
|
||||||
|
|
||||||
func (qn *qnServerWrapper) DropIndex(ctx context.Context, in *querypb.DropIndexRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
|
func (qn *qnServerWrapper) DropIndex(ctx context.Context, in *querypb.DropIndexRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
|
||||||
return qn.QueryNode.DropIndex(ctx, in)
|
return qn.QueryNode.DropIndex(ctx, in)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -22,7 +22,7 @@ require (
|
|||||||
github.com/jolestar/go-commons-pool/v2 v2.1.2
|
github.com/jolestar/go-commons-pool/v2 v2.1.2
|
||||||
github.com/json-iterator/go v1.1.13-0.20220915233716-71ac16282d12
|
github.com/json-iterator/go v1.1.13-0.20220915233716-71ac16282d12
|
||||||
github.com/klauspost/compress v1.18.0
|
github.com/klauspost/compress v1.18.0
|
||||||
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.7
|
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.8-0.20251203092105-e2557263fc4c
|
||||||
github.com/minio/minio-go/v7 v7.0.73
|
github.com/minio/minio-go/v7 v7.0.73
|
||||||
github.com/panjf2000/ants/v2 v2.11.3
|
github.com/panjf2000/ants/v2 v2.11.3
|
||||||
github.com/prometheus/client_golang v1.20.5
|
github.com/prometheus/client_golang v1.20.5
|
||||||
|
|||||||
@ -621,8 +621,8 @@ github.com/milvus-io/cgosymbolizer v0.0.0-20250318084424-114f4050c3a6 h1:YHMFI6L
|
|||||||
github.com/milvus-io/cgosymbolizer v0.0.0-20250318084424-114f4050c3a6/go.mod h1:DvXTE/K/RtHehxU8/GtDs4vFtfw64jJ3PaCnFri8CRg=
|
github.com/milvus-io/cgosymbolizer v0.0.0-20250318084424-114f4050c3a6/go.mod h1:DvXTE/K/RtHehxU8/GtDs4vFtfw64jJ3PaCnFri8CRg=
|
||||||
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b h1:TfeY0NxYxZzUfIfYe5qYDBzt4ZYRqzUjTR6CvUzjat8=
|
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b h1:TfeY0NxYxZzUfIfYe5qYDBzt4ZYRqzUjTR6CvUzjat8=
|
||||||
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b/go.mod h1:iwW+9cWfIzzDseEBCCeDSN5SD16Tidvy8cwQ7ZY8Qj4=
|
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b/go.mod h1:iwW+9cWfIzzDseEBCCeDSN5SD16Tidvy8cwQ7ZY8Qj4=
|
||||||
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.7 h1:RJtZbkS5zKNIXxsqjGBUZc2SbnI4MGq+TfOfc8tJsuM=
|
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.8-0.20251203092105-e2557263fc4c h1:Gh02wIJEI6RUbEXwZworPBfK9BYd1SVBIDHDL8GsrCY=
|
||||||
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.7/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs=
|
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.8-0.20251203092105-e2557263fc4c/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs=
|
||||||
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
|
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
|
||||||
github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM=
|
github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM=
|
||||||
github.com/minio/minio-go/v7 v7.0.73 h1:qr2vi96Qm7kZ4v7LLebjte+MQh621fFWnv93p12htEo=
|
github.com/minio/minio-go/v7 v7.0.73 h1:qr2vi96Qm7kZ4v7LLebjte+MQh621fFWnv93p12htEo=
|
||||||
|
|||||||
@ -172,6 +172,8 @@ service QueryNode {
|
|||||||
rpc UpdateSchema(UpdateSchemaRequest) returns (common.Status) {}
|
rpc UpdateSchema(UpdateSchemaRequest) returns (common.Status) {}
|
||||||
|
|
||||||
rpc RunAnalyzer(RunAnalyzerRequest) returns(milvus.RunAnalyzerResponse){}
|
rpc RunAnalyzer(RunAnalyzerRequest) returns(milvus.RunAnalyzerResponse){}
|
||||||
|
rpc GetHighlight(GetHighlightRequest) returns (GetHighlightResponse){}
|
||||||
|
|
||||||
rpc DropIndex(DropIndexRequest) returns (common.Status) {}
|
rpc DropIndex(DropIndexRequest) returns (common.Status) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -995,6 +997,63 @@ message RunAnalyzerRequest{
|
|||||||
bool with_hash = 7;
|
bool with_hash = 7;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// HighlightOptions controls how highlight fragments are cut from a text.
message HighlightOptions{
    // Minimum size of each fragment; presumably measured in runes — confirm
    // against fetchFragmentsFromOffsets on the querynode side.
    int64 fragment_size = 1;
    // Number of characters to include before a fragment's first highlight term.
    int64 fragment_offset = 2;
    // Upper bound on the number of fragments returned per corpus text.
    int64 num_of_fragments = 3;
}

// HighlightQueryType enumerates the supported highlight query kinds.
enum HighlightQueryType{
    TextMatch = 0;
}

// HighlightQuery describes one additional query whose terms should be highlighted.
message HighlightQuery{
    HighlightQueryType type = 1;
}

// HighlightTask fetch highlight for all queries at one field
// search_text_num/search_num == len(topks) == nq
// corpus_text_num == sum(topks) == len(search_results)
message HighlightTask{
    string field_name = 1;
    int64 field_id = 2;
    // len(texts) = search_text_num + corpus_text_num + len(queries);
    // text = search_text...corpus_text...query_text
    repeated string texts = 3;
    repeated string analyzer_names = 4; // used if field with multi-analyzer

    int64 search_text_num = 5;
    int64 corpus_text_num = 6;
    HighlightOptions options = 7;
    repeated HighlightQuery queries = 8;
}

// Get Lexical highlight from delegator
message GetHighlightRequest{
    common.MsgBase base = 1;
    string channel = 2;
    repeated int64 topks = 3;
    repeated HighlightTask tasks=4; // one task for one field
}

// start_offset and end_offset are fragment offset in the original text
// number of offsets always be 2 * number of highlight terms in the fragment
message HighlightFragment{
    int64 start_offset = 1;
    int64 end_offset = 2;
    // char offset of the highlight terms in the fragment
    repeated int64 offsets = 3;
}

// HighlightResult holds all fragments produced for one corpus text.
message HighlightResult{
    // NOTE(review): field numbering starts at 2; tag 1 appears unused —
    // confirm it is intentionally reserved.
    repeated HighlightFragment fragments = 2;
}

// GetHighlightResponse carries the per-corpus-text highlight results.
message GetHighlightResponse{
    common.Status status = 1;
    repeated HighlightResult results = 2;
}
|
||||||
|
|
||||||
message ListLoadedSegmentsRequest {
|
message ListLoadedSegmentsRequest {
|
||||||
common.MsgBase base = 1;
|
common.MsgBase base = 1;
|
||||||
}
|
}
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@ -1478,6 +1478,7 @@ const (
|
|||||||
QueryNode_DeleteBatch_FullMethodName = "/milvus.proto.query.QueryNode/DeleteBatch"
|
QueryNode_DeleteBatch_FullMethodName = "/milvus.proto.query.QueryNode/DeleteBatch"
|
||||||
QueryNode_UpdateSchema_FullMethodName = "/milvus.proto.query.QueryNode/UpdateSchema"
|
QueryNode_UpdateSchema_FullMethodName = "/milvus.proto.query.QueryNode/UpdateSchema"
|
||||||
QueryNode_RunAnalyzer_FullMethodName = "/milvus.proto.query.QueryNode/RunAnalyzer"
|
QueryNode_RunAnalyzer_FullMethodName = "/milvus.proto.query.QueryNode/RunAnalyzer"
|
||||||
|
QueryNode_GetHighlight_FullMethodName = "/milvus.proto.query.QueryNode/GetHighlight"
|
||||||
QueryNode_DropIndex_FullMethodName = "/milvus.proto.query.QueryNode/DropIndex"
|
QueryNode_DropIndex_FullMethodName = "/milvus.proto.query.QueryNode/DropIndex"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -1515,6 +1516,7 @@ type QueryNodeClient interface {
|
|||||||
DeleteBatch(ctx context.Context, in *DeleteBatchRequest, opts ...grpc.CallOption) (*DeleteBatchResponse, error)
|
DeleteBatch(ctx context.Context, in *DeleteBatchRequest, opts ...grpc.CallOption) (*DeleteBatchResponse, error)
|
||||||
UpdateSchema(ctx context.Context, in *UpdateSchemaRequest, opts ...grpc.CallOption) (*commonpb.Status, error)
|
UpdateSchema(ctx context.Context, in *UpdateSchemaRequest, opts ...grpc.CallOption) (*commonpb.Status, error)
|
||||||
RunAnalyzer(ctx context.Context, in *RunAnalyzerRequest, opts ...grpc.CallOption) (*milvuspb.RunAnalyzerResponse, error)
|
RunAnalyzer(ctx context.Context, in *RunAnalyzerRequest, opts ...grpc.CallOption) (*milvuspb.RunAnalyzerResponse, error)
|
||||||
|
GetHighlight(ctx context.Context, in *GetHighlightRequest, opts ...grpc.CallOption) (*GetHighlightResponse, error)
|
||||||
DropIndex(ctx context.Context, in *DropIndexRequest, opts ...grpc.CallOption) (*commonpb.Status, error)
|
DropIndex(ctx context.Context, in *DropIndexRequest, opts ...grpc.CallOption) (*commonpb.Status, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1815,6 +1817,15 @@ func (c *queryNodeClient) RunAnalyzer(ctx context.Context, in *RunAnalyzerReques
|
|||||||
return out, nil
|
return out, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetHighlight invokes the QueryNode GetHighlight RPC over the client
// connection. NOTE: protoc-gen-go-grpc generated code; do not hand-edit.
func (c *queryNodeClient) GetHighlight(ctx context.Context, in *GetHighlightRequest, opts ...grpc.CallOption) (*GetHighlightResponse, error) {
	out := new(GetHighlightResponse)
	err := c.cc.Invoke(ctx, QueryNode_GetHighlight_FullMethodName, in, out, opts...)
	if err != nil {
		return nil, err
	}
	return out, nil
}
|
||||||
|
|
||||||
func (c *queryNodeClient) DropIndex(ctx context.Context, in *DropIndexRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
|
func (c *queryNodeClient) DropIndex(ctx context.Context, in *DropIndexRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
|
||||||
out := new(commonpb.Status)
|
out := new(commonpb.Status)
|
||||||
err := c.cc.Invoke(ctx, QueryNode_DropIndex_FullMethodName, in, out, opts...)
|
err := c.cc.Invoke(ctx, QueryNode_DropIndex_FullMethodName, in, out, opts...)
|
||||||
@ -1858,6 +1869,7 @@ type QueryNodeServer interface {
|
|||||||
DeleteBatch(context.Context, *DeleteBatchRequest) (*DeleteBatchResponse, error)
|
DeleteBatch(context.Context, *DeleteBatchRequest) (*DeleteBatchResponse, error)
|
||||||
UpdateSchema(context.Context, *UpdateSchemaRequest) (*commonpb.Status, error)
|
UpdateSchema(context.Context, *UpdateSchemaRequest) (*commonpb.Status, error)
|
||||||
RunAnalyzer(context.Context, *RunAnalyzerRequest) (*milvuspb.RunAnalyzerResponse, error)
|
RunAnalyzer(context.Context, *RunAnalyzerRequest) (*milvuspb.RunAnalyzerResponse, error)
|
||||||
|
GetHighlight(context.Context, *GetHighlightRequest) (*GetHighlightResponse, error)
|
||||||
DropIndex(context.Context, *DropIndexRequest) (*commonpb.Status, error)
|
DropIndex(context.Context, *DropIndexRequest) (*commonpb.Status, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1946,6 +1958,9 @@ func (UnimplementedQueryNodeServer) UpdateSchema(context.Context, *UpdateSchemaR
|
|||||||
func (UnimplementedQueryNodeServer) RunAnalyzer(context.Context, *RunAnalyzerRequest) (*milvuspb.RunAnalyzerResponse, error) {
|
func (UnimplementedQueryNodeServer) RunAnalyzer(context.Context, *RunAnalyzerRequest) (*milvuspb.RunAnalyzerResponse, error) {
|
||||||
return nil, status.Errorf(codes.Unimplemented, "method RunAnalyzer not implemented")
|
return nil, status.Errorf(codes.Unimplemented, "method RunAnalyzer not implemented")
|
||||||
}
|
}
|
||||||
|
// GetHighlight on the embeddable default stub reports Unimplemented,
// letting servers opt in to the new RPC incrementally. Generated code.
func (UnimplementedQueryNodeServer) GetHighlight(context.Context, *GetHighlightRequest) (*GetHighlightResponse, error) {
	return nil, status.Errorf(codes.Unimplemented, "method GetHighlight not implemented")
}
|
||||||
func (UnimplementedQueryNodeServer) DropIndex(context.Context, *DropIndexRequest) (*commonpb.Status, error) {
|
func (UnimplementedQueryNodeServer) DropIndex(context.Context, *DropIndexRequest) (*commonpb.Status, error) {
|
||||||
return nil, status.Errorf(codes.Unimplemented, "method DropIndex not implemented")
|
return nil, status.Errorf(codes.Unimplemented, "method DropIndex not implemented")
|
||||||
}
|
}
|
||||||
@ -2453,6 +2468,24 @@ func _QueryNode_RunAnalyzer_Handler(srv interface{}, ctx context.Context, dec fu
|
|||||||
return interceptor(ctx, in, info, handler)
|
return interceptor(ctx, in, info, handler)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// _QueryNode_GetHighlight_Handler is the server-side dispatch shim for the
// GetHighlight RPC: it decodes the request, then calls the service either
// directly or through the configured unary interceptor. Generated code.
func _QueryNode_GetHighlight_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	in := new(GetHighlightRequest)
	if err := dec(in); err != nil {
		return nil, err
	}
	if interceptor == nil {
		return srv.(QueryNodeServer).GetHighlight(ctx, in)
	}
	info := &grpc.UnaryServerInfo{
		Server:     srv,
		FullMethod: QueryNode_GetHighlight_FullMethodName,
	}
	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
		return srv.(QueryNodeServer).GetHighlight(ctx, req.(*GetHighlightRequest))
	}
	return interceptor(ctx, in, info, handler)
}
|
||||||
|
|
||||||
func _QueryNode_DropIndex_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
func _QueryNode_DropIndex_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
in := new(DropIndexRequest)
|
in := new(DropIndexRequest)
|
||||||
if err := dec(in); err != nil {
|
if err := dec(in); err != nil {
|
||||||
@ -2578,6 +2611,10 @@ var QueryNode_ServiceDesc = grpc.ServiceDesc{
|
|||||||
MethodName: "RunAnalyzer",
|
MethodName: "RunAnalyzer",
|
||||||
Handler: _QueryNode_RunAnalyzer_Handler,
|
Handler: _QueryNode_RunAnalyzer_Handler,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
MethodName: "GetHighlight",
|
||||||
|
Handler: _QueryNode_GetHighlight_Handler,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
MethodName: "DropIndex",
|
MethodName: "DropIndex",
|
||||||
Handler: _QueryNode_DropIndex_Handler,
|
Handler: _QueryNode_DropIndex_Handler,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user