enhance: [GoSDK]support run analyzer by collection field (#42642) (#42812)

relate: https://github.com/milvus-io/milvus/issues/42094
pr: https://github.com/milvus-io/milvus/pull/42642

Signed-off-by: aoiasd <zhicheng.yue@zilliz.com>
This commit is contained in:
aoiasd 2025-06-23 15:42:41 +08:00 committed by GitHub
parent 78b66a29b6
commit f3e8f61f60
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 98 additions and 6 deletions

View File

@ -524,3 +524,63 @@ func ExampleClient_DropCollection() {
// handle err // handle err
} }
} }
// ExampleClient_RunAnalyzer shows the two ways of running an analyzer:
// against the analyzer configured on a field of a loaded collection, and
// against ad-hoc analyzer params supplied with the request.
func ExampleClient_RunAnalyzer() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	milvusAddr := "127.0.0.1:19530"
	collectionName := "test_run_analyzer"

	cli, err := milvusclient.New(ctx, &milvusclient.ClientConfig{
		Address: milvusAddr,
	})
	if err != nil {
		log.Fatal("failed to connect to milvus server: ", err.Error())
	}
	defer cli.Close(ctx)

	// The "text" field enables the standard analyzer and feeds the BM25
	// function that produces the "sparse" field.
	schema := entity.NewSchema().
		WithField(entity.NewField().WithName("pk").WithIsPrimaryKey(true).WithIsAutoID(true).WithDataType(entity.FieldTypeInt64)).
		WithField(entity.NewField().WithName("text").WithDataType(entity.FieldTypeVarChar).WithMaxLength(255).WithEnableAnalyzer(true).WithAnalyzerParams(map[string]any{"tokenizer": "standard"})).
		WithField(entity.NewField().WithName("sparse").WithDataType(entity.FieldTypeSparseVector)).
		WithFunction(entity.NewFunction().WithInputFields("text").WithOutputFields("sparse").WithType(entity.FunctionTypeBM25).WithName("bm25")).
		WithAutoID(true)

	err = cli.CreateCollection(ctx, milvusclient.NewCreateCollectionOption(collectionName, schema))
	if err != nil {
		log.Fatal("failed to create test collection: ", err.Error())
	}

	// Index creation and loading are asynchronous: check the returned error
	// and wait for each task, otherwise the analyzer call below may hit a
	// collection that is not loaded yet.
	indexTask, err := cli.CreateIndex(ctx, milvusclient.NewCreateIndexOption(collectionName, "sparse", index.NewAutoIndex(entity.BM25)).WithIndexName("bm25"))
	if err != nil {
		log.Fatal("failed to create index: ", err.Error())
	}
	if err = indexTask.Await(ctx); err != nil {
		log.Fatal("failed to wait for index creation: ", err.Error())
	}

	loadTask, err := cli.LoadCollection(ctx, milvusclient.NewLoadCollectionOption(collectionName))
	if err != nil {
		log.Fatal("failed to load collection: ", err.Error())
	}
	if err = loadTask.Await(ctx); err != nil {
		log.Fatal("failed to wait for collection load: ", err.Error())
	}

	// Run analyzer with loaded collection field (Must be bm25 function input)
	result, err := cli.RunAnalyzer(ctx, milvusclient.NewRunAnalyzerOption("test milvus").WithField(collectionName, "text"))
	if err != nil {
		log.Fatal("failed to run analyzer with loaded collection field: ", err)
	}

	println("Run analyzer result with loaded collection field")
	// Range over the results instead of indexing result[0] so an empty
	// response cannot panic the example.
	for _, analyzed := range result {
		for _, token := range analyzed.Tokens {
			println(token.Text)
		}
	}

	params := map[string]any{
		"tokenizer": "standard",
		"filter": []any{map[string]any{
			"type":       "stop",
			"stop_words": []string{"test"}, // remove word "test"
		}},
	}

	// Run analyzer with new analyzer params
	result, err = cli.RunAnalyzer(ctx, milvusclient.NewRunAnalyzerOption("test milvus").WithAnalyzerParams(params))
	if err != nil {
		log.Fatal("failed to run analyzer with new analyzer params: ", err)
	}

	println("Run analyzer with new analyzer params")
	for _, analyzed := range result {
		for _, token := range analyzed.Tokens {
			println(token.Text)
		}
	}
}

View File

@ -620,23 +620,44 @@ type RunAnalyzerOption interface {
type runAnalyzerOption struct { type runAnalyzerOption struct {
text []string text []string
collectionName string
fieldName string
analyzerNames []string
analyzerParams string analyzerParams string
withDetail bool withDetail bool
withHash bool withHash bool
err error
} }
func (opt *runAnalyzerOption) Request() (*milvuspb.RunAnalyzerRequest, error) { func (opt *runAnalyzerOption) Request() (*milvuspb.RunAnalyzerRequest, error) {
if opt.err != nil {
return nil, opt.err
}
return &milvuspb.RunAnalyzerRequest{ return &milvuspb.RunAnalyzerRequest{
Placeholder: lo.Map(opt.text, func(str string, _ int) []byte { return []byte(str) }), Placeholder: lo.Map(opt.text, func(str string, _ int) []byte { return []byte(str) }),
AnalyzerParams: opt.analyzerParams, AnalyzerParams: opt.analyzerParams,
CollectionName: opt.collectionName,
FieldName: opt.fieldName,
AnalyzerNames: opt.analyzerNames,
WithDetail: opt.withDetail,
WithHash: opt.withHash,
}, nil }, nil
} }
func (opt *runAnalyzerOption) WithAnalyzerParams(params string) *runAnalyzerOption { func (opt *runAnalyzerOption) WithAnalyzerParamsStr(params string) *runAnalyzerOption {
opt.analyzerParams = params opt.analyzerParams = params
return opt return opt
} }
// WithAnalyzerParams sets the analyzer params from a map by encoding it as
// JSON. If encoding fails, the error is stored on the option and surfaced by
// Request; any previously set analyzer params are left untouched.
func (opt *runAnalyzerOption) WithAnalyzerParams(params map[string]any) *runAnalyzerOption {
	s, err := json.Marshal(params)
	if err != nil {
		// Stop here so the empty output of the failed marshal does not
		// overwrite the current params.
		opt.err = err
		return opt
	}
	opt.analyzerParams = string(s)
	return opt
}
func (opt *runAnalyzerOption) WithDetail() *runAnalyzerOption { func (opt *runAnalyzerOption) WithDetail() *runAnalyzerOption {
opt.withDetail = true opt.withDetail = true
return opt return opt
@ -647,7 +668,18 @@ func (opt *runAnalyzerOption) WithHash() *runAnalyzerOption {
return opt return opt
} }
func NewRunAnalyzerOption(text []string) *runAnalyzerOption { func (opt *runAnalyzerOption) WithField(collectionName, fieldName string) *runAnalyzerOption {
opt.collectionName = collectionName
opt.fieldName = fieldName
return opt
}
// WithAnalyzerName records the given analyzer names on the option; Request
// copies them into the AnalyzerNames field of the RunAnalyzerRequest.
// Each call replaces the names set by a previous call.
func (opt *runAnalyzerOption) WithAnalyzerName(names ...string) *runAnalyzerOption {
	opt.analyzerNames = names
	return opt
}
func NewRunAnalyzerOption(text ...string) *runAnalyzerOption {
return &runAnalyzerOption{ return &runAnalyzerOption{
text: text, text: text,
} }

View File

@ -1214,19 +1214,19 @@ func TestRunAnalyzer(t *testing.T) {
mc := hp.CreateDefaultMilvusClient(ctx, t) mc := hp.CreateDefaultMilvusClient(ctx, t)
// run analyzer with default analyzer // run analyzer with default analyzer
tokens, err := mc.RunAnalyzer(ctx, client.NewRunAnalyzerOption([]string{"test doc"})) tokens, err := mc.RunAnalyzer(ctx, client.NewRunAnalyzerOption("test doc"))
require.NoError(t, err) require.NoError(t, err)
for i, text := range []string{"test", "doc"} { for i, text := range []string{"test", "doc"} {
require.Equal(t, text, tokens[0].Tokens[i].Text) require.Equal(t, text, tokens[0].Tokens[i].Text)
} }
// run analyzer with invalid params // run analyzer with invalid params
_, err = mc.RunAnalyzer(ctx, client.NewRunAnalyzerOption([]string{"text doc"}).WithAnalyzerParams("invalid params}")) _, err = mc.RunAnalyzer(ctx, client.NewRunAnalyzerOption("text doc").WithAnalyzerParamsStr("invalid params}"))
common.CheckErr(t, err, false, "JsonError") common.CheckErr(t, err, false, "JsonError")
// run analyzer with custom analyzer // run analyzer with custom analyzer
tokens, err = mc.RunAnalyzer(ctx, client.NewRunAnalyzerOption([]string{"test doc"}). tokens, err = mc.RunAnalyzer(ctx, client.NewRunAnalyzerOption("test doc").
WithAnalyzerParams(`{"type": "standard", "stop_words": ["test"]}`)) WithAnalyzerParamsStr(`{"type": "standard", "stop_words": ["test"]}`))
require.NoError(t, err) require.NoError(t, err)
for i, text := range []string{"doc"} { for i, text := range []string{"doc"} {