From f3e8f61f60c4768c535afa80fdbcf10c0250659b Mon Sep 17 00:00:00 2001
From: aoiasd <45024769+aoiasd@users.noreply.github.com>
Date: Mon, 23 Jun 2025 15:42:41 +0800
Subject: [PATCH] enhance: [GoSDK]support run analyzer by collection field (#42642) (#42812)

relate: https://github.com/milvus-io/milvus/issues/42094
pr: https://github.com/milvus-io/milvus/pull/42642

Signed-off-by: aoiasd
---
 .../milvusclient/collection_example_test.go   | 75 +++++++++++++++++++
 client/milvusclient/read_options.go           | 46 ++++++++++--
 tests/go_client/testcases/query_test.go       |  8 +--
 3 files changed, 123 insertions(+), 6 deletions(-)

diff --git a/client/milvusclient/collection_example_test.go b/client/milvusclient/collection_example_test.go
index 88786921f2..339ae6b3da 100644
--- a/client/milvusclient/collection_example_test.go
+++ b/client/milvusclient/collection_example_test.go
@@ -524,3 +524,78 @@ func ExampleClient_DropCollection() {
 		// handle err
 	}
 }
+
+func ExampleClient_RunAnalyzer() {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	milvusAddr := "127.0.0.1:19530"
+	collectionName := "test_run_analyzer"
+
+	cli, err := milvusclient.New(ctx, &milvusclient.ClientConfig{
+		Address: milvusAddr,
+	})
+	if err != nil {
+		log.Fatal("failed to connect to milvus server: ", err.Error())
+	}
+	defer cli.Close(ctx)
+
+	schema := entity.NewSchema().
+		WithField(entity.NewField().WithName("pk").WithIsPrimaryKey(true).WithIsAutoID(true).WithDataType(entity.FieldTypeInt64)).
+		WithField(entity.NewField().WithName("text").WithDataType(entity.FieldTypeVarChar).WithMaxLength(255).WithEnableAnalyzer(true).WithAnalyzerParams(map[string]any{"tokenizer": "standard"})).
+		WithField(entity.NewField().WithName("sparse").WithDataType(entity.FieldTypeSparseVector)).
+		WithFunction(entity.NewFunction().WithInputFields("text").WithOutputFields("sparse").WithType(entity.FunctionTypeBM25).WithName("bm25")).
+		WithAutoID(true)
+
+	err = cli.CreateCollection(ctx, milvusclient.NewCreateCollectionOption(collectionName, schema))
+	if err != nil {
+		log.Fatal("failed to create test collection: ", err.Error())
+	}
+
+	// CreateIndex and LoadCollection hand back tasks; check their errors and
+	// wait for them, since the field-based call below targets the loaded collection.
+	indexTask, err := cli.CreateIndex(ctx, milvusclient.NewCreateIndexOption(collectionName, "sparse", index.NewAutoIndex(entity.BM25)).WithIndexName("bm25"))
+	if err != nil {
+		log.Fatal("failed to create index: ", err.Error())
+	}
+	if err = indexTask.Await(ctx); err != nil {
+		log.Fatal("failed to wait for index creation: ", err.Error())
+	}
+
+	loadTask, err := cli.LoadCollection(ctx, milvusclient.NewLoadCollectionOption(collectionName))
+	if err != nil {
+		log.Fatal("failed to load collection: ", err.Error())
+	}
+	if err = loadTask.Await(ctx); err != nil {
+		log.Fatal("failed to wait for collection load: ", err.Error())
+	}
+
+	// Run analyzer with loaded collection field (Must be bm25 function input)
+	result, err := cli.RunAnalyzer(ctx, milvusclient.NewRunAnalyzerOption("test milvus").WithField(collectionName, "text"))
+	if err != nil {
+		log.Fatal("failed to run analyzer with loaded collection field: ", err)
+	}
+
+	println("Run analyzer result with loaded collection field")
+	for _, token := range result[0].Tokens {
+		println(token.Text)
+	}
+
+	params := map[string]any{
+		"tokenizer": "standard",
+		"filter": []any{map[string]any{
+			"type":       "stop",
+			"stop_words": []string{"test"}, // remove word "test"
+		}},
+	}
+	// Run analyzer with new analyzer params
+	result, err = cli.RunAnalyzer(ctx, milvusclient.NewRunAnalyzerOption("test milvus").WithAnalyzerParams(params))
+	if err != nil {
+		log.Fatal("failed to run analyzer with new analyzer params: ", err)
+	}
+
+	println("Run analyzer with new analyzer params")
+	for _, token := range result[0].Tokens {
+		println(token.Text)
+	}
+}
diff --git a/client/milvusclient/read_options.go b/client/milvusclient/read_options.go
index f492ba4ac2..4eeb00b1c7 100644
--- a/client/milvusclient/read_options.go
+++ b/client/milvusclient/read_options.go
@@ -620,23 +620,50 @@ type RunAnalyzerOption interface {
 
 type runAnalyzerOption struct {
 	text           []string
+	collectionName string
+	fieldName      string
+	analyzerNames  []string
 	analyzerParams string
 	withDetail     bool
 	withHash       bool
+	err            error
 }
 
 func (opt *runAnalyzerOption) Request() (*milvuspb.RunAnalyzerRequest, error) {
+	if opt.err != nil {
+		return nil, opt.err
+	}
 	return &milvuspb.RunAnalyzerRequest{
 		Placeholder:    lo.Map(opt.text, func(str string, _ int) []byte { return []byte(str) }),
 		AnalyzerParams: opt.analyzerParams,
+		CollectionName: opt.collectionName,
+		FieldName:      opt.fieldName,
+		AnalyzerNames:  opt.analyzerNames,
+		WithDetail:     opt.withDetail,
+		WithHash:       opt.withHash,
 	}, nil
 }
 
-func (opt *runAnalyzerOption) WithAnalyzerParams(params string) *runAnalyzerOption {
+// WithAnalyzerParamsStr sets the analyzer params from a raw string that is
+// placed in the request unchanged.
+func (opt *runAnalyzerOption) WithAnalyzerParamsStr(params string) *runAnalyzerOption {
 	opt.analyzerParams = params
 	return opt
 }
 
+// WithAnalyzerParams sets the analyzer params from a map, serialized as JSON.
+// A marshal failure is recorded and returned later by Request; the params
+// string is left untouched in that case.
+func (opt *runAnalyzerOption) WithAnalyzerParams(params map[string]any) *runAnalyzerOption {
+	s, err := json.Marshal(params)
+	if err != nil {
+		opt.err = err
+		return opt
+	}
+	opt.analyzerParams = string(s)
+	return opt
+}
+
 func (opt *runAnalyzerOption) WithDetail() *runAnalyzerOption {
 	opt.withDetail = true
 	return opt
@@ -647,7 +674,22 @@ func (opt *runAnalyzerOption) WithHash() *runAnalyzerOption {
 	return opt
 }
 
-func NewRunAnalyzerOption(text []string) *runAnalyzerOption {
+// WithField sets the collection and field names sent in the request, to run
+// the analyzer by collection field.
+func (opt *runAnalyzerOption) WithField(collectionName, fieldName string) *runAnalyzerOption {
+	opt.collectionName = collectionName
+	opt.fieldName = fieldName
+	return opt
+}
+
+// WithAnalyzerName sets the analyzer names sent in the request.
+func (opt *runAnalyzerOption) WithAnalyzerName(names ...string) *runAnalyzerOption {
+	opt.analyzerNames = names
+	return opt
+}
+
+// NewRunAnalyzerOption creates a run-analyzer option for the given texts.
+func NewRunAnalyzerOption(text ...string) *runAnalyzerOption {
 	return &runAnalyzerOption{
 		text: text,
 	}
diff --git a/tests/go_client/testcases/query_test.go b/tests/go_client/testcases/query_test.go
index b9f97a7315..989bb3775e 100644
--- a/tests/go_client/testcases/query_test.go
+++ b/tests/go_client/testcases/query_test.go
@@ -1214,19 +1214,19 @@ func TestRunAnalyzer(t *testing.T) {
 	mc := hp.CreateDefaultMilvusClient(ctx, t)
 
 	// run analyzer with default analyzer
-	tokens, err := mc.RunAnalyzer(ctx, client.NewRunAnalyzerOption([]string{"test doc"}))
+	tokens, err := mc.RunAnalyzer(ctx, client.NewRunAnalyzerOption("test doc"))
 	require.NoError(t, err)
 	for i, text := range []string{"test", "doc"} {
 		require.Equal(t, text, tokens[0].Tokens[i].Text)
 	}
 
 	// run analyzer with invalid params
-	_, err = mc.RunAnalyzer(ctx, client.NewRunAnalyzerOption([]string{"text doc"}).WithAnalyzerParams("invalid params}"))
+	_, err = mc.RunAnalyzer(ctx, client.NewRunAnalyzerOption("text doc").WithAnalyzerParamsStr("invalid params}"))
 	common.CheckErr(t, err, false, "JsonError")
 
 	// run analyzer with custom analyzer
-	tokens, err = mc.RunAnalyzer(ctx, client.NewRunAnalyzerOption([]string{"test doc"}).
-		WithAnalyzerParams(`{"type": "standard", "stop_words": ["test"]}`))
+	tokens, err = mc.RunAnalyzer(ctx, client.NewRunAnalyzerOption("test doc").
+		WithAnalyzerParamsStr(`{"type": "standard", "stop_words": ["test"]}`))
 	require.NoError(t, err)
 	for i, text := range []string{"doc"} {