mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 01:28:27 +08:00
relate: https://github.com/milvus-io/milvus/issues/42094 pr: https://github.com/milvus-io/milvus/pull/42642 Signed-off-by: aoiasd <zhicheng.yue@zilliz.com>
This commit is contained in:
parent
78b66a29b6
commit
f3e8f61f60
@ -524,3 +524,63 @@ func ExampleClient_DropCollection() {
|
||||
// handle err
|
||||
}
|
||||
}
|
||||
|
||||
func ExampleClient_RunAnalyzer() {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
milvusAddr := "127.0.0.1:19530"
|
||||
collectionName := "test_run_analyzer"
|
||||
|
||||
cli, err := milvusclient.New(ctx, &milvusclient.ClientConfig{
|
||||
Address: milvusAddr,
|
||||
})
|
||||
if err != nil {
|
||||
log.Fatal("failed to connect to milvus server: ", err.Error())
|
||||
}
|
||||
defer cli.Close(ctx)
|
||||
|
||||
schema := entity.NewSchema().
|
||||
WithField(entity.NewField().WithName("pk").WithIsPrimaryKey(true).WithIsAutoID(true).WithDataType(entity.FieldTypeInt64)).
|
||||
WithField(entity.NewField().WithName("text").WithDataType(entity.FieldTypeVarChar).WithMaxLength(255).WithEnableAnalyzer(true).WithAnalyzerParams(map[string]any{"tokenizer": "standard"})).
|
||||
WithField(entity.NewField().WithName("sparse").WithDataType(entity.FieldTypeSparseVector)).
|
||||
WithFunction(entity.NewFunction().WithInputFields("text").WithOutputFields("sparse").WithType(entity.FunctionTypeBM25).WithName("bm25")).
|
||||
WithAutoID(true)
|
||||
|
||||
err = cli.CreateCollection(ctx, milvusclient.NewCreateCollectionOption(collectionName, schema))
|
||||
if err != nil {
|
||||
log.Fatal("failed to connect to create test collection: ", err.Error())
|
||||
}
|
||||
|
||||
cli.CreateIndex(ctx, milvusclient.NewCreateIndexOption(collectionName, "sparse", index.NewAutoIndex(entity.BM25)).WithIndexName("bm25"))
|
||||
cli.LoadCollection(ctx, milvusclient.NewLoadCollectionOption(collectionName))
|
||||
|
||||
// Run analyzer with loaded collection field (Must be bm25 function input)
|
||||
result, err := cli.RunAnalyzer(ctx, milvusclient.NewRunAnalyzerOption("test milvus").WithField(collectionName, "text"))
|
||||
if err != nil {
|
||||
log.Fatal("failed to run analyzer with loaded collection field: ", err)
|
||||
}
|
||||
|
||||
println("Run analyzer result with loaded collection field")
|
||||
for _, token := range result[0].Tokens {
|
||||
println(token.Text)
|
||||
}
|
||||
|
||||
params := map[string]any{
|
||||
"tokenizer": "standard",
|
||||
"filter": []any{map[string]any{
|
||||
"type": "stop",
|
||||
"stop_words": []string{"test"}, // remove word "test"
|
||||
}},
|
||||
}
|
||||
// Run analyzer with new analyzer params
|
||||
result, err = cli.RunAnalyzer(ctx, milvusclient.NewRunAnalyzerOption("test milvus").WithAnalyzerParams(params))
|
||||
if err != nil {
|
||||
log.Fatal("failed to run analyzer with new analyzer params: ", err)
|
||||
}
|
||||
|
||||
println("Run analyzer with new analyzer params")
|
||||
for _, token := range result[0].Tokens {
|
||||
println(token.Text)
|
||||
}
|
||||
}
|
||||
|
||||
@ -620,23 +620,44 @@ type RunAnalyzerOption interface {
|
||||
|
||||
// runAnalyzerOption accumulates the settings from which Request builds a
// RunAnalyzerRequest.
type runAnalyzerOption struct {
	// text holds the input strings; Request sends each one as a byte placeholder.
	text []string

	// collectionName and fieldName are set together by WithField.
	collectionName string
	fieldName      string

	// analyzerNames is set by WithAnalyzerName.
	analyzerNames []string
	// analyzerParams is the analyzer configuration in string form.
	analyzerParams string

	// withDetail and withHash are copied into the request flags of the same name.
	withDetail bool
	withHash   bool

	// err is a deferred option-building error; Request returns it instead of a request.
	err error
}
|
||||
|
||||
func (opt *runAnalyzerOption) Request() (*milvuspb.RunAnalyzerRequest, error) {
|
||||
if opt.err != nil {
|
||||
return nil, opt.err
|
||||
}
|
||||
return &milvuspb.RunAnalyzerRequest{
|
||||
Placeholder: lo.Map(opt.text, func(str string, _ int) []byte { return []byte(str) }),
|
||||
AnalyzerParams: opt.analyzerParams,
|
||||
CollectionName: opt.collectionName,
|
||||
FieldName: opt.fieldName,
|
||||
AnalyzerNames: opt.analyzerNames,
|
||||
WithDetail: opt.withDetail,
|
||||
WithHash: opt.withHash,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (opt *runAnalyzerOption) WithAnalyzerParams(params string) *runAnalyzerOption {
|
||||
func (opt *runAnalyzerOption) WithAnalyzerParamsStr(params string) *runAnalyzerOption {
|
||||
opt.analyzerParams = params
|
||||
return opt
|
||||
}
|
||||
|
||||
func (opt *runAnalyzerOption) WithAnalyzerParams(params map[string]any) *runAnalyzerOption {
|
||||
s, err := json.Marshal(params)
|
||||
if err != nil {
|
||||
opt.err = err
|
||||
}
|
||||
opt.analyzerParams = string(s)
|
||||
return opt
|
||||
}
|
||||
|
||||
func (opt *runAnalyzerOption) WithDetail() *runAnalyzerOption {
|
||||
opt.withDetail = true
|
||||
return opt
|
||||
@ -647,7 +668,18 @@ func (opt *runAnalyzerOption) WithHash() *runAnalyzerOption {
|
||||
return opt
|
||||
}
|
||||
|
||||
func NewRunAnalyzerOption(text []string) *runAnalyzerOption {
|
||||
func (opt *runAnalyzerOption) WithField(collectionName, fieldName string) *runAnalyzerOption {
|
||||
opt.collectionName = collectionName
|
||||
opt.fieldName = fieldName
|
||||
return opt
|
||||
}
|
||||
|
||||
func (opt *runAnalyzerOption) WithAnalyzerName(names ...string) *runAnalyzerOption {
|
||||
opt.analyzerNames = names
|
||||
return opt
|
||||
}
|
||||
|
||||
func NewRunAnalyzerOption(text ...string) *runAnalyzerOption {
|
||||
return &runAnalyzerOption{
|
||||
text: text,
|
||||
}
|
||||
|
||||
@ -1214,19 +1214,19 @@ func TestRunAnalyzer(t *testing.T) {
|
||||
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
||||
|
||||
// run analyzer with default analyzer
|
||||
tokens, err := mc.RunAnalyzer(ctx, client.NewRunAnalyzerOption([]string{"test doc"}))
|
||||
tokens, err := mc.RunAnalyzer(ctx, client.NewRunAnalyzerOption("test doc"))
|
||||
require.NoError(t, err)
|
||||
for i, text := range []string{"test", "doc"} {
|
||||
require.Equal(t, text, tokens[0].Tokens[i].Text)
|
||||
}
|
||||
|
||||
// run analyzer with invalid params
|
||||
_, err = mc.RunAnalyzer(ctx, client.NewRunAnalyzerOption([]string{"text doc"}).WithAnalyzerParams("invalid params}"))
|
||||
_, err = mc.RunAnalyzer(ctx, client.NewRunAnalyzerOption("text doc").WithAnalyzerParamsStr("invalid params}"))
|
||||
common.CheckErr(t, err, false, "JsonError")
|
||||
|
||||
// run analyzer with custom analyzer
|
||||
tokens, err = mc.RunAnalyzer(ctx, client.NewRunAnalyzerOption([]string{"test doc"}).
|
||||
WithAnalyzerParams(`{"type": "standard", "stop_words": ["test"]}`))
|
||||
tokens, err = mc.RunAnalyzer(ctx, client.NewRunAnalyzerOption("test doc").
|
||||
WithAnalyzerParamsStr(`{"type": "standard", "stop_words": ["test"]}`))
|
||||
|
||||
require.NoError(t, err)
|
||||
for i, text := range []string{"doc"} {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user