package testcases

import (
	"fmt"
	"math"
	"testing"
	"time"

	"github.com/stretchr/testify/require"

	"github.com/milvus-io/milvus/client/v2/entity"
	"github.com/milvus-io/milvus/client/v2/index"
	"github.com/milvus-io/milvus/client/v2/milvusclient"
	"github.com/milvus-io/milvus/tests/go_client/common"
	hp "github.com/milvus-io/milvus/tests/go_client/testcases/helper"
)

// TestPhraseMatchDefault tests basic phrase match functionality with slop=0.
//
// The generated query text is handed to the insert helper, so an exact
// phrase_match on that same text is expected to hit at least one row.
func TestPhraseMatchDefault(t *testing.T) {
	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
	mc := hp.CreateDefaultMilvusClient(ctx, t)

	// create -> insert -> flush -> index -> load
	analyzerParams := map[string]any{"tokenizer": "standard"}
	fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams)
	function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
	schemaOption := hp.TNewSchemaOption().TWithFunction(function)
	prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)

	query := common.GenText(common.DefaultTextLang)
	insertOption := hp.TNewDataOption().TWithTextLang(common.DefaultTextLang).TWithTextData([]string{query})
	prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
	prepare.FlushData(ctx, t, mc, schema.CollectionName)

	indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
	prepare.CreateIndex(ctx, t, mc, indexparams)
	prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))

	// Test exact phrase match (slop=0)
	expr := fmt.Sprintf("phrase_match(%s, \"%s\", 0)", common.DefaultTextFieldName, query)
	queryRes, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter(expr))
	common.CheckErr(t, err, true)

	// Results may vary as we're using auto-generated data, but it should be >= 1,
	// since the query text has been inserted.
	require.GreaterOrEqual(t, queryRes.ResultCount, 1)
}

// TestPhraseMatchWithSlop tests phrase match with different slop values.
//
// Every sub-test queries with the exact text that was handed to the insert
// helper, so each slop value is expected to return at least one row.
func TestPhraseMatchWithSlop(t *testing.T) {
	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
	mc := hp.CreateDefaultMilvusClient(ctx, t)

	// create -> insert -> flush -> index -> load
	analyzerParams := map[string]any{"tokenizer": "standard"}
	fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams)
	function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
	schemaOption := hp.TNewSchemaOption().TWithFunction(function)
	prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)

	// Insert test data that contains the query text itself.
	query := common.GenText(common.DefaultTextLang)
	insertOption := hp.TNewDataOption().TWithTextLang(common.DefaultTextLang).TWithTextData([]string{query})
	prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
	prepare.FlushData(ctx, t, mc, schema.CollectionName)

	indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
	prepare.CreateIndex(ctx, t, mc, indexparams)
	prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))

	// Test cases with different slop values.
	// slop is uint32 so that math.MaxUint32 is representable on every
	// platform; with a plain int the constant overflows on 32-bit targets.
	testCases := []struct {
		name string
		slop uint32
	}{
		// Matches only exact phrase
		{name: "ExactMatch", slop: 0},
		// Matches phrases with 1 word between
		{name: "SmallSlop", slop: 1},
		// Matches phrases with 2 words between
		{name: "MediumSlop", slop: 2},
		// Matches phrases with up to 3 words between
		{name: "LargeSlop", slop: 3},
		// Matches phrases with up to max u32 words between
		{name: "VeryLargeSlop", slop: math.MaxUint32},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			expr := fmt.Sprintf("phrase_match(%s, \"%s\", %d)", common.DefaultTextFieldName, query, tc.slop)
			queryRes, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter(expr))
			common.CheckErr(t, err, true)
			require.GreaterOrEqual(t, queryRes.ResultCount, 1)
		})
	}
}

// TestPhraseMatchWithDiffLang tests phrase match with different languages.
//
// Each sub-test builds its own collection with the tokenizer named in the
// test case, inserts text generated for that language, and expects a
// phrase_match on the same text to return at least one row.
func TestPhraseMatchWithDiffLang(t *testing.T) {
	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
	mc := hp.CreateDefaultMilvusClient(ctx, t)

	// Test cases for different languages and analyzers
	testCases := []struct {
		name     string
		language string
		analyzer string
		slop     int
	}{
		{
			name:     "English_Standard",
			language: common.English,
			analyzer: "standard",
			slop:     3,
		},
		{
			name:     "Chinese_Jieba",
			language: common.Chinese,
			analyzer: "jieba",
			slop:     3,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// create -> insert -> flush -> index -> load
			analyzerParams := map[string]any{"tokenizer": tc.analyzer}
			fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams)
			function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
			schemaOption := hp.TNewSchemaOption().TWithFunction(function)
			prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)

			query := common.GenText(tc.language)
			insertOption := hp.TNewDataOption().TWithTextLang(tc.language).TWithTextData([]string{query})
			prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
			prepare.FlushData(ctx, t, mc, schema.CollectionName)

			indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
			prepare.CreateIndex(ctx, t, mc, indexparams)
			prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))

			expr := fmt.Sprintf("phrase_match(%s, \"%s\", %d)", common.DefaultTextFieldName, query, tc.slop)
			queryRes, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter(expr))
			common.CheckErr(t, err, true)
			require.GreaterOrEqual(t, queryRes.ResultCount, 1)
		})
	}
}

// TestPhraseMatchWithEmptyData tests phrase match with empty data.
//
// The insert option requests an empty-text percentage of 100, and the test
// expects a phrase_match on freshly generated text to return no rows.
func TestPhraseMatchWithEmptyData(t *testing.T) {
	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
	mc := hp.CreateDefaultMilvusClient(ctx, t)

	// create -> insert -> flush -> index -> load
	analyzerParams := map[string]any{"tokenizer": "standard"}
	fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams)
	function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
	schemaOption := hp.TNewSchemaOption().TWithFunction(function)
	prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)

	insertOption := hp.TNewDataOption().TWithTextLang(common.DefaultTextLang).TWithTextEmptyPercent(100)
	prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
	prepare.FlushData(ctx, t, mc, schema.CollectionName)

	indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
	prepare.CreateIndex(ctx, t, mc, indexparams)
	prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))

	// Test phrase match with empty data
	query := common.GenText(common.DefaultTextLang)
	expr := fmt.Sprintf("phrase_match(%s, \"%s\", 0)", common.DefaultTextFieldName, query)
	queryRes, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter(expr))
	common.CheckErr(t, err, true)
	require.Equal(t, 0, queryRes.ResultCount)
}

// TestPhraseMatchNullable tests phrase match on a nullable text field.
//
// Rows at even indexes are marked valid and rows at odd indexes are marked
// null. The test checks that phrase_match still finds the inserted text and
// that an "is null" filter returns the null half of the rows.
func TestPhraseMatchNullable(t *testing.T) {
	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
	mc := hp.CreateDefaultMilvusClient(ctx, t)

	// create -> insert -> flush -> index -> load
	fieldsOption := hp.TNewFieldOptions().WithFieldOption(common.DefaultTextFieldName, hp.TNewFieldsOption().TWithNullable(true).
		TWithEnableAnalyzer(true).TWithEnableMatch(true))
	prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, hp.TNewSchemaOption())

	// insert data where every odd-index row is null
	validData := make([]bool, common.DefaultNb)
	for i := range validData {
		validData[i] = i%2 == 0
	}
	query := common.GenText(common.DefaultTextLang)
	insertOption := hp.TNewColumnOptions().WithColumnOption(common.DefaultTextFieldName, hp.TNewDataOption().TWithTextLang(common.DefaultTextLang).TWithTextData([]string{query}).TWithValidData(validData))
	prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
	prepare.FlushData(ctx, t, mc, schema.CollectionName)

	prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema))
	prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))

	// Test exact phrase match (slop=0)
	expr := fmt.Sprintf("phrase_match(%s, \"%s\", 0)", common.DefaultTextFieldName, query)
	queryRes, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter(expr))
	common.CheckErr(t, err, true)

	// Results may vary as we're using auto-generated data, but it should be >= 1,
	// since the query text has been inserted.
	require.GreaterOrEqual(t, queryRes.ResultCount, 1)

	// The null filter must be checked against the actual query result: the
	// odd-index rows (DefaultNb/2 of them) were inserted as null.
	// NOTE(review): assumes Query without a limit returns every matching row — confirm.
	exprNull := fmt.Sprintf("%s is null", common.DefaultTextFieldName)
	queryRes, err = mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter(exprNull))
	common.CheckErr(t, err, true)
	require.EqualValues(t, common.DefaultNb/2, queryRes.ResultCount)
}

// TestPhraseMatchDefaultValue tests text match and phrase match on a text
// field that declares a default value.
//
// Rows at odd indexes are marked invalid; the test expects exactly those
// DefaultNb/2 rows to be matched by TEXT_MATCH on words of the default text.
func TestPhraseMatchDefaultValue(t *testing.T) {
	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
	mc := hp.CreateDefaultMilvusClient(ctx, t)

	// create -> insert -> flush -> index -> load
	defaultText := "milvus vector database"
	fieldsOption := hp.TNewFieldOptions().WithFieldOption(common.DefaultTextFieldName, hp.TNewFieldsOption().TWithDefaultValue(defaultText).
		TWithEnableAnalyzer(true).TWithEnableMatch(true))
	prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, hp.TNewSchemaOption())

	// insert data where every odd-index row is marked invalid
	validData := make([]bool, common.DefaultNb)
	for i := range validData {
		validData[i] = i%2 == 0
	}
	insertOption := hp.TNewColumnOptions().WithColumnOption(common.DefaultTextFieldName, hp.TNewDataOption().TWithTextLang(common.DefaultTextLang).TWithValidData(validData))
	prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
	prepare.FlushData(ctx, t, mc, schema.CollectionName)

	prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema))
	prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))

	// TEXT_MATCH on words of the default text is expected to hit the
	// DefaultNb/2 rows that were inserted as invalid.
	exprText := fmt.Sprintf("TEXT_MATCH(%s, 'database vector')", common.DefaultTextFieldName)
	countRes, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter(exprText).WithOutputFields(common.QueryCountFieldName))
	common.CheckErr(t, err, true)
	count, err := countRes.Fields[0].GetAsInt64(0)
	require.NoError(t, err)
	require.EqualValues(t, common.DefaultNb/2, count)

	// The phrase is in reverse word order relative to the default text; the
	// test expects a slop of 1 not to be enough to match it.
	exprPhrase := fmt.Sprintf("phrase_match(%s, 'database vector', 1)", common.DefaultTextFieldName)
	countRes, err = mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter(exprPhrase).WithOutputFields(common.QueryCountFieldName))
	common.CheckErr(t, err, true)
	count, err = countRes.Fields[0].GetAsInt64(0)
	require.NoError(t, err)
	require.EqualValues(t, 0, count)
}