diff --git a/tests/go_client/README.md b/tests/go_client/README.md index ff85fa1fc7..e54244b5d5 100644 --- a/tests/go_client/README.md +++ b/tests/go_client/README.md @@ -1 +1,191 @@ -## go_client \ No newline at end of file +# Milvus Go Client Test Framework + +## Overview +This is a comprehensive test framework for the Milvus Go Client, designed to validate various functionalities of the Milvus vector database client. The framework provides a structured approach to writing tests with reusable components and helper functions. + +## Framework Architecture + +### Directory Structure +``` +/go_client/ +├── testcases/ # Main test cases +│ ├── helper/ # Helper functions and utilities +│ │ ├── helper.go +│ │ ├── data_helper.go +│ │ └── collection_helper.go +│ ├── search_test.go # Search functionality tests +│ ├── index_test.go # Index management tests +│ └── ... +├── common/ # Common utilities and constants +└── base/ # Base infrastructure code +``` + +### Key Components +- **Collection Preparation**: Utilities for creating and managing collections +- **Data Generation**: Tools for generating test data +- **Helper Functions**: Common operations and validations +- **Test Cases**: Organized by functionality + +## Writing Test Cases + +### Basic Test Structure +```go +func TestYourFeature(t *testing.T) { + // 1. Setup context and client + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + // 2. Prepare collection + prepare, schema := hp.CollPrepare.CreateCollection( + ctx, t, mc, + hp.NewCreateCollectionParams(hp.Int64Vec), + hp.TNewFieldsOption(), + hp.TNewSchemaOption(), + ) + + // 3. Insert test data + prepare.InsertData(ctx, t, mc, + hp.NewInsertParams(schema), + hp.TNewDataOption(), + ) + + // 4. Execute test operations + // ... your test logic here ... + + // 5. Validate results + require.NoError(t, err) + require.Equal(t, expected, actual) +} +``` + +### Using Custom Parameters + +1. 
**Collection Creation Parameters** +```go +fieldsOption := hp.TNewFieldsOption(). + TWithEnableAnalyzer(true). + TWithAnalyzerParams(map[string]any{ + "tokenizer": "standard", + }) + +schemaOption := hp.TNewSchemaOption(). + TWithEnableDynamicField(true). + TWithDescription("Custom schema"). + TWithAutoID(false) +``` + +2. **Data Insertion Options** +```go +insertOption := hp.TNewDataOption(). + TWithNb(1000). // Number of records + TWithDim(128). // Vector dimension + TWithStart(100). // Starting ID + TWithMaxLen(256). // Maximum length + TWithTextLang("en") // Text language +``` + +3. **Index Parameters** +```go +indexParams := hp.TNewIndexParams(schema). + TWithFieldIndex(map[string]index.Index{ + common.DefaultVectorFieldName: index.NewIVFSQIndex( + &index.IVFSQConfig{ + MetricType: entity.L2, + NList: 128, + }, + ), + }) +``` + +4. **Search Parameters** +```go +searchOpt := client.NewSearchOption(schema.CollectionName, 100, vectors). + WithOffset(0). + WithLimit(100). + WithConsistencyLevel(entity.ClStrong). + WithFilter("int64 >= 100"). + WithOutputFields([]string{"*"}). + WithSearchParams(map[string]any{ + "nprobe": 16, + "ef": 64, + }) +``` + +## Adding New Parameters + +1. **Define New Option Type** +```go +// In helper/data_helper.go +type YourNewOption struct { + newParam1 string + newParam2 int +} +``` + +2. **Add Constructor** +```go +func TNewYourOption() *YourNewOption { + return &YourNewOption{ + newParam1: "default", + newParam2: 0, + } +} +``` + +3. **Add Parameter Methods** +```go +func (opt *YourNewOption) TWithNewParam1(value string) *YourNewOption { + opt.newParam1 = value + return opt +} + +func (opt *YourNewOption) TWithNewParam2(value int) *YourNewOption { + opt.newParam2 = value + return opt +} +``` + +## Best Practices + +1. **Test Organization** + - Group related tests in the same file + - Use clear and descriptive test names + - Add comments explaining test purpose + +2. 
**Data Generation** + - Use helper functions for generating test data + - Ensure data is appropriate for the test case + - Clean up test data after use + +3. **Error Handling** + - Use `common.CheckErr` for consistent error checking + - Test both success and failure scenarios + - Validate error messages when appropriate + +4. **Performance Considerations** + - Use appropriate timeouts + - Clean up resources after tests + - Consider test execution time + +## Running Tests + +```bash +# Run all tests +go test ./testcases/... + +# Run specific test +go test -run TestYourFeature ./testcases/ + +# Run with verbose output +go test -v ./testcases/... +``` + +## Contributing +1. Follow the existing code structure +2. Add comprehensive test cases +3. Document new parameters and options +4. Update this README for significant changes +5. Ensure code quality standards: + - Run `golangci-lint run` to check for style mistakes + - Use `gofmt -w your/code/path` to format your code before submitting + - CI will verify both golint and go format compliance \ No newline at end of file diff --git a/tests/go_client/common/consts.go b/tests/go_client/common/consts.go index fe97df0e68..6b2d68a66e 100644 --- a/tests/go_client/common/consts.go +++ b/tests/go_client/common/consts.go @@ -4,33 +4,35 @@ import "github.com/milvus-io/milvus/client/v2/index" // cost default field name const ( - DefaultInt8FieldName = "int8" - DefaultInt16FieldName = "int16" - DefaultInt32FieldName = "int32" - DefaultInt64FieldName = "int64" - DefaultBoolFieldName = "bool" - DefaultFloatFieldName = "float" - DefaultDoubleFieldName = "double" - DefaultVarcharFieldName = "varchar" - DefaultJSONFieldName = "json" - DefaultArrayFieldName = "array" - DefaultFloatVecFieldName = "floatVec" - DefaultBinaryVecFieldName = "binaryVec" - DefaultFloat16VecFieldName = "fp16Vec" - DefaultBFloat16VecFieldName = "bf16Vec" - DefaultSparseVecFieldName = "sparseVec" - DefaultDynamicNumberField = "dynamicNumber" - 
DefaultDynamicStringField = "dynamicString" - DefaultDynamicBoolField = "dynamicBool" - DefaultDynamicListField = "dynamicList" - DefaultBoolArrayField = "boolArray" - DefaultInt8ArrayField = "int8Array" - DefaultInt16ArrayField = "int16Array" - DefaultInt32ArrayField = "int32Array" - DefaultInt64ArrayField = "int64Array" - DefaultFloatArrayField = "floatArray" - DefaultDoubleArrayField = "doubleArray" - DefaultVarcharArrayField = "varcharArray" + DefaultInt8FieldName = "int8" + DefaultInt16FieldName = "int16" + DefaultInt32FieldName = "int32" + DefaultInt64FieldName = "int64" + DefaultBoolFieldName = "bool" + DefaultFloatFieldName = "float" + DefaultDoubleFieldName = "double" + DefaultTextFieldName = "text" + DefaultVarcharFieldName = "varchar" + DefaultJSONFieldName = "json" + DefaultArrayFieldName = "array" + DefaultFloatVecFieldName = "floatVec" + DefaultBinaryVecFieldName = "binaryVec" + DefaultFloat16VecFieldName = "fp16Vec" + DefaultBFloat16VecFieldName = "bf16Vec" + DefaultTextSparseVecFieldName = "textSparseVec" + DefaultSparseVecFieldName = "sparseVec" + DefaultDynamicNumberField = "dynamicNumber" + DefaultDynamicStringField = "dynamicString" + DefaultDynamicBoolField = "dynamicBool" + DefaultDynamicListField = "dynamicList" + DefaultBoolArrayField = "boolArray" + DefaultInt8ArrayField = "int8Array" + DefaultInt16ArrayField = "int16Array" + DefaultInt32ArrayField = "int32Array" + DefaultInt64ArrayField = "int64Array" + DefaultFloatArrayField = "floatArray" + DefaultDoubleArrayField = "doubleArray" + DefaultVarcharArrayField = "varcharArray" ) // cost for test cases @@ -86,3 +88,8 @@ const ( DatabaseForceDenyReading = "database.force.deny.reading" DatabaseDiskQuotaMb = "database.diskQuota.mb" ) + +// const for full text search +const ( + DefaultTextLang = "en" +) diff --git a/tests/go_client/common/utils.go b/tests/go_client/common/utils.go index 76722bc200..1691329671 100644 --- a/tests/go_client/common/utils.go +++ b/tests/go_client/common/utils.go @@ 
-153,3 +153,59 @@ var InvalidExpressions = []InvalidExprStruct{ {Expr: fmt.Sprintf("%s[-1] > %d", DefaultInt8ArrayField, TestCapacity), ErrNil: false, ErrMsg: "cannot parse expression"}, // array[-1] > {Expr: fmt.Sprintf("%s[-1] > 1", DefaultJSONFieldName), ErrNil: false, ErrMsg: "invalid expression"}, // json[-1] > } + +// Language constants for text generation +const ( + English = "en" + Chinese = "zh" +) + +func GenText(lang string) string { + englishTopics := []string{ + "information retrieval", "data mining", "machine learning", + "natural language processing", "text analysis", "search engines", + "document indexing", "query processing", "relevance ranking", + "semantic search", + } + englishVerbs := []string{ + "is", "focuses on", "deals with", "involves", "combines", + "utilizes", "improves", "enables", "enhances", "supports", + } + englishObjects := []string{ + "large datasets", "text documents", "user queries", "search results", + "information needs", "relevance scores", "ranking algorithms", + "index structures", "query expansion", "document collections", + } + + chineseTopics := []string{ + "信息检索", "数据挖掘", "机器学习", + "自然语言处理", "文本分析", "搜索引擎", + "文档索引", "查询处理", "相关性排序", + "语义搜索", + } + chineseVerbs := []string{ + "是", "专注于", "处理", "涉及", "结合", + "利用", "改进", "实现", "提升", "支持", + } + chineseObjects := []string{ + "大规模数据集", "文本文档", "用户查询", "搜索结果", + "信息需求", "相关性分数", "排序算法", + "索引结构", "查询扩展", "文档集合", + } + + var topic, verb, object string + switch lang { + case English: + topic = englishTopics[rand.Intn(len(englishTopics))] + verb = englishVerbs[rand.Intn(len(englishVerbs))] + object = englishObjects[rand.Intn(len(englishObjects))] + return fmt.Sprintf("%s %s %s", topic, verb, object) + case Chinese: + topic = chineseTopics[rand.Intn(len(chineseTopics))] + verb = chineseVerbs[rand.Intn(len(chineseVerbs))] + object = chineseObjects[rand.Intn(len(chineseObjects))] + return fmt.Sprintf("%s%s%s", topic, verb, object) + default: + return "Unsupported language" + } +} 
diff --git a/tests/go_client/testcases/full_text_search_test.go b/tests/go_client/testcases/full_text_search_test.go new file mode 100644 index 0000000000..5aeed1ac4f --- /dev/null +++ b/tests/go_client/testcases/full_text_search_test.go @@ -0,0 +1,283 @@ +package testcases + +import ( + "testing" + "time" + + "github.com/milvus-io/milvus/client/v2/entity" + "github.com/milvus-io/milvus/client/v2/index" + "github.com/milvus-io/milvus/client/v2/milvusclient" + "github.com/milvus-io/milvus/tests/go_client/common" + hp "github.com/milvus-io/milvus/tests/go_client/testcases/helper" +) + +func TestFullTextSearchDefault(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + // create -> insert -> flush -> index -> load + analyzerParams := map[string]any{"tokenizer": "standard"} + fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams) + function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName) + schemaOption := hp.TNewSchemaOption().TWithFunction(function) + prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption) + insertOption := hp.TNewDataOption().TWithTextLang(common.DefaultTextLang) + prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption) + prepare.FlushData(ctx, t, mc, schema.CollectionName) + + indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)}) + prepare.CreateIndex(ctx, t, mc, indexparams) + prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName)) + + // search + queries := hp.GenFullTextQuery(common.DefaultNq, common.DefaultTextLang) + vectors := make([]entity.Vector, 0, len(queries)) + for _, query := range queries { + vectors = append(vectors, entity.Text(query)) + } + resSearch, err := mc.Search(ctx, 
milvusclient.NewSearchOption(schema.CollectionName, common.DefaultLimit, vectors).WithConsistencyLevel(entity.ClStrong)) + common.CheckErr(t, err, true) + common.CheckSearchResult(t, resSearch, common.DefaultNq, common.DefaultLimit) +} + +// TestSearchFullTextWithDiffLang tests basic full text search functionality with different languages +func TestSearchFullTextWithDiffLang(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + // Test cases for different languages and analyzers + testCases := []struct { + name string + language string + analyzer string + query string + numRows int + topK int + }{ + { + name: "English_Standard", + language: "english", + analyzer: "standard", + query: "what is information retrieval and its applications?", + numRows: 3000, + topK: 10, + }, + { + name: "Chinese_Jieba", + language: "chinese", + analyzer: "jieba", + query: "信息检索的应用", + numRows: 3000, + topK: 10, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + analyzerParams := map[string]any{"tokenizer": tc.analyzer} + fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams) + function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName) + schemaOption := hp.TNewSchemaOption().TWithFunction(function) + prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption) + insertOption := hp.TNewDataOption().TWithTextLang(tc.language).TWithNb(tc.numRows) + prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption) + prepare.FlushData(ctx, t, mc, schema.CollectionName) + + indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)}) + prepare.CreateIndex(ctx, t, mc, indexparams) + prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName)) + +
// search + queries := []string{tc.query} + vectors := make([]entity.Vector, 0, len(queries)) + for _, query := range queries { + vectors = append(vectors, entity.Text(query)) + } + resSearch, err := mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, tc.topK, vectors).WithConsistencyLevel(entity.ClStrong)) + common.CheckErr(t, err, true) + common.CheckSearchResult(t, resSearch, len(queries), tc.topK) + }) + } +} + +// TestSearchFullTextWithDynamicField tests full text search with dynamic field enabled +func TestSearchFullTextWithDynamicField(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + // Test cases for different languages and analyzers + testCases := []struct { + name string + language string + analyzer string + query string + numRows int + topK int + }{ + { + name: "English_Standard", + language: "english", + analyzer: "standard", + query: "what is information retrieval and its applications?", + numRows: 1000, + topK: 5, + }, + { + name: "Chinese_Jieba", + language: "chinese", + analyzer: "jieba", + query: "信息检索的应用", + numRows: 1000, + topK: 5, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + analyzerParams := map[string]any{"tokenizer": tc.analyzer} + fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams) + function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName) + schemaOption := hp.TNewSchemaOption().TWithFunction(function).TWithEnableDynamicField(true) + prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption) + insertOption := hp.TNewDataOption().TWithTextLang(tc.language).TWithNb(tc.numRows) + prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption) + prepare.FlushData(ctx, t, mc, schema.CollectionName) + + indexparams := 
hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)}) + prepare.CreateIndex(ctx, t, mc, indexparams) + prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName)) + + // search + queries := []string{tc.query} + vectors := make([]entity.Vector, 0, len(queries)) + for _, query := range queries { + vectors = append(vectors, entity.Text(query)) + } + resSearch, err := mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, tc.topK, vectors).WithConsistencyLevel(entity.ClStrong)) + common.CheckErr(t, err, true) + common.CheckSearchResult(t, resSearch, len(queries), tc.topK) + }) + } +} + +// TestSearchFullTextWithPartitionKey tests full text search with partition key +func TestSearchFullTextWithPartitionKey(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + // Test cases for different languages and analyzers + testCases := []struct { + name string + language string + analyzer string + query string + numRows int + topK int + }{ + { + name: "English_Standard", + language: "english", + analyzer: "standard", + query: "what is information retrieval and its applications?", + numRows: 1000, + topK: 5, + }, + { + name: "Chinese_Jieba", + language: "chinese", + analyzer: "jieba", + query: "信息检索的应用", + numRows: 1000, + topK: 5, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + analyzerParams := map[string]any{"tokenizer": tc.analyzer} + fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams).TWithIsPartitionKey(true) + function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName) + schemaOption := hp.TNewSchemaOption().TWithFunction(function) + prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption) + insertOption := 
hp.TNewDataOption().TWithTextLang(tc.language).TWithNb(tc.numRows) + prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption) + prepare.FlushData(ctx, t, mc, schema.CollectionName) + + indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)}) + prepare.CreateIndex(ctx, t, mc, indexparams) + prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName)) + + // search + queries := []string{tc.query} + vectors := make([]entity.Vector, 0, len(queries)) + for _, query := range queries { + vectors = append(vectors, entity.Text(query)) + } + resSearch, err := mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, tc.topK, vectors).WithConsistencyLevel(entity.ClStrong)) + common.CheckErr(t, err, true) + common.CheckSearchResult(t, resSearch, len(queries), tc.topK) + }) + } +} + +// TestSearchFullTextWithEmptyData tests full text search with empty data +func TestSearchFullTextWithEmptyData(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + // Test cases for different empty percent + testCases := []struct { + name string + language string + analyzer string + query string + numRows int + topK int + emptyPercent int + }{ + { + name: "English_Standard", + language: "english", + analyzer: "standard", + query: "what is information retrieval and its applications?", + numRows: 3000, + topK: 5, + emptyPercent: 50, + }, + { + name: "Chinese_Jieba", + language: "chinese", + analyzer: "jieba", + query: "信息检索的应用", + numRows: 3000, + topK: 5, + emptyPercent: 80, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + analyzerParams := map[string]any{"tokenizer": tc.analyzer} + fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams).TWithIsPartitionKey(true) + function := hp.TNewBM25Function(common.DefaultTextFieldName, 
common.DefaultTextSparseVecFieldName) + schemaOption := hp.TNewSchemaOption().TWithFunction(function) + prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption) + insertOption := hp.TNewDataOption().TWithTextLang(tc.language).TWithNb(tc.numRows).TWithTextEmptyPercent(tc.emptyPercent) + prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption) + prepare.FlushData(ctx, t, mc, schema.CollectionName) + + indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)}) + prepare.CreateIndex(ctx, t, mc, indexparams) + prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName)) + + // search + queries := []string{tc.query} + vectors := make([]entity.Vector, 0, len(queries)) + for _, query := range queries { + vectors = append(vectors, entity.Text(query)) + } + resSearch, err := mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, tc.topK, vectors).WithConsistencyLevel(entity.ClStrong)) + common.CheckErr(t, err, true) + common.CheckSearchResult(t, resSearch, len(queries), tc.topK) + }) + } +} diff --git a/tests/go_client/testcases/helper/data_helper.go b/tests/go_client/testcases/helper/data_helper.go index a15d2a1447..e933339ea6 100644 --- a/tests/go_client/testcases/helper/data_helper.go +++ b/tests/go_client/testcases/helper/data_helper.go @@ -3,6 +3,8 @@ package helper import ( "bytes" "encoding/json" + "math/rand" + "slices" "strconv" "go.uber.org/zap" @@ -38,14 +40,16 @@ func (opt *InsertParams) TWithIsRows(isRows bool) *InsertParams { // GenColumnDataOption -- create column data -- type GenDataOption struct { - nb int - start int - dim int - maxLen int - sparseMaxLen int - maxCapacity int - elementType entity.FieldType - fieldName string + nb int + start int + dim int + maxLen int + sparseMaxLen int + maxCapacity int + elementType entity.FieldType + 
fieldName string + textLang string + textEmptyPercent int } func (opt *GenDataOption) TWithNb(nb int) *GenDataOption { @@ -88,15 +92,28 @@ func (opt *GenDataOption) TWithElementType(eleType entity.FieldType) *GenDataOpt return opt } +func (opt *GenDataOption) TWithTextLang(lang string) *GenDataOption { + opt.textLang = lang + return opt +} + +func (opt *GenDataOption) TWithTextEmptyPercent(percent int) *GenDataOption { + opt.textEmptyPercent = percent + return opt +} + func TNewDataOption() *GenDataOption { return &GenDataOption{ - nb: common.DefaultNb, - start: 0, - dim: common.DefaultDim, - maxLen: common.TestMaxLen, - sparseMaxLen: common.TestMaxLen, - maxCapacity: common.TestCapacity, - elementType: entity.FieldTypeNone, + nb: common.DefaultNb, + start: 0, + dim: common.DefaultDim, + maxLen: common.TestMaxLen, + sparseMaxLen: common.TestMaxLen, + maxCapacity: common.TestCapacity, + elementType: entity.FieldTypeNone, + fieldName: "", + textLang: "", + textEmptyPercent: 0, } } @@ -310,8 +327,35 @@ func GenColumnData(nb int, fieldType entity.FieldType, option GenDataOption) col case entity.FieldTypeVarChar: varcharValues := make([]string, 0, nb) - for i := start; i < start+nb; i++ { - varcharValues = append(varcharValues, strconv.Itoa(i)) + if option.textLang != "" { + // Use language-specific text generation + var lang string + switch option.textLang { + case "en", "english": + lang = "en" + case "zh", "chinese": + lang = "zh" + default: + // Fallback to sequential numbers for unsupported languages + for i := start; i < start+nb; i++ { + varcharValues = append(varcharValues, strconv.Itoa(i)) + } + return column.NewColumnVarChar(fieldName, varcharValues) + } + + // Generate text data with empty values based on textEmptyPercent + for i := 0; i < nb; i++ { + if rand.Float64()*100 < float64(option.textEmptyPercent) { + varcharValues = append(varcharValues, "") + } else { + varcharValues = append(varcharValues, common.GenText(lang)) + } + } + } else { + // Default 
behavior: sequential numbers + for i := start; i < start+nb; i++ { + varcharValues = append(varcharValues, strconv.Itoa(i)) + } } return column.NewColumnVarChar(fieldName, varcharValues) @@ -449,6 +493,16 @@ func MergeColumnsToDynamic(nb int, columns []column.Column, columnName string) * return jsonColumn } +func GetBm25FunctionsOutputFields(schema *entity.Schema) []string { + var outputFields []string + for _, fn := range schema.Functions { + if fn.Type == entity.FunctionTypeBM25 { + outputFields = append(outputFields, fn.OutputFieldNames...) + } + } + return outputFields +} + func GenColumnsBasedSchema(schema *entity.Schema, option *GenDataOption) ([]column.Column, []column.Column) { if nil == schema || schema.CollectionName == "" { log.Fatal("[GenColumnsBasedSchema] Nil Schema is not expected") @@ -463,6 +517,12 @@ func GenColumnsBasedSchema(schema *entity.Schema, option *GenDataOption) ([]colu if field.AutoID { continue } + if slices.Contains(GetBm25FunctionsOutputFields(schema), field.Name) { + continue + } + log.Info("GenColumnsBasedSchema", zap.Any("field", field)) + // set field name to option + option.TWithFieldName(field.Name) columns = append(columns, GenColumnData(option.nb, field.DataType, *option)) } if schema.EnableDynamicField { diff --git a/tests/go_client/testcases/helper/field_helper.go b/tests/go_client/testcases/helper/field_helper.go index a99f17e456..3396f0ee6b 100644 --- a/tests/go_client/testcases/helper/field_helper.go +++ b/tests/go_client/testcases/helper/field_helper.go @@ -107,6 +107,7 @@ const ( Int64MultiVec CollectionFieldsType = 6 // int64 + floatVec + binaryVec + fp16Vec + bf16vec AllFields CollectionFieldsType = 7 // all fields excepted sparse Int64VecAllScalar CollectionFieldsType = 8 // int64 + floatVec + all scalar fields + FullTextSearch CollectionFieldsType = 9 // int64 + varchar + sparse vector + analyzer + function ) type GenFieldsOption struct { @@ -116,6 +117,8 @@ type GenFieldsOption struct { MaxLength int64 // varchar 
len or array capacity MaxCapacity int64 IsPartitionKey bool + EnableAnalyzer bool + AnalyzerParams map[string]any ElementType entity.FieldType } @@ -127,6 +130,8 @@ func TNewFieldsOption() *GenFieldsOption { MaxCapacity: common.TestCapacity, IsDynamic: false, IsPartitionKey: false, + EnableAnalyzer: false, + AnalyzerParams: make(map[string]any), ElementType: entity.FieldTypeNone, } } @@ -166,6 +171,16 @@ func (opt *GenFieldsOption) TWithMaxCapacity(maxCapacity int64) *GenFieldsOption return opt } +func (opt *GenFieldsOption) TWithEnableAnalyzer(enableAnalyzer bool) *GenFieldsOption { + opt.EnableAnalyzer = enableAnalyzer + return opt +} + +func (opt *GenFieldsOption) TWithAnalyzerParams(analyzerParams map[string]any) *GenFieldsOption { + opt.AnalyzerParams = analyzerParams + return opt +} + // factory type FieldsFactory struct{} @@ -341,6 +356,23 @@ func (cf FieldsInt64VecAllScalar) GenFields(option GenFieldsOption) []*entity.Fi return fields } +type FieldsFullTextSearch struct{} + +func (cf FieldsFullTextSearch) GenFields(option GenFieldsOption) []*entity.Field { + pkField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeInt64)).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + textField := entity.NewField().WithName(common.DefaultTextFieldName).WithDataType(entity.FieldTypeVarChar).WithMaxLength(option.MaxLength).WithIsPartitionKey(option.IsPartitionKey).WithEnableAnalyzer(true).WithAnalyzerParams(option.AnalyzerParams) + sparseVecField := entity.NewField().WithName(common.DefaultTextSparseVecFieldName).WithDataType(entity.FieldTypeSparseVector) + if option.AutoID { + pkField.WithIsAutoID(option.AutoID) + } + fields := []*entity.Field{ + pkField, + textField, + sparseVecField, + } + return fields +} + func (ff FieldsFactory) GenFieldsForCollection(collectionFieldsType CollectionFieldsType, option *GenFieldsOption) []*entity.Field { log.Info("GenFieldsForCollection", zap.Any("GenFieldsOption", option)) switch collectionFieldsType { 
@@ -360,6 +392,8 @@ func (ff FieldsFactory) GenFieldsForCollection(collectionFieldsType CollectionFi return FieldsAllFields{}.GenFields(*option) case Int64VecAllScalar: return FieldsInt64VecAllScalar{}.GenFields(*option) + case FullTextSearch: + return FieldsFullTextSearch{}.GenFields(*option) default: return FieldsInt64Vec{}.GenFields(*option) } diff --git a/tests/go_client/testcases/helper/function_helper.go b/tests/go_client/testcases/helper/function_helper.go new file mode 100644 index 0000000000..6f289d6cfa --- /dev/null +++ b/tests/go_client/testcases/helper/function_helper.go @@ -0,0 +1,14 @@ +package helper + +import ( + "github.com/milvus-io/milvus/client/v2/entity" +) + +// TNewBM25Function creates a new BM25 function with the given input and output fields +func TNewBM25Function(inputField, outputField string) *entity.Function { + return entity.NewFunction(). + WithName(inputField + "_bm25_emb"). + WithInputFields(inputField). + WithOutputFields(outputField). + WithType(entity.FunctionTypeBM25) +} diff --git a/tests/go_client/testcases/helper/helper.go b/tests/go_client/testcases/helper/helper.go index 2c7e74c326..142533cd12 100644 --- a/tests/go_client/testcases/helper/helper.go +++ b/tests/go_client/testcases/helper/helper.go @@ -135,6 +135,8 @@ func (chainTask *CollectionPrepare) InsertData(ctx context.Context, t *testing.T if nil == ip.Schema || ip.Schema.CollectionName == "" { log.Fatal("[InsertData] Nil Schema is not expected") } + // print option + log.Info("GenDataOption", zap.Any("option", option)) columns, dynamicColumns := GenColumnsBasedSchema(ip.Schema, option) insertOpt := clientv2.NewColumnBasedInsertOption(ip.Schema.CollectionName).WithColumns(columns...).WithColumns(dynamicColumns...) 
if ip.PartitionName != "" { diff --git a/tests/go_client/testcases/helper/index_helper.go b/tests/go_client/testcases/helper/index_helper.go index a2c034537f..043c791b2c 100644 --- a/tests/go_client/testcases/helper/index_helper.go +++ b/tests/go_client/testcases/helper/index_helper.go @@ -55,6 +55,10 @@ var SupportBinIvfFlatMetricType = []entity.MetricType{ entity.HAMMING, } +var SupportFullTextSearchMetricsType = []entity.MetricType{ + entity.BM25, +} + var UnsupportedSparseVecMetricsType = []entity.MetricType{ entity.L2, entity.COSINE, diff --git a/tests/go_client/testcases/helper/read_helper.go b/tests/go_client/testcases/helper/read_helper.go index 754258e9c0..3d48d8d966 100644 --- a/tests/go_client/testcases/helper/read_helper.go +++ b/tests/go_client/testcases/helper/read_helper.go @@ -66,6 +66,14 @@ func GenSearchVectors(nq int, dim int, dataType entity.FieldType) []entity.Vecto return vectors } +func GenFullTextQuery(nq int, lang string) []string { + queries := make([]string, 0, nq) + for i := 0; i < nq; i++ { + queries = append(queries, common.GenText(lang)) + } + return queries +} + func GenFp16OrBf16VectorsFromFloatVector(nq int, dim int, dataType entity.FieldType) []entity.Vector { vectors := make([]entity.Vector, 0, nq) switch dataType { diff --git a/tests/go_client/testcases/helper/schema_helper.go b/tests/go_client/testcases/helper/schema_helper.go index d96e567a28..347a5e1a38 100644 --- a/tests/go_client/testcases/helper/schema_helper.go +++ b/tests/go_client/testcases/helper/schema_helper.go @@ -12,6 +12,7 @@ type GenSchemaOption struct { AutoID bool Fields []*entity.Field EnableDynamicField bool + Function *entity.Function } func TNewSchemaOption() *GenSchemaOption { @@ -43,6 +44,11 @@ func (opt *GenSchemaOption) TWithFields(fields []*entity.Field) *GenSchemaOption return opt } +func (opt *GenSchemaOption) TWithFunction(function *entity.Function) *GenSchemaOption { + opt.Function = function + return opt +} + func GenSchema(option 
*GenSchemaOption) *entity.Schema { if len(option.Fields) == 0 { log.Fatal("Require at least a primary field and a vector field") @@ -64,5 +70,8 @@ func GenSchema(option *GenSchemaOption) *entity.Schema { if option.EnableDynamicField { schema.WithDynamicFieldEnabled(option.EnableDynamicField) } + if option.Function != nil { + schema.WithFunction(option.Function) + } return schema }