mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-08 01:58:34 +08:00
test:add text embedding function testcases in go client (#43875)
/kind improvement --------- Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
This commit is contained in:
parent
c102fa8b0b
commit
1e31ad345b
@ -4,7 +4,10 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"math"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
|
"net/http"
|
||||||
"slices"
|
"slices"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
@ -539,6 +542,154 @@ func GetBm25FunctionsOutputFields(schema *entity.Schema) []string {
|
|||||||
return outputFields
|
return outputFields
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func GetTextEmbeddingFunctionsOutputFields(schema *entity.Schema) []string {
|
||||||
|
var outputFields []string
|
||||||
|
for _, fn := range schema.Functions {
|
||||||
|
if fn.Type == entity.FunctionTypeTextEmbedding {
|
||||||
|
outputFields = append(outputFields, fn.OutputFieldNames...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return outputFields
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetAllFunctionsOutputFields(schema *entity.Schema) []string {
|
||||||
|
var outputFields []string
|
||||||
|
for _, fn := range schema.Functions {
|
||||||
|
if fn.Type == entity.FunctionTypeBM25 || fn.Type == entity.FunctionTypeTextEmbedding {
|
||||||
|
outputFields = append(outputFields, fn.OutputFieldNames...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return outputFields
|
||||||
|
}
|
||||||
|
|
||||||
|
// GenTextDocuments returns count text documents for embedding tests.
//
// Each document is one of a fixed set of template sentences (chosen
// round-robin) followed by ". Document ID: <index>". lang selects the
// template set: "english"/"en", "chinese"/"zh", or a short generic English
// set for any other value.
func GenTextDocuments(count int, lang string) []string {
	var sentences []string
	switch lang {
	case "english", "en":
		sentences = []string{
			"This is a document about artificial intelligence and machine learning technologies in modern computing systems",
			"Vector databases enable efficient similarity search for high-dimensional data in AI applications",
			"Text embeddings transform natural language into numerical representations for semantic understanding",
			"Information retrieval systems help users find relevant documents from large collections of data",
			"Natural language processing enables computers to understand and generate human language effectively",
			"Database management systems provide structured storage and efficient querying of information",
			"Search algorithms rank and retrieve the most relevant results for user queries",
			"Machine learning models learn patterns from data to make predictions and classifications",
			"Deep learning neural networks process complex patterns in images, text, and other data types",
			"Data science combines statistics, programming, and domain knowledge to extract insights",
		}
	case "chinese", "zh":
		sentences = []string{
			"这是关于人工智能和机器学习技术的文档,介绍现代计算系统中的应用",
			"向量数据库为高维数据提供高效的相似性搜索功能,支持AI应用开发",
			"文本嵌入技术将自然语言转换为数值表示,实现语义理解和分析",
			"信息检索系统帮助用户从大规模数据集合中找到相关的文档内容",
			"自然语言处理技术使计算机能够理解和生成人类语言",
			"数据库管理系统提供结构化存储和高效的信息查询功能",
			"搜索算法对用户查询结果进行排序和检索,返回最相关的内容",
			"机器学习模型从数据中学习模式,进行预测和分类任务",
			"深度学习神经网络处理图像、文本等复杂数据类型中的模式",
			"数据科学结合统计学、编程和领域知识来提取有价值的洞察",
		}
	default:
		// Unknown languages fall back to a generic English set.
		sentences = []string{
			"Document about technology and innovation in the digital age",
			"Analysis of modern computing systems and their applications",
			"Research on data processing and information management",
			"Study of algorithms and their implementation in software",
			"Overview of database systems and their optimization techniques",
		}
	}

	docs := make([]string, count)
	for idx := range docs {
		// Cycle through the templates; the ID suffix keeps documents unique.
		docs[idx] = fmt.Sprintf("%s. Document ID: %d", sentences[idx%len(sentences)], idx)
	}
	return docs
}
|
||||||
|
|
||||||
|
// CosineSimilarity returns the cosine similarity of two float32 vectors.
//
// It returns 0 when the vectors differ in length, are empty, or when either
// has zero magnitude. The dot product and squared norms are accumulated in
// float64: float32 accumulators overflow to +Inf (yielding NaN) for
// large-magnitude inputs, underflow to 0 for tiny ones, and lose precision
// over high-dimensional embeddings.
func CosineSimilarity(a, b []float32) float32 {
	if len(a) != len(b) || len(a) == 0 {
		return 0
	}

	var dot, normA, normB float64
	for i, av := range a {
		x, y := float64(av), float64(b[i])
		dot += x * y
		normA += x * x
		normB += y * y
	}

	if normA == 0 || normB == 0 {
		return 0
	}

	// Take the square roots separately so the denominator cannot overflow
	// even when the product of the squared norms would.
	return float32(dot / (math.Sqrt(normA) * math.Sqrt(normB)))
}
|
||||||
|
|
||||||
|
// GenLongText builds a space-separated text of wordCount words by cycling
// through a fixed vocabulary. lang selects the vocabulary: "chinese"/"zh",
// "english"/"en", or a generic English word list for any other value.
func GenLongText(wordCount int, lang string) string {
	var vocab []string
	switch lang {
	case "chinese", "zh":
		vocab = []string{"人工智能", "机器学习", "深度学习", "神经网络", "数据挖掘", "自然语言", "处理技术", "计算机", "算法优化", "信息检索", "向量数据库", "语义搜索", "文本分析", "知识图谱", "智能系统"}
	case "english", "en":
		vocab = []string{"artificial", "intelligence", "machine", "learning", "deep", "neural", "network", "algorithm", "database", "search", "vector", "embedding", "semantic", "analysis", "information", "retrieval", "computing", "technology", "system", "data", "processing", "optimization", "performance", "scalability", "efficiency"}
	default:
		vocab = []string{"the", "quick", "brown", "fox", "jumps", "over", "lazy", "dog", "and", "runs", "through", "forest", "with", "great", "speed", "while", "chasing", "rabbit", "under", "bright", "moonlight", "across", "green", "fields", "toward", "distant", "mountains"}
	}

	picked := make([]string, wordCount)
	for pos := range picked {
		// Wrap around once the vocabulary is exhausted.
		picked[pos] = vocab[pos%len(vocab)]
	}
	return strings.Join(picked, " ")
}
|
||||||
|
|
||||||
|
// CallTEIDirectly posts texts to the TEI service's /embed route and returns
// one embedding per input text.
//
// endpoint is the base URL of the service; trailing slashes are trimmed so
// the request path is always exactly "/embed". A non-2xx response is
// reported as an error carrying the status code and response body, rather
// than surfacing later as a JSON decoding failure on the error payload.
func CallTEIDirectly(endpoint string, texts []string) ([][]float32, error) {
	// TEI API request structure
	type teiRequest struct {
		Inputs []string `json:"inputs"`
	}

	payload, err := json.Marshal(teiRequest{Inputs: texts})
	if err != nil {
		return nil, fmt.Errorf("failed to marshal request: %w", err)
	}

	url := strings.TrimRight(endpoint, "/") + "/embed"
	resp, err := http.Post(url, "application/json", bytes.NewReader(payload))
	if err != nil {
		return nil, fmt.Errorf("failed to call TEI endpoint: %w", err)
	}
	defer resp.Body.Close()

	// Read the body even on failure so it can be included in the error.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response: %w", err)
	}

	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, fmt.Errorf("TEI endpoint returned status %d: %s", resp.StatusCode, body)
	}

	// TEI returns an array of arrays, one embedding per input.
	var embeddings [][]float32
	if err := json.Unmarshal(body, &embeddings); err != nil {
		return nil, fmt.Errorf("failed to unmarshal response: %w", err)
	}

	return embeddings, nil
}
|
||||||
|
|
||||||
func GenColumnsBasedSchema(schema *entity.Schema, option *GenDataOption) ([]column.Column, []column.Column) {
|
func GenColumnsBasedSchema(schema *entity.Schema, option *GenDataOption) ([]column.Column, []column.Column) {
|
||||||
if nil == schema || schema.CollectionName == "" {
|
if nil == schema || schema.CollectionName == "" {
|
||||||
log.Fatal("[GenColumnsBasedSchema] Nil Schema is not expected")
|
log.Fatal("[GenColumnsBasedSchema] Nil Schema is not expected")
|
||||||
@ -557,7 +708,7 @@ func GenColumnsBasedSchema(schema *entity.Schema, option *GenDataOption) ([]colu
|
|||||||
if option.fieldName == "" {
|
if option.fieldName == "" {
|
||||||
option.fieldName = field.Name
|
option.fieldName = field.Name
|
||||||
}
|
}
|
||||||
if slices.Contains(GetBm25FunctionsOutputFields(schema), field.Name) {
|
if slices.Contains(GetAllFunctionsOutputFields(schema), field.Name) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
log.Info("GenColumnsBasedSchema", zap.Any("field", field))
|
log.Info("GenColumnsBasedSchema", zap.Any("field", field))
|
||||||
|
|||||||
@ -99,15 +99,16 @@ type CollectionFieldsType int32
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
// FieldTypeNone zero value place holder
|
// FieldTypeNone zero value place holder
|
||||||
Int64Vec CollectionFieldsType = 1 // int64 + floatVec
|
Int64Vec CollectionFieldsType = 1 // int64 + floatVec
|
||||||
VarcharBinary CollectionFieldsType = 2 // varchar + binaryVec
|
VarcharBinary CollectionFieldsType = 2 // varchar + binaryVec
|
||||||
Int64VecJSON CollectionFieldsType = 3 // int64 + floatVec + json
|
Int64VecJSON CollectionFieldsType = 3 // int64 + floatVec + json
|
||||||
Int64VecArray CollectionFieldsType = 4 // int64 + floatVec + array
|
Int64VecArray CollectionFieldsType = 4 // int64 + floatVec + array
|
||||||
Int64VarcharSparseVec CollectionFieldsType = 5 // int64 + varchar + sparse vector
|
Int64VarcharSparseVec CollectionFieldsType = 5 // int64 + varchar + sparse vector
|
||||||
Int64MultiVec CollectionFieldsType = 6 // int64 + floatVec + binaryVec + fp16Vec + bf16vec
|
Int64MultiVec CollectionFieldsType = 6 // int64 + floatVec + binaryVec + fp16Vec + bf16vec
|
||||||
AllFields CollectionFieldsType = 7 // all fields excepted sparse
|
AllFields CollectionFieldsType = 7 // all fields excepted sparse
|
||||||
Int64VecAllScalar CollectionFieldsType = 8 // int64 + floatVec + all scalar fields
|
Int64VecAllScalar CollectionFieldsType = 8 // int64 + floatVec + all scalar fields
|
||||||
FullTextSearch CollectionFieldsType = 9 // int64 + varchar + sparse vector + analyzer + function
|
FullTextSearch CollectionFieldsType = 9 // int64 + varchar + sparse vector + analyzer + function
|
||||||
|
TextEmbedding CollectionFieldsType = 10 // int64 + varchar + float_vector + text_embedding_function
|
||||||
)
|
)
|
||||||
|
|
||||||
type GenFieldsOption struct {
|
type GenFieldsOption struct {
|
||||||
@ -373,6 +374,23 @@ func (cf FieldsFullTextSearch) GenFields(option GenFieldsOption) []*entity.Field
|
|||||||
return fields
|
return fields
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type FieldsTextEmbedding struct{}
|
||||||
|
|
||||||
|
func (cf FieldsTextEmbedding) GenFields(option GenFieldsOption) []*entity.Field {
|
||||||
|
pkField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeInt64)).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true)
|
||||||
|
textField := entity.NewField().WithName("document").WithDataType(entity.FieldTypeVarChar).WithMaxLength(option.MaxLength).WithIsPartitionKey(option.IsPartitionKey)
|
||||||
|
vecField := entity.NewField().WithName("dense").WithDataType(entity.FieldTypeFloatVector).WithDim(option.Dim)
|
||||||
|
if option.AutoID {
|
||||||
|
pkField.WithIsAutoID(option.AutoID)
|
||||||
|
}
|
||||||
|
fields := []*entity.Field{
|
||||||
|
pkField,
|
||||||
|
textField,
|
||||||
|
vecField,
|
||||||
|
}
|
||||||
|
return fields
|
||||||
|
}
|
||||||
|
|
||||||
func (ff FieldsFactory) GenFieldsForCollection(collectionFieldsType CollectionFieldsType, option *GenFieldsOption) []*entity.Field {
|
func (ff FieldsFactory) GenFieldsForCollection(collectionFieldsType CollectionFieldsType, option *GenFieldsOption) []*entity.Field {
|
||||||
log.Info("GenFieldsForCollection", zap.Any("GenFieldsOption", option))
|
log.Info("GenFieldsForCollection", zap.Any("GenFieldsOption", option))
|
||||||
switch collectionFieldsType {
|
switch collectionFieldsType {
|
||||||
@ -394,7 +412,14 @@ func (ff FieldsFactory) GenFieldsForCollection(collectionFieldsType CollectionFi
|
|||||||
return FieldsInt64VecAllScalar{}.GenFields(*option)
|
return FieldsInt64VecAllScalar{}.GenFields(*option)
|
||||||
case FullTextSearch:
|
case FullTextSearch:
|
||||||
return FieldsFullTextSearch{}.GenFields(*option)
|
return FieldsFullTextSearch{}.GenFields(*option)
|
||||||
|
case TextEmbedding:
|
||||||
|
return FieldsTextEmbedding{}.GenFields(*option)
|
||||||
default:
|
default:
|
||||||
return FieldsInt64Vec{}.GenFields(*option)
|
return FieldsInt64Vec{}.GenFields(*option)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TNewTextEmbeddingFieldsOption creates fields option with text embedding settings
|
||||||
|
func TNewTextEmbeddingFieldsOption() *GenFieldsOption {
|
||||||
|
return TNewFieldsOption().TWithDim(int64(GetTEIModelDim())).TWithAutoID(true).TWithMaxLen(65535)
|
||||||
|
}
|
||||||
|
|||||||
@ -12,3 +12,19 @@ func TNewBM25Function(inputField, outputField string) *entity.Function {
|
|||||||
WithOutputFields(outputField).
|
WithOutputFields(outputField).
|
||||||
WithType(entity.FunctionTypeBM25)
|
WithType(entity.FunctionTypeBM25)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TNewTextEmbeddingFunction creates a text embedding function for different providers
|
||||||
|
func TNewTextEmbeddingFunction(inputField, outputField string, params map[string]any) *entity.Function {
|
||||||
|
function := entity.NewFunction().
|
||||||
|
WithName(inputField + "_text_emb").
|
||||||
|
WithInputFields(inputField).
|
||||||
|
WithOutputFields(outputField).
|
||||||
|
WithType(entity.FunctionTypeTextEmbedding)
|
||||||
|
|
||||||
|
// Add all parameters including provider
|
||||||
|
for key, value := range params {
|
||||||
|
function.WithParam(key, value)
|
||||||
|
}
|
||||||
|
|
||||||
|
return function
|
||||||
|
}
|
||||||
|
|||||||
@ -75,3 +75,12 @@ func GenSchema(option *GenSchemaOption) *entity.Schema {
|
|||||||
}
|
}
|
||||||
return schema
|
return schema
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TNewTextEmbeddingSchemaOption creates schema option with text embedding function
|
||||||
|
func TNewTextEmbeddingSchemaOption() *GenSchemaOption {
|
||||||
|
function := TNewTextEmbeddingFunction("document", "dense", map[string]any{
|
||||||
|
"provider": "TEI",
|
||||||
|
"endpoint": GetTEIEndpoint(),
|
||||||
|
})
|
||||||
|
return TNewSchemaOption().TWithFunction(function)
|
||||||
|
}
|
||||||
|
|||||||
@ -19,6 +19,8 @@ var (
|
|||||||
user = flag.String("user", "root", "user")
|
user = flag.String("user", "root", "user")
|
||||||
password = flag.String("password", "Milvus", "password")
|
password = flag.String("password", "Milvus", "password")
|
||||||
logLevel = flag.String("log.level", "info", "log level for test")
|
logLevel = flag.String("log.level", "info", "log level for test")
|
||||||
|
teiEndpoint = flag.String("tei_endpoint", "http://text-embeddings-service.milvus-ci.svc.cluster.local:80", "TEI service endpoint for text embedding tests")
|
||||||
|
teiModelDim = flag.Int("tei_model_dim", 768, "Vector dimension for text embedding model")
|
||||||
defaultClientConfig *client.ClientConfig
|
defaultClientConfig *client.ClientConfig
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -42,6 +44,14 @@ func GetPassword() string {
|
|||||||
return *password
|
return *password
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetTEIEndpoint returns the value of the tei_endpoint command-line flag,
// the TEI service endpoint used by the text embedding tests.
func GetTEIEndpoint() string {
	return *teiEndpoint
}
|
||||||
|
|
||||||
|
// GetTEIModelDim returns the value of the tei_model_dim command-line flag,
// the vector dimension of the text embedding model.
func GetTEIModelDim() int {
	return *teiModelDim
}
|
||||||
|
|
||||||
func parseLogConfig() {
|
func parseLogConfig() {
|
||||||
log.Info("Parser Log Level", zap.String("logLevel", *logLevel))
|
log.Info("Parser Log Level", zap.String("logLevel", *logLevel))
|
||||||
switch *logLevel {
|
switch *logLevel {
|
||||||
|
|||||||
956
tests/go_client/testcases/text_embedding_test.go
Normal file
956
tests/go_client/testcases/text_embedding_test.go
Normal file
@ -0,0 +1,956 @@
|
|||||||
|
package testcases
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/client/v2/column"
|
||||||
|
"github.com/milvus-io/milvus/client/v2/entity"
|
||||||
|
"github.com/milvus-io/milvus/client/v2/index"
|
||||||
|
"github.com/milvus-io/milvus/client/v2/milvusclient"
|
||||||
|
"github.com/milvus-io/milvus/tests/go_client/common"
|
||||||
|
hp "github.com/milvus-io/milvus/tests/go_client/testcases/helper"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestCreateCollectionWithTextEmbedding tests basic collection creation with text embedding function
|
||||||
|
func TestCreateCollectionWithTextEmbedding(t *testing.T) {
|
||||||
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
||||||
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
||||||
|
|
||||||
|
// create collection with TEI function
|
||||||
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.TextEmbedding), hp.TNewTextEmbeddingFieldsOption(), hp.TNewTextEmbeddingSchemaOption(), hp.TWithConsistencyLevel(entity.ClStrong))
|
||||||
|
|
||||||
|
// verify collection creation
|
||||||
|
require.NotNil(t, prepare)
|
||||||
|
require.NotNil(t, schema)
|
||||||
|
|
||||||
|
// describe collection to verify function
|
||||||
|
descRes, err := mc.DescribeCollection(ctx, milvusclient.NewDescribeCollectionOption(schema.CollectionName))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
require.Len(t, descRes.Schema.Functions, 1)
|
||||||
|
require.Equal(t, "document_text_emb", descRes.Schema.Functions[0].Name)
|
||||||
|
require.Equal(t, entity.FunctionTypeTextEmbedding, descRes.Schema.Functions[0].Type)
|
||||||
|
require.Equal(t, []string{"document"}, descRes.Schema.Functions[0].InputFieldNames)
|
||||||
|
require.Equal(t, []string{"dense"}, descRes.Schema.Functions[0].OutputFieldNames)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCreateCollectionWithTextEmbeddingTwice tests creating collection twice with same schema
|
||||||
|
func TestCreateCollectionWithTextEmbeddingTwice(t *testing.T) {
|
||||||
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
||||||
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
||||||
|
|
||||||
|
// create collection with TEI function
|
||||||
|
function := hp.TNewTextEmbeddingFunction("document", "dense", map[string]any{
|
||||||
|
"provider": "TEI",
|
||||||
|
"endpoint": hp.GetTEIEndpoint(),
|
||||||
|
})
|
||||||
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
||||||
|
fieldsOption := hp.TNewFieldsOption().TWithDim(int64(hp.GetTEIModelDim())).TWithAutoID(true).TWithMaxLen(65535)
|
||||||
|
|
||||||
|
collectionName := common.GenRandomString("text_embedding", 6)
|
||||||
|
createParams := hp.NewCreateCollectionParams(hp.TextEmbedding)
|
||||||
|
|
||||||
|
// first creation
|
||||||
|
prepare1, schema1 := hp.CollPrepare.CreateCollection(
|
||||||
|
ctx, t, mc, createParams, fieldsOption,
|
||||||
|
schemaOption.TWithName(collectionName),
|
||||||
|
hp.TWithConsistencyLevel(entity.ClStrong),
|
||||||
|
)
|
||||||
|
require.NotNil(t, prepare1)
|
||||||
|
require.NotNil(t, schema1)
|
||||||
|
|
||||||
|
// second creation with same name should succeed (idempotent)
|
||||||
|
prepare2, schema2 := hp.CollPrepare.CreateCollection(
|
||||||
|
ctx, t, mc, createParams, fieldsOption,
|
||||||
|
schemaOption.TWithName(collectionName),
|
||||||
|
hp.TWithConsistencyLevel(entity.ClStrong),
|
||||||
|
)
|
||||||
|
require.NotNil(t, prepare2)
|
||||||
|
require.NotNil(t, schema2)
|
||||||
|
|
||||||
|
// verify function exists
|
||||||
|
descRes, err := mc.DescribeCollection(ctx, milvusclient.NewDescribeCollectionOption(collectionName))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
require.Len(t, descRes.Schema.Functions, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCreateCollectionUnsupportedEndpoint tests creation with unsupported endpoint
|
||||||
|
func TestCreateCollectionUnsupportedEndpoint(t *testing.T) {
|
||||||
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
||||||
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
||||||
|
|
||||||
|
// create collection with invalid endpoint
|
||||||
|
function := hp.TNewTextEmbeddingFunction("document", "dense", map[string]any{
|
||||||
|
"provider": "TEI",
|
||||||
|
"endpoint": "http://unsupported_endpoint",
|
||||||
|
})
|
||||||
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
||||||
|
fieldsOption := hp.TNewFieldsOption().TWithDim(int64(hp.GetTEIModelDim())).TWithAutoID(true).TWithMaxLen(65535)
|
||||||
|
|
||||||
|
// this should fail during collection creation
|
||||||
|
err := mc.CreateCollection(ctx, milvusclient.NewCreateCollectionOption(
|
||||||
|
common.GenRandomString("text_embedding", 6),
|
||||||
|
hp.GenSchema(schemaOption.TWithFields(hp.FieldsFact.GenFieldsForCollection(hp.TextEmbedding, fieldsOption))),
|
||||||
|
))
|
||||||
|
|
||||||
|
// expect error due to unsupported endpoint
|
||||||
|
common.CheckErr(t, err, false, "unsupported_endpoint")
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCreateCollectionUnmatchedDim tests creation with mismatched dimension
|
||||||
|
func TestCreateCollectionUnmatchedDim(t *testing.T) {
|
||||||
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
||||||
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
||||||
|
|
||||||
|
// create collection with wrong dimension (512 instead of expected 768 from TEI model)
|
||||||
|
wrongDim := int64(512)
|
||||||
|
function := hp.TNewTextEmbeddingFunction("document", "dense", map[string]any{
|
||||||
|
"provider": "TEI",
|
||||||
|
"endpoint": hp.GetTEIEndpoint(),
|
||||||
|
})
|
||||||
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
||||||
|
fieldsOption := hp.TNewFieldsOption().TWithDim(wrongDim).TWithAutoID(true).TWithMaxLen(65535)
|
||||||
|
|
||||||
|
collectionName := common.GenRandomString("text_embedding", 6)
|
||||||
|
|
||||||
|
// collection creation should fail with dimension mismatch error
|
||||||
|
err := mc.CreateCollection(ctx, milvusclient.NewCreateCollectionOption(
|
||||||
|
collectionName,
|
||||||
|
hp.GenSchema(schemaOption.TWithFields(hp.FieldsFact.GenFieldsForCollection(hp.TextEmbedding, fieldsOption))),
|
||||||
|
))
|
||||||
|
|
||||||
|
// Expect error with specific dimension mismatch message
|
||||||
|
expectedError := fmt.Sprintf("required embedding dim is [%d], but the embedding obtained from the model is [%d]", wrongDim, hp.GetTEIModelDim())
|
||||||
|
common.CheckErr(t, err, false, expectedError)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestInsertWithTextEmbedding tests basic data insertion with text embedding
|
||||||
|
func TestInsertWithTextEmbedding(t *testing.T) {
|
||||||
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
||||||
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
||||||
|
|
||||||
|
// create collection with TEI function
|
||||||
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.TextEmbedding), hp.TNewTextEmbeddingFieldsOption(), hp.TNewTextEmbeddingSchemaOption(), hp.TWithConsistencyLevel(entity.ClStrong))
|
||||||
|
|
||||||
|
// prepare test data - only provide text, embedding will be auto-generated
|
||||||
|
nb := 10
|
||||||
|
documents := make([]string, nb)
|
||||||
|
for i := 0; i < nb; i++ {
|
||||||
|
documents[i] = fmt.Sprintf("This is test document number %d with some content for embedding", i)
|
||||||
|
}
|
||||||
|
|
||||||
|
// insert data using only text field
|
||||||
|
res, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).WithVarcharColumn("document", documents))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
require.Equal(t, int64(nb), res.InsertCount)
|
||||||
|
|
||||||
|
// create index and load
|
||||||
|
prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{"dense": index.NewAutoIndex(entity.COSINE)}))
|
||||||
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
||||||
|
|
||||||
|
// query to verify vectors were generated
|
||||||
|
resQuery, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter("").WithOutputFields("dense").WithLimit(10))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
require.Greater(t, len(resQuery.Fields), 0)
|
||||||
|
|
||||||
|
// verify vector dimension - check first result
|
||||||
|
if resQuery.Len() > 0 {
|
||||||
|
// Query results structure is different - need to check the actual field structure
|
||||||
|
denseColumn := resQuery.GetColumn("dense")
|
||||||
|
require.NotNil(t, denseColumn)
|
||||||
|
// Field should contain vectors for all results
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestInsertWithTruncateParams tests insertion with different truncate parameters
|
||||||
|
func TestInsertWithTruncateParams(t *testing.T) {
|
||||||
|
testCases := []struct {
|
||||||
|
name string
|
||||||
|
truncate bool
|
||||||
|
truncationDirection string
|
||||||
|
shouldSucceed bool
|
||||||
|
}{
|
||||||
|
{"truncate_true_right", true, "Right", true},
|
||||||
|
{"truncate_true_left", true, "Left", true},
|
||||||
|
{"truncate_false", false, "", false}, // should fail with long text
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
||||||
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
||||||
|
|
||||||
|
// create TEI function with truncate parameters
|
||||||
|
params := map[string]any{}
|
||||||
|
if tc.truncate {
|
||||||
|
params["truncate"] = "true"
|
||||||
|
params["truncation_direction"] = tc.truncationDirection
|
||||||
|
} else {
|
||||||
|
params["truncate"] = "false"
|
||||||
|
}
|
||||||
|
|
||||||
|
params["provider"] = "TEI"
|
||||||
|
params["endpoint"] = hp.GetTEIEndpoint()
|
||||||
|
function := hp.TNewTextEmbeddingFunction("document", "dense", params)
|
||||||
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
||||||
|
fieldsOption := hp.TNewFieldsOption().TWithDim(int64(hp.GetTEIModelDim())).TWithAutoID(true).TWithMaxLen(65535)
|
||||||
|
|
||||||
|
_, schema := hp.CollPrepare.CreateCollection(
|
||||||
|
ctx, t, mc,
|
||||||
|
hp.NewCreateCollectionParams(hp.TextEmbedding),
|
||||||
|
fieldsOption,
|
||||||
|
schemaOption,
|
||||||
|
hp.TWithConsistencyLevel(entity.ClStrong),
|
||||||
|
)
|
||||||
|
|
||||||
|
// prepare long text data that would need truncation
|
||||||
|
// Generate distinctly different left and right parts that will exceed token limits when combined
|
||||||
|
leftPart := "artificial intelligence machine learning deep learning neural networks computer vision natural language processing data science algorithms " + strings.Repeat("technology innovation science research development analysis ", 100)
|
||||||
|
rightPart := "database systems vector search embeddings similarity matching retrieval information storage indexing " + strings.Repeat("query performance optimization scalability distributed computing ", 100)
|
||||||
|
longText := leftPart + " " + rightPart // This will exceed 512 tokens and need truncation
|
||||||
|
|
||||||
|
documents := []string{longText, leftPart, rightPart}
|
||||||
|
|
||||||
|
// insert data
|
||||||
|
res, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).WithVarcharColumn("document", documents))
|
||||||
|
|
||||||
|
if tc.shouldSucceed {
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
require.Equal(t, int64(len(documents)), res.InsertCount)
|
||||||
|
|
||||||
|
// create index and load for embedding comparison
|
||||||
|
_, err = mc.CreateIndex(ctx, milvusclient.NewCreateIndexOption(schema.CollectionName, "dense", index.NewAutoIndex(entity.COSINE)))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
|
||||||
|
_, err = mc.LoadCollection(ctx, milvusclient.NewLoadCollectionOption(schema.CollectionName))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
|
||||||
|
// Query embeddings from Milvus
|
||||||
|
resQuery, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).
|
||||||
|
WithFilter("").
|
||||||
|
WithOutputFields("dense", "document").
|
||||||
|
WithConsistencyLevel(entity.ClStrong).
|
||||||
|
WithLimit(10))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
require.Equal(t, len(documents), resQuery.Len())
|
||||||
|
|
||||||
|
// Extract Milvus embeddings
|
||||||
|
denseColumn := resQuery.GetColumn("dense")
|
||||||
|
require.NotNil(t, denseColumn)
|
||||||
|
floatVecColumn, ok := denseColumn.(*column.ColumnFloatVector)
|
||||||
|
require.True(t, ok, "Dense column should be a float vector column")
|
||||||
|
|
||||||
|
// Truncation validation using similarity comparison approach
|
||||||
|
// This follows the Python test logic: compare similarity between combined text and parts
|
||||||
|
// to verify that truncation direction works correctly
|
||||||
|
|
||||||
|
require.Equal(t, 3, resQuery.Len(), "Should have 3 documents: longText, leftPart, rightPart")
|
||||||
|
|
||||||
|
// Get embeddings for: [0]=longText, [1]=leftPart, [2]=rightPart
|
||||||
|
embeddings := make([][]float32, 3)
|
||||||
|
for i := 0; i < 3; i++ {
|
||||||
|
embedding := floatVecColumn.Data()[i]
|
||||||
|
require.Equal(t, hp.GetTEIModelDim(), len(embedding), "Embedding should have correct dimension")
|
||||||
|
|
||||||
|
// Check that embedding is not all zeros (would indicate a failure)
|
||||||
|
var sum float32
|
||||||
|
for _, val := range embedding {
|
||||||
|
sum += val * val
|
||||||
|
}
|
||||||
|
require.Greater(t, sum, float32(0.01), "Embedding should not be all zeros for document %d", i)
|
||||||
|
|
||||||
|
embeddings[i] = embedding
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate cosine similarities
|
||||||
|
// similarity_left: longText vs leftPart
|
||||||
|
// similarity_right: longText vs rightPart
|
||||||
|
similarityLeft := hp.CosineSimilarity(embeddings[0], embeddings[1])
|
||||||
|
similarityRight := hp.CosineSimilarity(embeddings[0], embeddings[2])
|
||||||
|
|
||||||
|
t.Logf("Similarity longText vs leftPart: %.6f", similarityLeft)
|
||||||
|
t.Logf("Similarity longText vs rightPart: %.6f", similarityRight)
|
||||||
|
|
||||||
|
// Validation based on truncation direction:
|
||||||
|
// - If truncation_direction = "Left", we keep the right part, so longText should be more similar to rightPart
|
||||||
|
// - If truncation_direction = "Right", we keep the left part, so longText should be more similar to leftPart
|
||||||
|
if tc.truncationDirection == "Left" {
|
||||||
|
require.Greater(t, similarityRight, similarityLeft,
|
||||||
|
"With Left truncation, longText should be more similar to rightPart (%.6f) than leftPart (%.6f)",
|
||||||
|
similarityRight, similarityLeft)
|
||||||
|
t.Logf("Left truncation verified: rightPart similarity (%.6f) > leftPart similarity (%.6f)",
|
||||||
|
similarityRight, similarityLeft)
|
||||||
|
} else { // "Right"
|
||||||
|
require.Greater(t, similarityLeft, similarityRight,
|
||||||
|
"With Right truncation, longText should be more similar to leftPart (%.6f) than rightPart (%.6f)",
|
||||||
|
similarityLeft, similarityRight)
|
||||||
|
t.Logf("Right truncation verified: leftPart similarity (%.6f) > rightPart similarity (%.6f)",
|
||||||
|
similarityLeft, similarityRight)
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Logf("Successfully inserted %d documents with truncate=%v, direction=%s", len(documents), tc.truncate, tc.truncationDirection)
|
||||||
|
} else {
|
||||||
|
common.CheckErr(t, err, false, "Payload Too Large")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestVerifyEmbeddingConsistency verifies that Milvus text embedding function produces same results as direct TEI calls
|
||||||
|
func TestVerifyEmbeddingConsistency(t *testing.T) {
|
||||||
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
||||||
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
||||||
|
|
||||||
|
// create collection with TEI function (custom fields for autoID=false)
|
||||||
|
function := hp.TNewTextEmbeddingFunction("document", "dense", map[string]any{
|
||||||
|
"provider": "TEI",
|
||||||
|
"endpoint": hp.GetTEIEndpoint(),
|
||||||
|
})
|
||||||
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
||||||
|
fieldsOption := hp.TNewFieldsOption().TWithDim(int64(hp.GetTEIModelDim())).TWithAutoID(false).TWithMaxLen(65535)
|
||||||
|
|
||||||
|
prepare, schema := hp.CollPrepare.CreateCollection(
|
||||||
|
ctx, t, mc,
|
||||||
|
hp.NewCreateCollectionParams(hp.TextEmbedding),
|
||||||
|
fieldsOption,
|
||||||
|
schemaOption,
|
||||||
|
hp.TWithConsistencyLevel(entity.ClStrong),
|
||||||
|
)
|
||||||
|
|
||||||
|
// Test documents
|
||||||
|
testDocs := []string{
|
||||||
|
"This is a test document about artificial intelligence",
|
||||||
|
"Vector databases enable semantic search capabilities",
|
||||||
|
"Text embeddings transform language into numbers",
|
||||||
|
}
|
||||||
|
|
||||||
|
// Insert documents into Milvus (will use text embedding function)
|
||||||
|
ids := []int64{1, 2, 3}
|
||||||
|
res, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).
|
||||||
|
WithInt64Column(common.DefaultInt64FieldName, ids).
|
||||||
|
WithVarcharColumn("document", testDocs))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
require.Equal(t, int64(len(testDocs)), res.InsertCount)
|
||||||
|
|
||||||
|
// Create index and load
|
||||||
|
prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{"dense": index.NewAutoIndex(entity.COSINE)}))
|
||||||
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
||||||
|
|
||||||
|
// Query vectors from Milvus
|
||||||
|
resQuery, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).
|
||||||
|
WithFilter("").
|
||||||
|
WithOutputFields("dense", "document", common.DefaultInt64FieldName).
|
||||||
|
WithConsistencyLevel(entity.ClStrong).
|
||||||
|
WithLimit(10))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
require.Equal(t, len(testDocs), resQuery.Len())
|
||||||
|
|
||||||
|
// Get embeddings directly from TEI
|
||||||
|
teiEmbeddings, err := hp.CallTEIDirectly(hp.GetTEIEndpoint(), testDocs)
|
||||||
|
if err != nil {
|
||||||
|
t.Skipf("Skip consistency test - could not connect to TEI endpoint: %v", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
require.Equal(t, len(testDocs), len(teiEmbeddings))
|
||||||
|
|
||||||
|
// Compare embeddings
|
||||||
|
denseColumn := resQuery.GetColumn("dense")
|
||||||
|
require.NotNil(t, denseColumn)
|
||||||
|
|
||||||
|
// Get ID column to match embeddings with documents
|
||||||
|
idColumn := resQuery.GetColumn(common.DefaultInt64FieldName)
|
||||||
|
require.NotNil(t, idColumn)
|
||||||
|
|
||||||
|
// Extract and compare embeddings - need to handle column type properly
|
||||||
|
floatVecColumn, ok := denseColumn.(*column.ColumnFloatVector)
|
||||||
|
require.True(t, ok, "Dense column should be a float vector column")
|
||||||
|
|
||||||
|
for i := 0; i < resQuery.Len(); i++ {
|
||||||
|
// Get ID to find corresponding TEI embedding
|
||||||
|
id, err := idColumn.GetAsInt64(i)
|
||||||
|
require.NoError(t, err)
|
||||||
|
teiIdx := id - 1 // IDs are 1-based, array is 0-based
|
||||||
|
|
||||||
|
// Get Milvus embedding from the float vector column
|
||||||
|
milvusEmbedding := floatVecColumn.Data()[i]
|
||||||
|
|
||||||
|
require.NotNil(t, milvusEmbedding)
|
||||||
|
require.Equal(t, hp.GetTEIModelDim(), len(milvusEmbedding), "Embedding dimension should match")
|
||||||
|
|
||||||
|
// Calculate cosine similarity
|
||||||
|
similarity := hp.CosineSimilarity(milvusEmbedding, teiEmbeddings[teiIdx])
|
||||||
|
|
||||||
|
t.Logf("Document %d (ID=%d) similarity between Milvus and TEI: %.6f", i, id, similarity)
|
||||||
|
|
||||||
|
// Embeddings should be nearly identical (similarity > 0.99)
|
||||||
|
require.Greater(t, similarity, float32(0.99),
|
||||||
|
"Milvus embedding should be nearly identical to TEI embedding for document ID %d", id)
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Log("Embedding consistency verified: Milvus text embedding function produces same results as direct TEI calls")
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestUpsertTextFieldUpdatesEmbedding tests that upserting text field updates embedding
|
||||||
|
func TestUpsertTextFieldUpdatesEmbedding(t *testing.T) {
|
||||||
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
||||||
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
||||||
|
|
||||||
|
// create collection with TEI function (custom fields for autoID=false for upsert)
|
||||||
|
function := hp.TNewTextEmbeddingFunction("document", "dense", map[string]any{
|
||||||
|
"provider": "TEI",
|
||||||
|
"endpoint": hp.GetTEIEndpoint(),
|
||||||
|
})
|
||||||
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
||||||
|
fieldsOption := hp.TNewFieldsOption().TWithDim(int64(hp.GetTEIModelDim())).TWithAutoID(false).TWithMaxLen(65535) // disable auto ID for upsert
|
||||||
|
|
||||||
|
prepare, schema := hp.CollPrepare.CreateCollection(
|
||||||
|
ctx, t, mc,
|
||||||
|
hp.NewCreateCollectionParams(hp.TextEmbedding),
|
||||||
|
fieldsOption,
|
||||||
|
schemaOption,
|
||||||
|
hp.TWithConsistencyLevel(entity.ClStrong),
|
||||||
|
)
|
||||||
|
|
||||||
|
// create index and load first
|
||||||
|
prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{"dense": index.NewAutoIndex(entity.COSINE)}))
|
||||||
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
||||||
|
|
||||||
|
// insert initial data with specific ID
|
||||||
|
oldText := "This is the original text content"
|
||||||
|
res, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).
|
||||||
|
WithInt64Column(common.DefaultInt64FieldName, []int64{1}).
|
||||||
|
WithVarcharColumn("document", []string{oldText}))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
require.Equal(t, int64(1), res.InsertCount)
|
||||||
|
|
||||||
|
// query original embedding before upsert
|
||||||
|
resQueryBefore, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).
|
||||||
|
WithFilter("int64 == 1").
|
||||||
|
WithOutputFields("document", "dense").
|
||||||
|
WithConsistencyLevel(entity.ClStrong))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
require.Equal(t, 1, resQueryBefore.Len())
|
||||||
|
|
||||||
|
// extract original embedding
|
||||||
|
originalDenseColumn := resQueryBefore.GetColumn("dense")
|
||||||
|
require.NotNil(t, originalDenseColumn)
|
||||||
|
originalFloatVecColumn, ok := originalDenseColumn.(*column.ColumnFloatVector)
|
||||||
|
require.True(t, ok, "Dense column should be a float vector column")
|
||||||
|
originalEmbedding := originalFloatVecColumn.Data()[0]
|
||||||
|
require.Equal(t, hp.GetTEIModelDim(), len(originalEmbedding), "Original embedding dimension should match")
|
||||||
|
|
||||||
|
// verify original text
|
||||||
|
originalDocColumn := resQueryBefore.GetColumn("document")
|
||||||
|
require.NotNil(t, originalDocColumn)
|
||||||
|
originalVarCharColumn, ok := originalDocColumn.(*column.ColumnVarChar)
|
||||||
|
require.True(t, ok, "Document column should be a varchar column")
|
||||||
|
require.Equal(t, oldText, originalVarCharColumn.Data()[0], "Original text should match")
|
||||||
|
|
||||||
|
// upsert with new text
|
||||||
|
newText := "This is completely different updated text content"
|
||||||
|
res2, err := mc.Upsert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).
|
||||||
|
WithInt64Column(common.DefaultInt64FieldName, []int64{1}).
|
||||||
|
WithVarcharColumn("document", []string{newText}))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
require.Equal(t, int64(1), res2.UpsertCount)
|
||||||
|
|
||||||
|
// query updated embedding after upsert
|
||||||
|
resQueryAfter, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).
|
||||||
|
WithFilter("int64 == 1").
|
||||||
|
WithOutputFields("document", "dense").
|
||||||
|
WithConsistencyLevel(entity.ClStrong))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
require.Equal(t, 1, resQueryAfter.Len())
|
||||||
|
|
||||||
|
// extract updated embedding
|
||||||
|
updatedDenseColumn := resQueryAfter.GetColumn("dense")
|
||||||
|
require.NotNil(t, updatedDenseColumn)
|
||||||
|
updatedFloatVecColumn, ok := updatedDenseColumn.(*column.ColumnFloatVector)
|
||||||
|
require.True(t, ok, "Dense column should be a float vector column")
|
||||||
|
updatedEmbedding := updatedFloatVecColumn.Data()[0]
|
||||||
|
require.Equal(t, hp.GetTEIModelDim(), len(updatedEmbedding), "Updated embedding dimension should match")
|
||||||
|
|
||||||
|
// verify updated text
|
||||||
|
updatedDocColumn := resQueryAfter.GetColumn("document")
|
||||||
|
require.NotNil(t, updatedDocColumn)
|
||||||
|
updatedVarCharColumn, ok := updatedDocColumn.(*column.ColumnVarChar)
|
||||||
|
require.True(t, ok, "Document column should be a varchar column")
|
||||||
|
require.Equal(t, newText, updatedVarCharColumn.Data()[0], "Updated text should match")
|
||||||
|
|
||||||
|
// verify embeddings are different (key assertion)
|
||||||
|
similarity := hp.CosineSimilarity(originalEmbedding, updatedEmbedding)
|
||||||
|
require.Less(t, similarity, float32(0.95),
|
||||||
|
"Embeddings should be significantly different after text update (similarity=%.6f)", similarity)
|
||||||
|
|
||||||
|
t.Logf("Upsert verification complete: Original and updated embeddings have cosine similarity %.6f (< 0.95)", similarity)
|
||||||
|
t.Logf(" Original text: %s", oldText)
|
||||||
|
t.Logf(" Updated text: %s", newText)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestDeleteAndSearch tests that deleted text cannot be searched
|
||||||
|
func TestDeleteAndSearch(t *testing.T) {
|
||||||
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
||||||
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
||||||
|
|
||||||
|
// create collection with TEI function (custom fields for autoID=false)
|
||||||
|
function := hp.TNewTextEmbeddingFunction("document", "dense", map[string]any{
|
||||||
|
"provider": "TEI",
|
||||||
|
"endpoint": hp.GetTEIEndpoint(),
|
||||||
|
})
|
||||||
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
||||||
|
fieldsOption := hp.TNewFieldsOption().TWithDim(int64(hp.GetTEIModelDim())).TWithAutoID(false).TWithMaxLen(65535)
|
||||||
|
|
||||||
|
prepare, schema := hp.CollPrepare.CreateCollection(
|
||||||
|
ctx, t, mc,
|
||||||
|
hp.NewCreateCollectionParams(hp.TextEmbedding),
|
||||||
|
fieldsOption,
|
||||||
|
schemaOption,
|
||||||
|
hp.TWithConsistencyLevel(entity.ClStrong),
|
||||||
|
)
|
||||||
|
|
||||||
|
// insert test data
|
||||||
|
documents := []string{
|
||||||
|
"This is test document 0",
|
||||||
|
"This is test document 1",
|
||||||
|
"This is test document 2",
|
||||||
|
}
|
||||||
|
ids := []int64{0, 1, 2}
|
||||||
|
|
||||||
|
res, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).
|
||||||
|
WithInt64Column(common.DefaultInt64FieldName, ids).
|
||||||
|
WithVarcharColumn("document", documents))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
require.Equal(t, int64(3), res.InsertCount)
|
||||||
|
|
||||||
|
// create index and load
|
||||||
|
prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{"dense": index.NewAutoIndex(entity.COSINE)}))
|
||||||
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
||||||
|
|
||||||
|
// delete document with ID 1
|
||||||
|
res2, err := mc.Delete(ctx, milvusclient.NewDeleteOption(schema.CollectionName).WithExpr("int64 in [1]"))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
require.Equal(t, int64(1), res2.DeleteCount)
|
||||||
|
|
||||||
|
// search and verify document 1 is not in results
|
||||||
|
searchRes, err := mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, 3, []entity.Vector{entity.Text("test document 1")}).
|
||||||
|
WithANNSField("dense").
|
||||||
|
WithOutputFields("document", common.DefaultInt64FieldName))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
|
||||||
|
// verify deleted document is not in results
|
||||||
|
require.Greater(t, len(searchRes), 0)
|
||||||
|
for _, hits := range searchRes {
|
||||||
|
for i := 0; i < hits.Len(); i++ {
|
||||||
|
id, err := hits.IDs.GetAsInt64(i)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotEqual(t, int64(1), id, "Deleted document should not appear in search results")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestSearchWithTextEmbedding tests search functionality with text embedding
|
||||||
|
func TestSearchWithTextEmbedding(t *testing.T) {
|
||||||
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
||||||
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
||||||
|
|
||||||
|
// create -> insert -> index -> load
|
||||||
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.TextEmbedding), hp.TNewTextEmbeddingFieldsOption(), hp.TNewTextEmbeddingSchemaOption(), hp.TWithConsistencyLevel(entity.ClStrong))
|
||||||
|
|
||||||
|
// prepare test data
|
||||||
|
nb := 10
|
||||||
|
documents := make([]string, nb)
|
||||||
|
for i := 0; i < nb; i++ {
|
||||||
|
documents[i] = fmt.Sprintf("This is test document number %d about artificial intelligence and machine learning", i)
|
||||||
|
}
|
||||||
|
|
||||||
|
// insert data using only text field
|
||||||
|
res, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).WithVarcharColumn("document", documents))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
require.Equal(t, int64(nb), res.InsertCount)
|
||||||
|
|
||||||
|
// create index and load
|
||||||
|
prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{"dense": index.NewAutoIndex(entity.COSINE)}))
|
||||||
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
||||||
|
|
||||||
|
// search using text query
|
||||||
|
queryText := "artificial intelligence machine learning"
|
||||||
|
searchRes, err := mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, 5, []entity.Vector{entity.Text(queryText)}).
|
||||||
|
WithANNSField("dense").
|
||||||
|
WithOutputFields("document"))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
|
||||||
|
require.Greater(t, len(searchRes), 0)
|
||||||
|
for _, hits := range searchRes {
|
||||||
|
require.Greater(t, hits.Len(), 0, "Should find relevant documents")
|
||||||
|
require.LessOrEqual(t, hits.Len(), 5, "Should respect limit")
|
||||||
|
|
||||||
|
// verify results contain the search terms (semantic similarity)
|
||||||
|
for i := 0; i < hits.Len(); i++ {
|
||||||
|
score := hits.Scores[i]
|
||||||
|
require.Greater(t, score, float32(0), "Score should be positive")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestSearchWithEmptyQuery tests search with empty query (should fail)
|
||||||
|
func TestSearchWithEmptyQuery(t *testing.T) {
|
||||||
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
||||||
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
||||||
|
|
||||||
|
// create collection with TEI function
|
||||||
|
_, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.TextEmbedding), hp.TNewTextEmbeddingFieldsOption(), hp.TNewTextEmbeddingSchemaOption(), hp.TWithConsistencyLevel(entity.ClStrong))
|
||||||
|
|
||||||
|
// insert some test data
|
||||||
|
documents := []string{"test document"}
|
||||||
|
res, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).WithVarcharColumn("document", documents))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
require.Equal(t, int64(1), res.InsertCount)
|
||||||
|
|
||||||
|
// create index and load
|
||||||
|
_, err = mc.CreateIndex(ctx, milvusclient.NewCreateIndexOption(schema.CollectionName, "dense", index.NewAutoIndex(entity.COSINE)))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
|
||||||
|
_, err = mc.LoadCollection(ctx, milvusclient.NewLoadCollectionOption(schema.CollectionName))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
|
||||||
|
// search with empty query should fail
|
||||||
|
_, err = mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, 3, []entity.Vector{entity.Text("")}).
|
||||||
|
WithANNSField("dense"))
|
||||||
|
|
||||||
|
common.CheckErr(t, err, false, "TextEmbedding function does not support empty text")
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHybridSearchTextEmbeddingBM25 tests hybrid search combining TEI text embedding and BM25
|
||||||
|
func TestHybridSearchTextEmbeddingBM25(t *testing.T) {
|
||||||
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
||||||
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
||||||
|
|
||||||
|
// create collection with both TEI text embedding and BM25 functions
|
||||||
|
collectionName := common.GenRandomString("hybrid_search", 6)
|
||||||
|
|
||||||
|
// create fields manually to support both dense and sparse vectors
|
||||||
|
fields := []*entity.Field{
|
||||||
|
entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithIsAutoID(true),
|
||||||
|
entity.NewField().WithName("document").WithDataType(entity.FieldTypeVarChar).WithMaxLength(65535).WithEnableAnalyzer(true).WithAnalyzerParams(map[string]any{"tokenizer": "standard"}),
|
||||||
|
entity.NewField().WithName("dense").WithDataType(entity.FieldTypeFloatVector).WithDim(int64(hp.GetTEIModelDim())),
|
||||||
|
entity.NewField().WithName("sparse").WithDataType(entity.FieldTypeSparseVector),
|
||||||
|
}
|
||||||
|
|
||||||
|
// create TEI text embedding function
|
||||||
|
teiFunction := hp.TNewTextEmbeddingFunction("document", "dense", map[string]any{
|
||||||
|
"provider": "TEI",
|
||||||
|
"endpoint": hp.GetTEIEndpoint(),
|
||||||
|
})
|
||||||
|
|
||||||
|
// create BM25 function
|
||||||
|
bm25Function := hp.TNewBM25Function("document", "sparse")
|
||||||
|
|
||||||
|
// create schema with both functions
|
||||||
|
schema := entity.NewSchema().
|
||||||
|
WithName(collectionName).
|
||||||
|
WithDescription("Hybrid search collection with TEI and BM25").
|
||||||
|
WithFunction(teiFunction).
|
||||||
|
WithFunction(bm25Function)
|
||||||
|
|
||||||
|
for _, field := range fields {
|
||||||
|
schema.WithField(field)
|
||||||
|
}
|
||||||
|
|
||||||
|
// create collection
|
||||||
|
err := mc.CreateCollection(ctx, milvusclient.NewCreateCollectionOption(collectionName, schema))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
|
||||||
|
// insert test data with diverse content
|
||||||
|
documents := []string{
|
||||||
|
"Artificial intelligence and machine learning are transforming technology",
|
||||||
|
"Vector databases enable semantic search capabilities for AI applications",
|
||||||
|
"Text embeddings capture semantic meaning in numerical representations",
|
||||||
|
"BM25 is a traditional keyword-based search algorithm",
|
||||||
|
"Hybrid search combines semantic and keyword-based retrieval methods",
|
||||||
|
"Large language models use transformer architectures for text understanding",
|
||||||
|
"Information retrieval systems help users find relevant documents",
|
||||||
|
"Natural language processing enables computers to understand human language",
|
||||||
|
"Database systems store and retrieve structured information efficiently",
|
||||||
|
"Search engines use ranking algorithms to order results by relevance",
|
||||||
|
}
|
||||||
|
|
||||||
|
// insert data - both embeddings will be generated automatically
|
||||||
|
res, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(collectionName).WithVarcharColumn("document", documents))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
require.Equal(t, int64(len(documents)), res.InsertCount)
|
||||||
|
|
||||||
|
// create indexes
|
||||||
|
_, err = mc.CreateIndex(ctx, milvusclient.NewCreateIndexOption(collectionName, "dense", index.NewAutoIndex(entity.COSINE)))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
|
||||||
|
_, err = mc.CreateIndex(ctx, milvusclient.NewCreateIndexOption(collectionName, "sparse", index.NewSparseInvertedIndex(entity.BM25, 0.1)))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
|
||||||
|
// load collection
|
||||||
|
_, err = mc.LoadCollection(ctx, milvusclient.NewLoadCollectionOption(collectionName))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
|
||||||
|
// test 1: Dense vector search (TEI semantic search)
|
||||||
|
t.Run("DenseVectorSearch", func(t *testing.T) {
|
||||||
|
queryText := "machine learning artificial intelligence"
|
||||||
|
searchRes, err := mc.Search(ctx, milvusclient.NewSearchOption(collectionName, 3, []entity.Vector{entity.Text(queryText)}).
|
||||||
|
WithANNSField("dense").
|
||||||
|
WithOutputFields("document"))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
|
||||||
|
require.Greater(t, len(searchRes), 0)
|
||||||
|
for _, hits := range searchRes {
|
||||||
|
require.Greater(t, hits.Len(), 0, "Should find semantically similar documents")
|
||||||
|
t.Logf("Dense search found %d results for query: %s", hits.Len(), queryText)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// test 2: Sparse vector search (BM25 keyword search)
|
||||||
|
t.Run("SparseVectorSearch", func(t *testing.T) {
|
||||||
|
queryText := "database systems"
|
||||||
|
searchRes, err := mc.Search(ctx, milvusclient.NewSearchOption(collectionName, 3, []entity.Vector{entity.Text(queryText)}).
|
||||||
|
WithANNSField("sparse").
|
||||||
|
WithOutputFields("document"))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
|
||||||
|
require.Greater(t, len(searchRes), 0)
|
||||||
|
for _, hits := range searchRes {
|
||||||
|
require.Greater(t, hits.Len(), 0, "Should find keyword-matching documents")
|
||||||
|
t.Logf("Sparse search found %d results for query: %s", hits.Len(), queryText)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// test 3: Both search types work independently
|
||||||
|
t.Run("IndependentSearches", func(t *testing.T) {
|
||||||
|
queryText := "vector search"
|
||||||
|
|
||||||
|
// Dense search
|
||||||
|
denseRes, err := mc.Search(ctx, milvusclient.NewSearchOption(collectionName, 5, []entity.Vector{entity.Text(queryText)}).
|
||||||
|
WithANNSField("dense").
|
||||||
|
WithOutputFields("document"))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
|
||||||
|
// Sparse search
|
||||||
|
sparseRes, err := mc.Search(ctx, milvusclient.NewSearchOption(collectionName, 5, []entity.Vector{entity.Text(queryText)}).
|
||||||
|
WithANNSField("sparse").
|
||||||
|
WithOutputFields("document"))
|
||||||
|
common.CheckErr(t, err, true)
|
||||||
|
|
||||||
|
// Both should return results
|
||||||
|
require.Greater(t, len(denseRes), 0, "Dense search should return results")
|
||||||
|
require.Greater(t, len(sparseRes), 0, "Sparse search should return results")
|
||||||
|
|
||||||
|
for _, hits := range denseRes {
|
||||||
|
require.Greater(t, hits.Len(), 0, "Dense search should find documents")
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, hits := range sparseRes {
|
||||||
|
require.Greater(t, hits.Len(), 0, "Sparse search should find documents")
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Logf("Dense search found %d results, Sparse search found %d results",
|
||||||
|
len(denseRes), len(sparseRes))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestInsertEmptyDocument tests insertion with empty document
|
||||||
|
func TestInsertEmptyDocument(t *testing.T) {
|
||||||
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
||||||
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
||||||
|
|
||||||
|
// create collection with TEI function
|
||||||
|
_, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.TextEmbedding), hp.TNewTextEmbeddingFieldsOption(), hp.TNewTextEmbeddingSchemaOption(), hp.TWithConsistencyLevel(entity.ClStrong))
|
||||||
|
|
||||||
|
// try to insert empty document
|
||||||
|
documents := []string{"", "normal document"}
|
||||||
|
|
||||||
|
_, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).WithVarcharColumn("document", documents))
|
||||||
|
|
||||||
|
// should fail with empty document
|
||||||
|
common.CheckErr(t, err, false, "TextEmbedding function does not support empty text")
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestInsertLongDocument tests insertion with very long document
|
||||||
|
func TestInsertLongDocument(t *testing.T) {
|
||||||
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
||||||
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
||||||
|
|
||||||
|
// create collection with TEI function (no truncate)
|
||||||
|
params := map[string]any{
|
||||||
|
"provider": "TEI",
|
||||||
|
"endpoint": hp.GetTEIEndpoint(),
|
||||||
|
"truncate": "false",
|
||||||
|
}
|
||||||
|
function := hp.TNewTextEmbeddingFunction("document", "dense", params)
|
||||||
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
||||||
|
fieldsOption := hp.TNewFieldsOption().TWithDim(int64(hp.GetTEIModelDim())).TWithAutoID(true).TWithMaxLen(65535)
|
||||||
|
|
||||||
|
_, schema := hp.CollPrepare.CreateCollection(
|
||||||
|
ctx, t, mc,
|
||||||
|
hp.NewCreateCollectionParams(hp.TextEmbedding),
|
||||||
|
fieldsOption,
|
||||||
|
schemaOption,
|
||||||
|
hp.TWithConsistencyLevel(entity.ClStrong),
|
||||||
|
)
|
||||||
|
|
||||||
|
// try to insert very long document that exceeds model limits
|
||||||
|
longDocument := hp.GenLongText(8192, "english") // Very long text
|
||||||
|
documents := []string{longDocument}
|
||||||
|
|
||||||
|
_, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).WithVarcharColumn("document", documents))
|
||||||
|
|
||||||
|
// should fail with long document when truncate is false
|
||||||
|
common.CheckErr(t, err, false, "Call service failed")
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestInvalidEndpointHandling tests various invalid endpoint scenarios
|
||||||
|
func TestInvalidEndpointHandling(t *testing.T) {
|
||||||
|
testCases := []struct {
|
||||||
|
name string
|
||||||
|
endpoint string
|
||||||
|
errMsg string
|
||||||
|
}{
|
||||||
|
{"NonExistentHost", "http://nonexistent-host:8080", "nonexistent-host"},
|
||||||
|
{"InvalidPort", "http://localhost:99999", "99999"},
|
||||||
|
{"InvalidProtocol", "ftp://localhost:8080", "ftp"},
|
||||||
|
{"EmptyEndpoint", "", "endpoint"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
||||||
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
||||||
|
|
||||||
|
// create collection with invalid endpoint
|
||||||
|
function := hp.TNewTextEmbeddingFunction("document", "dense", map[string]any{
|
||||||
|
"provider": "TEI",
|
||||||
|
"endpoint": tc.endpoint,
|
||||||
|
})
|
||||||
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
||||||
|
fieldsOption := hp.TNewFieldsOption().TWithDim(int64(hp.GetTEIModelDim())).TWithAutoID(true).TWithMaxLen(65535)
|
||||||
|
|
||||||
|
// collection creation should fail for invalid endpoints
|
||||||
|
collectionName := common.GenRandomString("test_invalid", 6)
|
||||||
|
err := mc.CreateCollection(ctx, milvusclient.NewCreateCollectionOption(
|
||||||
|
collectionName,
|
||||||
|
hp.GenSchema(schemaOption.TWithFields(hp.FieldsFact.GenFieldsForCollection(hp.TextEmbedding, fieldsOption))),
|
||||||
|
))
|
||||||
|
|
||||||
|
common.CheckErr(t, err, false, tc.errMsg)
|
||||||
|
t.Logf("Expected error for %s: %v", tc.name, err)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestMissingRequiredParameters tests creation with missing required parameters
|
||||||
|
func TestMissingRequiredParameters(t *testing.T) {
|
||||||
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
||||||
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
||||||
|
|
||||||
|
testCases := []struct {
|
||||||
|
name string
|
||||||
|
params map[string]any
|
||||||
|
errMsg string
|
||||||
|
}{
|
||||||
|
{"MissingProvider", map[string]any{"endpoint": hp.GetTEIEndpoint()}, "provider"},
|
||||||
|
{"MissingEndpoint", map[string]any{"provider": "TEI"}, "endpoint"},
|
||||||
|
{"WrongProvider", map[string]any{"provider": "InvalidProvider", "endpoint": hp.GetTEIEndpoint()}, "invalidprovider"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
// create function with incomplete parameters
|
||||||
|
function := entity.NewFunction().
|
||||||
|
WithName("incomplete_func").
|
||||||
|
WithInputFields("document").
|
||||||
|
WithOutputFields("dense").
|
||||||
|
WithType(entity.FunctionTypeTextEmbedding)
|
||||||
|
|
||||||
|
for key, value := range tc.params {
|
||||||
|
function.WithParam(key, value)
|
||||||
|
}
|
||||||
|
|
||||||
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
||||||
|
fieldsOption := hp.TNewFieldsOption().TWithDim(int64(hp.GetTEIModelDim())).TWithAutoID(true).TWithMaxLen(65535)
|
||||||
|
|
||||||
|
// collection creation should fail
|
||||||
|
err := mc.CreateCollection(ctx, milvusclient.NewCreateCollectionOption(
|
||||||
|
common.GenRandomString("test_incomplete", 6),
|
||||||
|
hp.GenSchema(schemaOption.TWithFields(hp.FieldsFact.GenFieldsForCollection(hp.TextEmbedding, fieldsOption))),
|
||||||
|
))
|
||||||
|
|
||||||
|
common.CheckErr(t, err, false, tc.errMsg)
|
||||||
|
t.Logf("Expected error for %s: %v", tc.name, err)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestConcurrentOperations tests concurrent text embedding operations
|
||||||
|
func TestConcurrentOperations(t *testing.T) {
|
||||||
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout*2) // longer timeout for concurrent ops
|
||||||
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
||||||
|
|
||||||
|
// create collection with TEI function
|
||||||
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.TextEmbedding), hp.TNewTextEmbeddingFieldsOption(), hp.TNewTextEmbeddingSchemaOption(), hp.TWithConsistencyLevel(entity.ClStrong))
|
||||||
|
|
||||||
|
// create index and load
|
||||||
|
prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{"dense": index.NewAutoIndex(entity.COSINE)}))
|
||||||
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
||||||
|
|
||||||
|
// concurrent inserts
|
||||||
|
t.Run("ConcurrentInserts", func(t *testing.T) {
|
||||||
|
numRoutines := 5
|
||||||
|
documentsPerRoutine := 5
|
||||||
|
|
||||||
|
results := make(chan error, numRoutines)
|
||||||
|
|
||||||
|
for i := 0; i < numRoutines; i++ {
|
||||||
|
go func(routineID int) {
|
||||||
|
documents := make([]string, documentsPerRoutine)
|
||||||
|
for j := 0; j < documentsPerRoutine; j++ {
|
||||||
|
documents[j] = fmt.Sprintf("Concurrent document from routine %d, doc %d", routineID, j)
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).WithVarcharColumn("document", documents))
|
||||||
|
results <- err
|
||||||
|
}(i)
|
||||||
|
}
|
||||||
|
|
||||||
|
// wait for all goroutines to complete
|
||||||
|
for i := 0; i < numRoutines; i++ {
|
||||||
|
err := <-results
|
||||||
|
require.NoError(t, err, "Concurrent insert should succeed")
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Logf("Successfully completed %d concurrent inserts with %d documents each", numRoutines, documentsPerRoutine)
|
||||||
|
})
|
||||||
|
|
||||||
|
// concurrent searches
|
||||||
|
t.Run("ConcurrentSearches", func(t *testing.T) {
|
||||||
|
numRoutines := 3
|
||||||
|
|
||||||
|
results := make(chan error, numRoutines)
|
||||||
|
|
||||||
|
for i := 0; i < numRoutines; i++ {
|
||||||
|
go func(routineID int) {
|
||||||
|
queryText := fmt.Sprintf("document routine %d", routineID)
|
||||||
|
_, err := mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, 5, []entity.Vector{entity.Text(queryText)}).
|
||||||
|
WithANNSField("dense").
|
||||||
|
WithOutputFields("document"))
|
||||||
|
results <- err
|
||||||
|
}(i)
|
||||||
|
}
|
||||||
|
|
||||||
|
// wait for all searches to complete
|
||||||
|
for i := 0; i < numRoutines; i++ {
|
||||||
|
err := <-results
|
||||||
|
require.NoError(t, err, "Concurrent search should succeed")
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Logf("Successfully completed %d concurrent searches", numRoutines)
|
||||||
|
})
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user