mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
https://github.com/milvus-io/milvus/issues/45842 Signed-off-by: junjie.jiang <junjie.jiang@zilliz.com>
975 lines
40 KiB
Go
975 lines
40 KiB
Go
package testcases
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"github.com/milvus-io/milvus/client/v2/column"
|
|
"github.com/milvus-io/milvus/client/v2/entity"
|
|
"github.com/milvus-io/milvus/client/v2/index"
|
|
"github.com/milvus-io/milvus/client/v2/milvusclient"
|
|
"github.com/milvus-io/milvus/tests/go_client/common"
|
|
hp "github.com/milvus-io/milvus/tests/go_client/testcases/helper"
|
|
)
|
|
|
|
// newTextEmbeddingFieldsOption creates fields option with text embedding settings
|
|
func newTextEmbeddingFieldsOption(autoId bool) hp.FieldOptions {
|
|
fieldOpts := hp.TNewFieldOptions().
|
|
WithFieldOption("document", hp.TNewFieldsOption().TWithMaxLen(common.MaxLength)).
|
|
WithFieldOption("dense", hp.TNewFieldsOption().TWithDim(int64(hp.GetTEIModelDim()))).
|
|
WithFieldOption(common.DefaultInt64FieldName, hp.TNewFieldsOption().TWithAutoID(autoId))
|
|
return fieldOpts
|
|
}
|
|
|
|
// TestCreateCollectionWithTextEmbedding tests basic collection creation with text embedding function
|
|
func TestCreateCollectionWithTextEmbedding(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create collection with TEI function
|
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.TextEmbedding), newTextEmbeddingFieldsOption(true), hp.TNewTextEmbeddingSchemaOption(), hp.TWithConsistencyLevel(entity.ClStrong))
|
|
|
|
// verify collection creation
|
|
require.NotNil(t, prepare)
|
|
require.NotNil(t, schema)
|
|
|
|
// describe collection to verify function
|
|
descRes, err := mc.DescribeCollection(ctx, milvusclient.NewDescribeCollectionOption(schema.CollectionName))
|
|
common.CheckErr(t, err, true)
|
|
require.Len(t, descRes.Schema.Functions, 1)
|
|
require.Equal(t, "document_text_emb", descRes.Schema.Functions[0].Name)
|
|
require.Equal(t, entity.FunctionTypeTextEmbedding, descRes.Schema.Functions[0].Type)
|
|
require.Equal(t, []string{"document"}, descRes.Schema.Functions[0].InputFieldNames)
|
|
require.Equal(t, []string{"dense"}, descRes.Schema.Functions[0].OutputFieldNames)
|
|
}
|
|
|
|
// TestCreateCollectionWithTextEmbeddingTwice tests creating collection twice with same schema
|
|
func TestCreateCollectionWithTextEmbeddingTwice(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create collection with TEI function
|
|
function := hp.TNewTextEmbeddingFunction("document", "dense", map[string]any{
|
|
"provider": "TEI",
|
|
"endpoint": hp.GetTEIEndpoint(),
|
|
})
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
fieldsOption := newTextEmbeddingFieldsOption(true)
|
|
|
|
collectionName := common.GenRandomString("text_embedding", 6)
|
|
createParams := hp.NewCreateCollectionParams(hp.TextEmbedding)
|
|
|
|
// first creation
|
|
prepare1, schema1 := hp.CollPrepare.CreateCollection(
|
|
ctx, t, mc, createParams, fieldsOption,
|
|
schemaOption.TWithName(collectionName),
|
|
hp.TWithConsistencyLevel(entity.ClStrong),
|
|
)
|
|
require.NotNil(t, prepare1)
|
|
require.NotNil(t, schema1)
|
|
|
|
// second creation with same name should succeed (idempotent)
|
|
prepare2, schema2 := hp.CollPrepare.CreateCollection(
|
|
ctx, t, mc, createParams, fieldsOption,
|
|
schemaOption.TWithName(collectionName),
|
|
hp.TWithConsistencyLevel(entity.ClStrong),
|
|
)
|
|
require.NotNil(t, prepare2)
|
|
require.NotNil(t, schema2)
|
|
|
|
// verify function exists
|
|
descRes, err := mc.DescribeCollection(ctx, milvusclient.NewDescribeCollectionOption(collectionName))
|
|
common.CheckErr(t, err, true)
|
|
require.Len(t, descRes.Schema.Functions, 1)
|
|
}
|
|
|
|
// TestCreateCollectionUnsupportedEndpoint tests creation with unsupported endpoint
|
|
func TestCreateCollectionUnsupportedEndpoint(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create collection with invalid endpoint
|
|
function := hp.TNewTextEmbeddingFunction("document", "dense", map[string]any{
|
|
"provider": "TEI",
|
|
"endpoint": "http://unsupported_endpoint",
|
|
})
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
|
|
// this should fail during collection creation
|
|
fieldOpts := hp.TNewFieldOptions().
|
|
WithFieldOption("document", hp.TNewFieldsOption().TWithMaxLen(common.MaxLength)).
|
|
WithFieldOption("dense", hp.TNewFieldsOption().TWithDim(int64(hp.GetTEIModelDim()))).
|
|
WithFieldOption(common.DefaultInt64FieldName, hp.TNewFieldsOption().TWithAutoID(true))
|
|
err := mc.CreateCollection(ctx, milvusclient.NewCreateCollectionOption(
|
|
common.GenRandomString("text_embedding", 6),
|
|
hp.GenSchema(schemaOption.TWithFields(hp.FieldsFact.GenFieldsForCollection(hp.TextEmbedding, fieldOpts))),
|
|
))
|
|
|
|
// expect error due to unsupported endpoint
|
|
common.CheckErr(t, err, false, "unsupported_endpoint")
|
|
}
|
|
|
|
// TestCreateCollectionUnmatchedDim tests creation with mismatched dimension
|
|
func TestCreateCollectionUnmatchedDim(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create collection with wrong dimension (512 instead of expected 768 from TEI model)
|
|
wrongDim := int64(512)
|
|
function := hp.TNewTextEmbeddingFunction("document", "dense", map[string]any{
|
|
"provider": "TEI",
|
|
"endpoint": hp.GetTEIEndpoint(),
|
|
})
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
fieldsOption := hp.TNewFieldOptions().
|
|
WithFieldOption("document", hp.TNewFieldsOption().TWithMaxLen(common.MaxLength)).
|
|
WithFieldOption("dense", hp.TNewFieldsOption().TWithDim(wrongDim)).
|
|
WithFieldOption(common.DefaultInt64FieldName, hp.TNewFieldsOption().TWithAutoID(true))
|
|
|
|
collectionName := common.GenRandomString("text_embedding", 6)
|
|
|
|
// collection creation should fail with dimension mismatch error
|
|
err := mc.CreateCollection(ctx, milvusclient.NewCreateCollectionOption(
|
|
collectionName,
|
|
hp.GenSchema(schemaOption.TWithFields(hp.FieldsFact.GenFieldsForCollection(hp.TextEmbedding, fieldsOption))),
|
|
))
|
|
|
|
// Expect error with specific dimension mismatch message
|
|
expectedError := fmt.Sprintf("required embedding dim is [%d], but the embedding obtained from the model is [%d]", wrongDim, hp.GetTEIModelDim())
|
|
common.CheckErr(t, err, false, expectedError)
|
|
}
|
|
|
|
// TestInsertWithTextEmbedding tests basic data insertion with text embedding
|
|
func TestInsertWithTextEmbedding(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create collection with TEI function
|
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.TextEmbedding), newTextEmbeddingFieldsOption(true), hp.TNewTextEmbeddingSchemaOption(), hp.TWithConsistencyLevel(entity.ClStrong))
|
|
|
|
// prepare test data - only provide text, embedding will be auto-generated
|
|
nb := 10
|
|
documents := make([]string, nb)
|
|
for i := 0; i < nb; i++ {
|
|
documents[i] = fmt.Sprintf("This is test document number %d with some content for embedding", i)
|
|
}
|
|
|
|
// insert data using only text field
|
|
res, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).WithVarcharColumn("document", documents))
|
|
common.CheckErr(t, err, true)
|
|
require.Equal(t, int64(nb), res.InsertCount)
|
|
|
|
// create index and load
|
|
prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{"dense": index.NewAutoIndex(entity.COSINE)}))
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
// query to verify vectors were generated
|
|
resQuery, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter("").WithOutputFields("dense").WithLimit(10))
|
|
common.CheckErr(t, err, true)
|
|
require.Greater(t, len(resQuery.Fields), 0)
|
|
|
|
// verify vector dimension - check first result
|
|
if resQuery.Len() > 0 {
|
|
// Query results structure is different - need to check the actual field structure
|
|
denseColumn := resQuery.GetColumn("dense")
|
|
require.NotNil(t, denseColumn)
|
|
// Field should contain vectors for all results
|
|
}
|
|
}
|
|
|
|
// TestInsertWithTruncateParams tests insertion with different truncate parameters
|
|
func TestInsertWithTruncateParams(t *testing.T) {
|
|
testCases := []struct {
|
|
name string
|
|
truncate bool
|
|
truncationDirection string
|
|
shouldSucceed bool
|
|
}{
|
|
{"truncate_true_right", true, "Right", true},
|
|
{"truncate_true_left", true, "Left", true},
|
|
{"truncate_false", false, "", false}, // should fail with long text
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create TEI function with truncate parameters
|
|
params := map[string]any{}
|
|
if tc.truncate {
|
|
params["truncate"] = "true"
|
|
params["truncation_direction"] = tc.truncationDirection
|
|
} else {
|
|
params["truncate"] = "false"
|
|
}
|
|
|
|
params["provider"] = "TEI"
|
|
params["endpoint"] = hp.GetTEIEndpoint()
|
|
function := hp.TNewTextEmbeddingFunction("document", "dense", params)
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
fieldsOption := newTextEmbeddingFieldsOption(true)
|
|
|
|
_, schema := hp.CollPrepare.CreateCollection(
|
|
ctx, t, mc,
|
|
hp.NewCreateCollectionParams(hp.TextEmbedding),
|
|
fieldsOption,
|
|
schemaOption,
|
|
hp.TWithConsistencyLevel(entity.ClStrong),
|
|
)
|
|
|
|
// prepare long text data that would need truncation
|
|
// Generate distinctly different left and right parts that will exceed token limits when combined
|
|
leftPart := "artificial intelligence machine learning deep learning neural networks computer vision natural language processing data science algorithms " + strings.Repeat("technology innovation science research development analysis ", 100)
|
|
rightPart := "database systems vector search embeddings similarity matching retrieval information storage indexing " + strings.Repeat("query performance optimization scalability distributed computing ", 100)
|
|
longText := leftPart + " " + rightPart // This will exceed 512 tokens and need truncation
|
|
|
|
documents := []string{longText, leftPart, rightPart}
|
|
|
|
// insert data
|
|
res, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).WithVarcharColumn("document", documents))
|
|
|
|
if tc.shouldSucceed {
|
|
common.CheckErr(t, err, true)
|
|
require.Equal(t, int64(len(documents)), res.InsertCount)
|
|
|
|
// create index and load for embedding comparison
|
|
_, err = mc.CreateIndex(ctx, milvusclient.NewCreateIndexOption(schema.CollectionName, "dense", index.NewAutoIndex(entity.COSINE)))
|
|
common.CheckErr(t, err, true)
|
|
|
|
_, err = mc.LoadCollection(ctx, milvusclient.NewLoadCollectionOption(schema.CollectionName))
|
|
common.CheckErr(t, err, true)
|
|
|
|
// Query embeddings from Milvus
|
|
resQuery, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).
|
|
WithFilter("").
|
|
WithOutputFields("dense", "document").
|
|
WithConsistencyLevel(entity.ClStrong).
|
|
WithLimit(10))
|
|
common.CheckErr(t, err, true)
|
|
require.Equal(t, len(documents), resQuery.Len())
|
|
|
|
// Extract Milvus embeddings
|
|
denseColumn := resQuery.GetColumn("dense")
|
|
require.NotNil(t, denseColumn)
|
|
floatVecColumn, ok := denseColumn.(*column.ColumnFloatVector)
|
|
require.True(t, ok, "Dense column should be a float vector column")
|
|
|
|
// Truncation validation using similarity comparison approach
|
|
// This follows the Python test logic: compare similarity between combined text and parts
|
|
// to verify that truncation direction works correctly
|
|
|
|
require.Equal(t, 3, resQuery.Len(), "Should have 3 documents: longText, leftPart, rightPart")
|
|
|
|
// Get embeddings for: [0]=longText, [1]=leftPart, [2]=rightPart
|
|
embeddings := make([][]float32, 3)
|
|
for i := 0; i < 3; i++ {
|
|
embedding := floatVecColumn.Data()[i]
|
|
require.Equal(t, hp.GetTEIModelDim(), len(embedding), "Embedding should have correct dimension")
|
|
|
|
// Check that embedding is not all zeros (would indicate a failure)
|
|
var sum float32
|
|
for _, val := range embedding {
|
|
sum += val * val
|
|
}
|
|
require.Greater(t, sum, float32(0.01), "Embedding should not be all zeros for document %d", i)
|
|
|
|
embeddings[i] = embedding
|
|
}
|
|
|
|
// Calculate cosine similarities
|
|
// similarity_left: longText vs leftPart
|
|
// similarity_right: longText vs rightPart
|
|
similarityLeft := hp.CosineSimilarity(embeddings[0], embeddings[1])
|
|
similarityRight := hp.CosineSimilarity(embeddings[0], embeddings[2])
|
|
|
|
t.Logf("Similarity longText vs leftPart: %.6f", similarityLeft)
|
|
t.Logf("Similarity longText vs rightPart: %.6f", similarityRight)
|
|
|
|
// Validation based on truncation direction:
|
|
// - If truncation_direction = "Left", we keep the right part, so longText should be more similar to rightPart
|
|
// - If truncation_direction = "Right", we keep the left part, so longText should be more similar to leftPart
|
|
if tc.truncationDirection == "Left" {
|
|
require.Greater(t, similarityRight, similarityLeft,
|
|
"With Left truncation, longText should be more similar to rightPart (%.6f) than leftPart (%.6f)",
|
|
similarityRight, similarityLeft)
|
|
t.Logf("Left truncation verified: rightPart similarity (%.6f) > leftPart similarity (%.6f)",
|
|
similarityRight, similarityLeft)
|
|
} else { // "Right"
|
|
require.Greater(t, similarityLeft, similarityRight,
|
|
"With Right truncation, longText should be more similar to leftPart (%.6f) than rightPart (%.6f)",
|
|
similarityLeft, similarityRight)
|
|
t.Logf("Right truncation verified: leftPart similarity (%.6f) > rightPart similarity (%.6f)",
|
|
similarityLeft, similarityRight)
|
|
}
|
|
|
|
t.Logf("Successfully inserted %d documents with truncate=%v, direction=%s", len(documents), tc.truncate, tc.truncationDirection)
|
|
} else {
|
|
common.CheckErr(t, err, false, "Payload Too Large")
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestVerifyEmbeddingConsistency verifies that Milvus text embedding function produces same results as direct TEI calls
|
|
func TestVerifyEmbeddingConsistency(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create collection with TEI function (custom fields for autoID=false)
|
|
function := hp.TNewTextEmbeddingFunction("document", "dense", map[string]any{
|
|
"provider": "TEI",
|
|
"endpoint": hp.GetTEIEndpoint(),
|
|
})
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
fieldsOption := newTextEmbeddingFieldsOption(false)
|
|
|
|
prepare, schema := hp.CollPrepare.CreateCollection(
|
|
ctx, t, mc,
|
|
hp.NewCreateCollectionParams(hp.TextEmbedding),
|
|
fieldsOption,
|
|
schemaOption,
|
|
hp.TWithConsistencyLevel(entity.ClStrong),
|
|
)
|
|
|
|
// Test documents
|
|
testDocs := []string{
|
|
"This is a test document about artificial intelligence",
|
|
"Vector databases enable semantic search capabilities",
|
|
"Text embeddings transform language into numbers",
|
|
}
|
|
|
|
// Insert documents into Milvus (will use text embedding function)
|
|
ids := []int64{1, 2, 3}
|
|
res, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).
|
|
WithInt64Column(common.DefaultInt64FieldName, ids).
|
|
WithVarcharColumn("document", testDocs))
|
|
common.CheckErr(t, err, true)
|
|
require.Equal(t, int64(len(testDocs)), res.InsertCount)
|
|
|
|
// Create index and load
|
|
prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{"dense": index.NewAutoIndex(entity.COSINE)}))
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
// Query vectors from Milvus
|
|
resQuery, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).
|
|
WithFilter("").
|
|
WithOutputFields("dense", "document", common.DefaultInt64FieldName).
|
|
WithConsistencyLevel(entity.ClStrong).
|
|
WithLimit(10))
|
|
common.CheckErr(t, err, true)
|
|
require.Equal(t, len(testDocs), resQuery.Len())
|
|
|
|
// Get embeddings directly from TEI
|
|
teiEmbeddings, err := hp.CallTEIDirectly(hp.GetTEIEndpoint(), testDocs)
|
|
if err != nil {
|
|
t.Skipf("Skip consistency test - could not connect to TEI endpoint: %v", err)
|
|
return
|
|
}
|
|
require.Equal(t, len(testDocs), len(teiEmbeddings))
|
|
|
|
// Compare embeddings
|
|
denseColumn := resQuery.GetColumn("dense")
|
|
require.NotNil(t, denseColumn)
|
|
|
|
// Get ID column to match embeddings with documents
|
|
idColumn := resQuery.GetColumn(common.DefaultInt64FieldName)
|
|
require.NotNil(t, idColumn)
|
|
|
|
// Extract and compare embeddings - need to handle column type properly
|
|
floatVecColumn, ok := denseColumn.(*column.ColumnFloatVector)
|
|
require.True(t, ok, "Dense column should be a float vector column")
|
|
|
|
for i := 0; i < resQuery.Len(); i++ {
|
|
// Get ID to find corresponding TEI embedding
|
|
id, err := idColumn.GetAsInt64(i)
|
|
require.NoError(t, err)
|
|
teiIdx := id - 1 // IDs are 1-based, array is 0-based
|
|
|
|
// Get Milvus embedding from the float vector column
|
|
milvusEmbedding := floatVecColumn.Data()[i]
|
|
|
|
require.NotNil(t, milvusEmbedding)
|
|
require.Equal(t, hp.GetTEIModelDim(), len(milvusEmbedding), "Embedding dimension should match")
|
|
|
|
// Calculate cosine similarity
|
|
similarity := hp.CosineSimilarity(milvusEmbedding, teiEmbeddings[teiIdx])
|
|
|
|
t.Logf("Document %d (ID=%d) similarity between Milvus and TEI: %.6f", i, id, similarity)
|
|
|
|
// Embeddings should be nearly identical (similarity > 0.99)
|
|
require.Greater(t, similarity, float32(0.99),
|
|
"Milvus embedding should be nearly identical to TEI embedding for document ID %d", id)
|
|
}
|
|
|
|
t.Log("Embedding consistency verified: Milvus text embedding function produces same results as direct TEI calls")
|
|
}
|
|
|
|
// TestUpsertTextFieldUpdatesEmbedding tests that upserting text field updates embedding
|
|
func TestUpsertTextFieldUpdatesEmbedding(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create collection with TEI function (custom fields for autoID=false for upsert)
|
|
function := hp.TNewTextEmbeddingFunction("document", "dense", map[string]any{
|
|
"provider": "TEI",
|
|
"endpoint": hp.GetTEIEndpoint(),
|
|
})
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
fieldsOption := newTextEmbeddingFieldsOption(false)
|
|
|
|
prepare, schema := hp.CollPrepare.CreateCollection(
|
|
ctx, t, mc,
|
|
hp.NewCreateCollectionParams(hp.TextEmbedding),
|
|
fieldsOption,
|
|
schemaOption,
|
|
hp.TWithConsistencyLevel(entity.ClStrong),
|
|
)
|
|
|
|
// create index and load first
|
|
prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{"dense": index.NewAutoIndex(entity.COSINE)}))
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
// insert initial data with specific ID
|
|
oldText := "This is the original text content"
|
|
res, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).
|
|
WithInt64Column(common.DefaultInt64FieldName, []int64{1}).
|
|
WithVarcharColumn("document", []string{oldText}))
|
|
common.CheckErr(t, err, true)
|
|
require.Equal(t, int64(1), res.InsertCount)
|
|
|
|
// query original embedding before upsert
|
|
resQueryBefore, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).
|
|
WithFilter("int64 == 1").
|
|
WithOutputFields("document", "dense").
|
|
WithConsistencyLevel(entity.ClStrong))
|
|
common.CheckErr(t, err, true)
|
|
require.Equal(t, 1, resQueryBefore.Len())
|
|
|
|
// extract original embedding
|
|
originalDenseColumn := resQueryBefore.GetColumn("dense")
|
|
require.NotNil(t, originalDenseColumn)
|
|
originalFloatVecColumn, ok := originalDenseColumn.(*column.ColumnFloatVector)
|
|
require.True(t, ok, "Dense column should be a float vector column")
|
|
originalEmbedding := originalFloatVecColumn.Data()[0]
|
|
require.Equal(t, hp.GetTEIModelDim(), len(originalEmbedding), "Original embedding dimension should match")
|
|
|
|
// verify original text
|
|
originalDocColumn := resQueryBefore.GetColumn("document")
|
|
require.NotNil(t, originalDocColumn)
|
|
originalVarCharColumn, ok := originalDocColumn.(*column.ColumnVarChar)
|
|
require.True(t, ok, "Document column should be a varchar column")
|
|
require.Equal(t, oldText, originalVarCharColumn.Data()[0], "Original text should match")
|
|
|
|
// upsert with new text
|
|
newText := "This is completely different updated text content"
|
|
res2, err := mc.Upsert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).
|
|
WithInt64Column(common.DefaultInt64FieldName, []int64{1}).
|
|
WithVarcharColumn("document", []string{newText}))
|
|
common.CheckErr(t, err, true)
|
|
require.Equal(t, int64(1), res2.UpsertCount)
|
|
|
|
// query updated embedding after upsert
|
|
resQueryAfter, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).
|
|
WithFilter("int64 == 1").
|
|
WithOutputFields("document", "dense").
|
|
WithConsistencyLevel(entity.ClStrong))
|
|
common.CheckErr(t, err, true)
|
|
require.Equal(t, 1, resQueryAfter.Len())
|
|
|
|
// extract updated embedding
|
|
updatedDenseColumn := resQueryAfter.GetColumn("dense")
|
|
require.NotNil(t, updatedDenseColumn)
|
|
updatedFloatVecColumn, ok := updatedDenseColumn.(*column.ColumnFloatVector)
|
|
require.True(t, ok, "Dense column should be a float vector column")
|
|
updatedEmbedding := updatedFloatVecColumn.Data()[0]
|
|
require.Equal(t, hp.GetTEIModelDim(), len(updatedEmbedding), "Updated embedding dimension should match")
|
|
|
|
// verify updated text
|
|
updatedDocColumn := resQueryAfter.GetColumn("document")
|
|
require.NotNil(t, updatedDocColumn)
|
|
updatedVarCharColumn, ok := updatedDocColumn.(*column.ColumnVarChar)
|
|
require.True(t, ok, "Document column should be a varchar column")
|
|
require.Equal(t, newText, updatedVarCharColumn.Data()[0], "Updated text should match")
|
|
|
|
// verify embeddings are different (key assertion)
|
|
similarity := hp.CosineSimilarity(originalEmbedding, updatedEmbedding)
|
|
require.Less(t, similarity, float32(0.95),
|
|
"Embeddings should be significantly different after text update (similarity=%.6f)", similarity)
|
|
|
|
t.Logf("Upsert verification complete: Original and updated embeddings have cosine similarity %.6f (< 0.95)", similarity)
|
|
t.Logf(" Original text: %s", oldText)
|
|
t.Logf(" Updated text: %s", newText)
|
|
}
|
|
|
|
// TestDeleteAndSearch tests that deleted text cannot be searched
|
|
func TestDeleteAndSearch(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create collection with TEI function (custom fields for autoID=false)
|
|
function := hp.TNewTextEmbeddingFunction("document", "dense", map[string]any{
|
|
"provider": "TEI",
|
|
"endpoint": hp.GetTEIEndpoint(),
|
|
})
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
fieldsOption := newTextEmbeddingFieldsOption(false)
|
|
|
|
prepare, schema := hp.CollPrepare.CreateCollection(
|
|
ctx, t, mc,
|
|
hp.NewCreateCollectionParams(hp.TextEmbedding),
|
|
fieldsOption,
|
|
schemaOption,
|
|
hp.TWithConsistencyLevel(entity.ClStrong),
|
|
)
|
|
|
|
// insert test data
|
|
documents := []string{
|
|
"This is test document 0",
|
|
"This is test document 1",
|
|
"This is test document 2",
|
|
}
|
|
ids := []int64{0, 1, 2}
|
|
|
|
res, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).
|
|
WithInt64Column(common.DefaultInt64FieldName, ids).
|
|
WithVarcharColumn("document", documents))
|
|
common.CheckErr(t, err, true)
|
|
require.Equal(t, int64(3), res.InsertCount)
|
|
|
|
// create index and load
|
|
prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{"dense": index.NewAutoIndex(entity.COSINE)}))
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
// delete document with ID 1
|
|
res2, err := mc.Delete(ctx, milvusclient.NewDeleteOption(schema.CollectionName).WithExpr("int64 in [1]"))
|
|
common.CheckErr(t, err, true)
|
|
require.Equal(t, int64(1), res2.DeleteCount)
|
|
|
|
// search and verify document 1 is not in results
|
|
searchRes, err := mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, 3, []entity.Vector{entity.Text("test document 1")}).
|
|
WithANNSField("dense").
|
|
WithOutputFields("document", common.DefaultInt64FieldName))
|
|
common.CheckErr(t, err, true)
|
|
|
|
// verify deleted document is not in results
|
|
require.Greater(t, len(searchRes), 0)
|
|
for _, hits := range searchRes {
|
|
for i := 0; i < hits.Len(); i++ {
|
|
id, err := hits.IDs.GetAsInt64(i)
|
|
require.NoError(t, err)
|
|
require.NotEqual(t, int64(1), id, "Deleted document should not appear in search results")
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestSearchWithTextEmbedding tests search functionality with text embedding
|
|
func TestSearchWithTextEmbedding(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create -> insert -> index -> load
|
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.TextEmbedding), newTextEmbeddingFieldsOption(true), hp.TNewTextEmbeddingSchemaOption(), hp.TWithConsistencyLevel(entity.ClStrong))
|
|
|
|
// prepare test data
|
|
nb := 10
|
|
documents := make([]string, nb)
|
|
for i := 0; i < nb; i++ {
|
|
documents[i] = fmt.Sprintf("This is test document number %d about artificial intelligence and machine learning", i)
|
|
}
|
|
|
|
// insert data using only text field
|
|
res, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).WithVarcharColumn("document", documents))
|
|
common.CheckErr(t, err, true)
|
|
require.Equal(t, int64(nb), res.InsertCount)
|
|
|
|
// create index and load
|
|
prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{"dense": index.NewAutoIndex(entity.COSINE)}))
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
// search using text query
|
|
queryText := "artificial intelligence machine learning"
|
|
searchRes, err := mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, 5, []entity.Vector{entity.Text(queryText)}).
|
|
WithANNSField("dense").
|
|
WithOutputFields("document"))
|
|
common.CheckErr(t, err, true)
|
|
|
|
require.Greater(t, len(searchRes), 0)
|
|
for _, hits := range searchRes {
|
|
require.Greater(t, hits.Len(), 0, "Should find relevant documents")
|
|
require.LessOrEqual(t, hits.Len(), 5, "Should respect limit")
|
|
|
|
// verify results contain the search terms (semantic similarity)
|
|
for i := 0; i < hits.Len(); i++ {
|
|
score := hits.Scores[i]
|
|
require.Greater(t, score, float32(0), "Score should be positive")
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestSearchWithEmptyQuery tests search with empty query (should fail)
|
|
func TestSearchWithEmptyQuery(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create collection with TEI function
|
|
_, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.TextEmbedding), newTextEmbeddingFieldsOption(true), hp.TNewTextEmbeddingSchemaOption(), hp.TWithConsistencyLevel(entity.ClStrong))
|
|
|
|
// insert some test data
|
|
documents := []string{"test document"}
|
|
res, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).WithVarcharColumn("document", documents))
|
|
common.CheckErr(t, err, true)
|
|
require.Equal(t, int64(1), res.InsertCount)
|
|
|
|
// create index and load
|
|
_, err = mc.CreateIndex(ctx, milvusclient.NewCreateIndexOption(schema.CollectionName, "dense", index.NewAutoIndex(entity.COSINE)))
|
|
common.CheckErr(t, err, true)
|
|
|
|
_, err = mc.LoadCollection(ctx, milvusclient.NewLoadCollectionOption(schema.CollectionName))
|
|
common.CheckErr(t, err, true)
|
|
|
|
// search with empty query should fail
|
|
_, err = mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, 3, []entity.Vector{entity.Text("")}).
|
|
WithANNSField("dense"))
|
|
|
|
common.CheckErr(t, err, false, "TextEmbedding function does not support empty text")
|
|
}
|
|
|
|
// TestHybridSearchTextEmbeddingBM25 tests hybrid search combining TEI text embedding and BM25
|
|
func TestHybridSearchTextEmbeddingBM25(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create collection with both TEI text embedding and BM25 functions
|
|
collectionName := common.GenRandomString("hybrid_search", 6)
|
|
|
|
// create fields manually to support both dense and sparse vectors
|
|
fields := []*entity.Field{
|
|
entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithIsAutoID(true),
|
|
entity.NewField().WithName("document").WithDataType(entity.FieldTypeVarChar).WithMaxLength(65535).WithEnableAnalyzer(true).WithAnalyzerParams(map[string]any{"tokenizer": "standard"}),
|
|
entity.NewField().WithName("dense").WithDataType(entity.FieldTypeFloatVector).WithDim(int64(hp.GetTEIModelDim())),
|
|
entity.NewField().WithName("sparse").WithDataType(entity.FieldTypeSparseVector),
|
|
}
|
|
|
|
// create TEI text embedding function
|
|
teiFunction := hp.TNewTextEmbeddingFunction("document", "dense", map[string]any{
|
|
"provider": "TEI",
|
|
"endpoint": hp.GetTEIEndpoint(),
|
|
})
|
|
|
|
// create BM25 function
|
|
bm25Function := hp.TNewBM25Function("document", "sparse")
|
|
|
|
// create schema with both functions
|
|
schema := entity.NewSchema().
|
|
WithName(collectionName).
|
|
WithDescription("Hybrid search collection with TEI and BM25").
|
|
WithFunction(teiFunction).
|
|
WithFunction(bm25Function)
|
|
|
|
for _, field := range fields {
|
|
schema.WithField(field)
|
|
}
|
|
|
|
// create collection
|
|
err := mc.CreateCollection(ctx, milvusclient.NewCreateCollectionOption(collectionName, schema).WithConsistencyLevel(entity.ClStrong))
|
|
common.CheckErr(t, err, true)
|
|
|
|
// insert test data with diverse content
|
|
documents := []string{
|
|
"Artificial intelligence and machine learning are transforming technology",
|
|
"Vector databases enable semantic search capabilities for AI applications",
|
|
"Text embeddings capture semantic meaning in numerical representations",
|
|
"BM25 is a traditional keyword-based search algorithm",
|
|
"Hybrid search combines semantic and keyword-based retrieval methods",
|
|
"Large language models use transformer architectures for text understanding",
|
|
"Information retrieval systems help users find relevant documents",
|
|
"Natural language processing enables computers to understand human language",
|
|
"Database systems store and retrieve structured information efficiently",
|
|
"Search engines use ranking algorithms to order results by relevance",
|
|
}
|
|
|
|
// insert data - both embeddings will be generated automatically
|
|
res, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(collectionName).WithVarcharColumn("document", documents))
|
|
common.CheckErr(t, err, true)
|
|
require.Equal(t, int64(len(documents)), res.InsertCount)
|
|
|
|
// create indexes
|
|
_, err = mc.CreateIndex(ctx, milvusclient.NewCreateIndexOption(collectionName, "dense", index.NewAutoIndex(entity.COSINE)))
|
|
common.CheckErr(t, err, true)
|
|
|
|
_, err = mc.CreateIndex(ctx, milvusclient.NewCreateIndexOption(collectionName, "sparse", index.NewSparseInvertedIndex(entity.BM25, 0.1)))
|
|
common.CheckErr(t, err, true)
|
|
|
|
// load collection
|
|
_, err = mc.LoadCollection(ctx, milvusclient.NewLoadCollectionOption(collectionName))
|
|
common.CheckErr(t, err, true)
|
|
|
|
// test 1: Dense vector search (TEI semantic search)
|
|
t.Run("DenseVectorSearch", func(t *testing.T) {
|
|
queryText := "machine learning artificial intelligence"
|
|
searchRes, err := mc.Search(ctx, milvusclient.NewSearchOption(collectionName, 3, []entity.Vector{entity.Text(queryText)}).
|
|
WithANNSField("dense").
|
|
WithOutputFields("document"))
|
|
common.CheckErr(t, err, true)
|
|
|
|
require.Greater(t, len(searchRes), 0)
|
|
for _, hits := range searchRes {
|
|
require.Greater(t, hits.Len(), 0, "Should find semantically similar documents")
|
|
t.Logf("Dense search found %d results for query: %s", hits.Len(), queryText)
|
|
}
|
|
})
|
|
|
|
// test 2: Sparse vector search (BM25 keyword search)
|
|
t.Run("SparseVectorSearch", func(t *testing.T) {
|
|
queryText := "database systems"
|
|
searchRes, err := mc.Search(ctx, milvusclient.NewSearchOption(collectionName, 3, []entity.Vector{entity.Text(queryText)}).
|
|
WithANNSField("sparse").
|
|
WithOutputFields("document"))
|
|
common.CheckErr(t, err, true)
|
|
|
|
require.Greater(t, len(searchRes), 0)
|
|
for _, hits := range searchRes {
|
|
require.Greater(t, hits.Len(), 0, "Should find keyword-matching documents")
|
|
t.Logf("Sparse search found %d results for query: %s", hits.Len(), queryText)
|
|
}
|
|
})
|
|
|
|
// test 3: Both search types work independently
|
|
t.Run("IndependentSearches", func(t *testing.T) {
|
|
queryText := "vector search"
|
|
|
|
// Dense search
|
|
denseRes, err := mc.Search(ctx, milvusclient.NewSearchOption(collectionName, 5, []entity.Vector{entity.Text(queryText)}).
|
|
WithANNSField("dense").
|
|
WithOutputFields("document"))
|
|
common.CheckErr(t, err, true)
|
|
|
|
// Sparse search
|
|
sparseRes, err := mc.Search(ctx, milvusclient.NewSearchOption(collectionName, 5, []entity.Vector{entity.Text(queryText)}).
|
|
WithANNSField("sparse").
|
|
WithOutputFields("document"))
|
|
common.CheckErr(t, err, true)
|
|
|
|
// Both should return results
|
|
require.Greater(t, len(denseRes), 0, "Dense search should return results")
|
|
require.Greater(t, len(sparseRes), 0, "Sparse search should return results")
|
|
|
|
for _, hits := range denseRes {
|
|
require.Greater(t, hits.Len(), 0, "Dense search should find documents")
|
|
}
|
|
|
|
for _, hits := range sparseRes {
|
|
require.Greater(t, hits.Len(), 0, "Sparse search should find documents")
|
|
}
|
|
|
|
t.Logf("Dense search found %d results, Sparse search found %d results",
|
|
len(denseRes), len(sparseRes))
|
|
})
|
|
}
|
|
|
|
// TestInsertEmptyDocument tests insertion with empty document
|
|
func TestInsertEmptyDocument(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create collection with TEI function
|
|
_, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.TextEmbedding), newTextEmbeddingFieldsOption(true), hp.TNewTextEmbeddingSchemaOption(), hp.TWithConsistencyLevel(entity.ClStrong))
|
|
|
|
// try to insert empty document
|
|
documents := []string{"", "normal document"}
|
|
|
|
_, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).WithVarcharColumn("document", documents))
|
|
|
|
// should fail with empty document
|
|
common.CheckErr(t, err, false, "TextEmbedding function does not support empty text")
|
|
}
|
|
|
|
// TestInsertLongDocument tests insertion with very long document
|
|
func TestInsertLongDocument(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create collection with TEI function (no truncate)
|
|
params := map[string]any{
|
|
"provider": "TEI",
|
|
"endpoint": hp.GetTEIEndpoint(),
|
|
"truncate": "false",
|
|
}
|
|
function := hp.TNewTextEmbeddingFunction("document", "dense", params)
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
fieldsOption := newTextEmbeddingFieldsOption(true)
|
|
|
|
_, schema := hp.CollPrepare.CreateCollection(
|
|
ctx, t, mc,
|
|
hp.NewCreateCollectionParams(hp.TextEmbedding),
|
|
fieldsOption,
|
|
schemaOption,
|
|
hp.TWithConsistencyLevel(entity.ClStrong),
|
|
)
|
|
|
|
// try to insert very long document that exceeds model limits
|
|
longDocument := hp.GenLongText(8192, "english") // Very long text
|
|
documents := []string{longDocument}
|
|
|
|
_, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).WithVarcharColumn("document", documents))
|
|
|
|
// should fail with long document when truncate is false
|
|
common.CheckErr(t, err, false, "Call service failed")
|
|
}
|
|
|
|
// TestInvalidEndpointHandling tests various invalid endpoint scenarios
|
|
func TestInvalidEndpointHandling(t *testing.T) {
|
|
testCases := []struct {
|
|
name string
|
|
endpoint string
|
|
errMsg string
|
|
}{
|
|
{"NonExistentHost", "http://nonexistent-host:8080", "nonexistent-host"},
|
|
{"InvalidPort", "http://localhost:99999", "99999"},
|
|
{"InvalidProtocol", "ftp://localhost:8080", "ftp"},
|
|
{"EmptyEndpoint", "", "endpoint"},
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create collection with invalid endpoint
|
|
function := hp.TNewTextEmbeddingFunction("document", "dense", map[string]any{
|
|
"provider": "TEI",
|
|
"endpoint": tc.endpoint,
|
|
})
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
fieldOpts := hp.TNewFieldOptions().
|
|
WithFieldOption("document", hp.TNewFieldsOption().TWithMaxLen(common.MaxLength)).
|
|
WithFieldOption("dense", hp.TNewFieldsOption().TWithDim(int64(hp.GetTEIModelDim()))).
|
|
WithFieldOption(common.DefaultInt64FieldName, hp.TNewFieldsOption().TWithAutoID(true))
|
|
|
|
// collection creation should fail for invalid endpoints
|
|
collectionName := common.GenRandomString("test_invalid", 6)
|
|
err := mc.CreateCollection(ctx, milvusclient.NewCreateCollectionOption(
|
|
collectionName,
|
|
hp.GenSchema(schemaOption.TWithFields(hp.FieldsFact.GenFieldsForCollection(hp.TextEmbedding, fieldOpts))),
|
|
))
|
|
|
|
common.CheckErr(t, err, false, tc.errMsg)
|
|
t.Logf("Expected error for %s: %v", tc.name, err)
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestMissingRequiredParameters tests creation with missing required parameters
|
|
func TestMissingRequiredParameters(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
testCases := []struct {
|
|
name string
|
|
params map[string]any
|
|
errMsg string
|
|
}{
|
|
{"MissingProvider", map[string]any{"endpoint": hp.GetTEIEndpoint()}, "provider"},
|
|
{"MissingEndpoint", map[string]any{"provider": "TEI"}, "endpoint"},
|
|
{"WrongProvider", map[string]any{"provider": "InvalidProvider", "endpoint": hp.GetTEIEndpoint()}, "invalidprovider"},
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
// create function with incomplete parameters
|
|
function := entity.NewFunction().
|
|
WithName("incomplete_func").
|
|
WithInputFields("document").
|
|
WithOutputFields("dense").
|
|
WithType(entity.FunctionTypeTextEmbedding)
|
|
|
|
for key, value := range tc.params {
|
|
function.WithParam(key, value)
|
|
}
|
|
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
fieldsOption := newTextEmbeddingFieldsOption(true)
|
|
|
|
// collection creation should fail
|
|
err := mc.CreateCollection(ctx, milvusclient.NewCreateCollectionOption(
|
|
common.GenRandomString("test_incomplete", 6),
|
|
hp.GenSchema(schemaOption.TWithFields(hp.FieldsFact.GenFieldsForCollection(hp.TextEmbedding, fieldsOption))),
|
|
))
|
|
|
|
common.CheckErr(t, err, false, tc.errMsg)
|
|
t.Logf("Expected error for %s: %v", tc.name, err)
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestConcurrentOperations tests concurrent text embedding operations
|
|
func TestConcurrentOperations(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout*2) // longer timeout for concurrent ops
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create collection with TEI function
|
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.TextEmbedding), newTextEmbeddingFieldsOption(true), hp.TNewTextEmbeddingSchemaOption(), hp.TWithConsistencyLevel(entity.ClStrong))
|
|
|
|
// create index and load
|
|
prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{"dense": index.NewAutoIndex(entity.COSINE)}))
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
// concurrent inserts
|
|
t.Run("ConcurrentInserts", func(t *testing.T) {
|
|
numRoutines := 5
|
|
documentsPerRoutine := 5
|
|
|
|
results := make(chan error, numRoutines)
|
|
|
|
for i := 0; i < numRoutines; i++ {
|
|
go func(routineID int) {
|
|
documents := make([]string, documentsPerRoutine)
|
|
for j := 0; j < documentsPerRoutine; j++ {
|
|
documents[j] = fmt.Sprintf("Concurrent document from routine %d, doc %d", routineID, j)
|
|
}
|
|
|
|
_, err := mc.Insert(ctx, milvusclient.NewColumnBasedInsertOption(schema.CollectionName).WithVarcharColumn("document", documents))
|
|
results <- err
|
|
}(i)
|
|
}
|
|
|
|
// wait for all goroutines to complete
|
|
for i := 0; i < numRoutines; i++ {
|
|
err := <-results
|
|
require.NoError(t, err, "Concurrent insert should succeed")
|
|
}
|
|
|
|
t.Logf("Successfully completed %d concurrent inserts with %d documents each", numRoutines, documentsPerRoutine)
|
|
})
|
|
|
|
// concurrent searches
|
|
t.Run("ConcurrentSearches", func(t *testing.T) {
|
|
numRoutines := 3
|
|
|
|
results := make(chan error, numRoutines)
|
|
|
|
for i := 0; i < numRoutines; i++ {
|
|
go func(routineID int) {
|
|
queryText := fmt.Sprintf("document routine %d", routineID)
|
|
_, err := mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, 5, []entity.Vector{entity.Text(queryText)}).
|
|
WithANNSField("dense").
|
|
WithOutputFields("document"))
|
|
results <- err
|
|
}(i)
|
|
}
|
|
|
|
// wait for all searches to complete
|
|
for i := 0; i < numRoutines; i++ {
|
|
err := <-results
|
|
require.NoError(t, err, "Concurrent search should succeed")
|
|
}
|
|
|
|
t.Logf("Successfully completed %d concurrent searches", numRoutines)
|
|
})
|
|
}
|