feat: implement StructArray -- support the same sub-field names in different STRUCTs (#44557)

ref: https://github.com/milvus-io/milvus/issues/42148

---------

Signed-off-by: SpadeA <tangchenjie1210@gmail.com>
This commit is contained in:
Spade A 2025-10-10 15:53:56 +08:00 committed by GitHub
parent e5378a64bc
commit 208481a070
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 532 additions and 127 deletions

View File

@ -1743,17 +1743,21 @@ func TestProxy(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, resp.ErrorCode) assert.Equal(t, commonpb.ErrorCode_Success, resp.ErrorCode)
}) })
fmt.Println("create index for binVec field")
fieldName := ConcatStructFieldName(structField, subFieldFVec)
wg.Add(1) wg.Add(1)
t.Run("create index for embedding list field", func(t *testing.T) { t.Run("create index for embedding list field", func(t *testing.T) {
defer wg.Done() defer wg.Done()
req := constructTestCreateIndexRequest(dbName, collectionName, schemapb.DataType_ArrayOfVector, subFieldFVec, dim, nlist) req := constructTestCreateIndexRequest(dbName, collectionName, schemapb.DataType_ArrayOfVector, fieldName, dim, nlist)
resp, err := proxy.CreateIndex(ctx, req) resp, err := proxy.CreateIndex(ctx, req)
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, resp.ErrorCode) assert.Equal(t, commonpb.ErrorCode_Success, resp.ErrorCode)
}) })
fmt.Println("create index for embedding list field")
wg.Add(1) wg.Add(1)
t.Run("alter index for embedding list field", func(t *testing.T) { t.Run("alter index for embedding list field", func(t *testing.T) {
defer wg.Done() defer wg.Done()
@ -1773,6 +1777,7 @@ func TestProxy(t *testing.T) {
err = merr.CheckRPCCall(resp, err) err = merr.CheckRPCCall(resp, err)
assert.NoError(t, err) assert.NoError(t, err)
}) })
fmt.Println("alter index for embedding list field")
wg.Add(1) wg.Add(1)
t.Run("describe index for embedding list field", func(t *testing.T) { t.Run("describe index for embedding list field", func(t *testing.T) {
@ -1781,7 +1786,7 @@ func TestProxy(t *testing.T) {
Base: nil, Base: nil,
DbName: dbName, DbName: dbName,
CollectionName: collectionName, CollectionName: collectionName,
FieldName: subFieldFVec, FieldName: fieldName,
IndexName: testStructFVecIndexName, IndexName: testStructFVecIndexName,
}) })
err = merr.CheckRPCCall(resp, err) err = merr.CheckRPCCall(resp, err)
@ -1790,6 +1795,7 @@ func TestProxy(t *testing.T) {
enableMmap, _ := common.IsMmapDataEnabled(resp.IndexDescriptions[0].GetParams()...) enableMmap, _ := common.IsMmapDataEnabled(resp.IndexDescriptions[0].GetParams()...)
assert.True(t, enableMmap, "params: %+v", resp.IndexDescriptions[0]) assert.True(t, enableMmap, "params: %+v", resp.IndexDescriptions[0])
}) })
fmt.Println("describe index for embedding list field")
wg.Add(1) wg.Add(1)
t.Run("describe index with indexName for embedding list field", func(t *testing.T) { t.Run("describe index with indexName for embedding list field", func(t *testing.T) {
@ -1798,13 +1804,14 @@ func TestProxy(t *testing.T) {
Base: nil, Base: nil,
DbName: dbName, DbName: dbName,
CollectionName: collectionName, CollectionName: collectionName,
FieldName: subFieldFVec, FieldName: fieldName,
IndexName: testStructFVecIndexName, IndexName: testStructFVecIndexName,
}) })
err = merr.CheckRPCCall(resp, err) err = merr.CheckRPCCall(resp, err)
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, resp.GetStatus().GetErrorCode()) assert.Equal(t, commonpb.ErrorCode_Success, resp.GetStatus().GetErrorCode())
}) })
fmt.Println("describe index with indexName for embedding list field")
wg.Add(1) wg.Add(1)
t.Run("get index statistics for embedding list field", func(t *testing.T) { t.Run("get index statistics for embedding list field", func(t *testing.T) {
@ -1827,7 +1834,7 @@ func TestProxy(t *testing.T) {
Base: nil, Base: nil,
DbName: dbName, DbName: dbName,
CollectionName: collectionName, CollectionName: collectionName,
FieldName: subFieldFVec, FieldName: fieldName,
IndexName: testStructFVecIndexName, IndexName: testStructFVecIndexName,
}) })
assert.NoError(t, err) assert.NoError(t, err)
@ -1841,7 +1848,7 @@ func TestProxy(t *testing.T) {
Base: nil, Base: nil,
DbName: dbName, DbName: dbName,
CollectionName: collectionName, CollectionName: collectionName,
FieldName: subFieldFVec, FieldName: fieldName,
IndexName: testStructFVecIndexName, IndexName: testStructFVecIndexName,
}) })
assert.NoError(t, err) assert.NoError(t, err)
@ -2038,7 +2045,7 @@ func TestProxy(t *testing.T) {
wg.Add(1) wg.Add(1)
t.Run("embedding list search", func(t *testing.T) { t.Run("embedding list search", func(t *testing.T) {
defer wg.Done() defer wg.Done()
req := constructTestEmbeddingListSearchRequest(dbName, collectionName, subFieldFVec, expr, nq, nprobe, topk, roundDecimal, dim) req := constructTestEmbeddingListSearchRequest(dbName, collectionName, fieldName, expr, nq, nprobe, topk, roundDecimal, dim)
resp, err := proxy.Search(ctx, req) resp, err := proxy.Search(ctx, req)
assert.NoError(t, err) assert.NoError(t, err)

View File

@ -95,6 +95,47 @@ type CachedProxyServiceProvider struct {
*Proxy *Proxy
} }
// cloneStructArrayFields returns a copy of the given struct-array field
// schemas so that callers may rewrite sub-field names (e.g. when restoring
// user-visible names) without mutating the cached schema.
//
// Only the schema structs themselves are duplicated: scalar members and Name
// (the member actually rewritten downstream) live in fresh copies, while
// pointer/slice members such as TypeParams, IndexParams and DefaultValue are
// still shared with the originals. NOTE(review): this is a field-level copy,
// not a fully deep clone — confirm callers never mutate those shared members.
func cloneStructArrayFields(fields []*schemapb.StructArrayFieldSchema) []*schemapb.StructArrayFieldSchema {
	if fields == nil {
		return nil
	}
	// cloneSubField copies one sub-field schema member by member.
	cloneSubField := func(src *schemapb.FieldSchema) *schemapb.FieldSchema {
		return &schemapb.FieldSchema{
			FieldID:          src.FieldID,
			Name:             src.Name,
			IsPrimaryKey:     src.IsPrimaryKey,
			Description:      src.Description,
			DataType:         src.DataType,
			TypeParams:       src.TypeParams,
			IndexParams:      src.IndexParams,
			AutoID:           src.AutoID,
			State:            src.State,
			ElementType:      src.ElementType,
			DefaultValue:     src.DefaultValue,
			IsDynamic:        src.IsDynamic,
			IsPartitionKey:   src.IsPartitionKey,
			IsClusteringKey:  src.IsClusteringKey,
			Nullable:         src.Nullable,
			IsFunctionOutput: src.IsFunctionOutput,
		}
	}
	out := make([]*schemapb.StructArrayFieldSchema, 0, len(fields))
	for _, src := range fields {
		subFields := make([]*schemapb.FieldSchema, 0, len(src.Fields))
		for _, sub := range src.Fields {
			subFields = append(subFields, cloneSubField(sub))
		}
		out = append(out, &schemapb.StructArrayFieldSchema{
			FieldID:     src.FieldID,
			Name:        src.Name,
			Description: src.Description,
			Fields:      subFields,
		})
	}
	return out
}
func (node *CachedProxyServiceProvider) DescribeCollection(ctx context.Context, func (node *CachedProxyServiceProvider) DescribeCollection(ctx context.Context,
request *milvuspb.DescribeCollectionRequest, request *milvuspb.DescribeCollectionRequest,
) (resp *milvuspb.DescribeCollectionResponse, err error) { ) (resp *milvuspb.DescribeCollectionResponse, err error) {
@ -153,12 +194,19 @@ func (node *CachedProxyServiceProvider) DescribeCollection(ctx context.Context,
Fields: lo.Filter(c.schema.CollectionSchema.Fields, func(field *schemapb.FieldSchema, _ int) bool { Fields: lo.Filter(c.schema.CollectionSchema.Fields, func(field *schemapb.FieldSchema, _ int) bool {
return !field.IsDynamic return !field.IsDynamic
}), }),
StructArrayFields: c.schema.CollectionSchema.StructArrayFields, StructArrayFields: cloneStructArrayFields(c.schema.CollectionSchema.StructArrayFields),
EnableDynamicField: c.schema.CollectionSchema.EnableDynamicField, EnableDynamicField: c.schema.CollectionSchema.EnableDynamicField,
Properties: c.schema.CollectionSchema.Properties, Properties: c.schema.CollectionSchema.Properties,
Functions: c.schema.CollectionSchema.Functions, Functions: c.schema.CollectionSchema.Functions,
DbName: c.schema.CollectionSchema.DbName, DbName: c.schema.CollectionSchema.DbName,
} }
// Restore struct field names from internal format (structName[fieldName]) to original format
if err := restoreStructFieldNames(resp.Schema); err != nil {
log.Error("failed to restore struct field names", zap.Error(err))
return nil, err
}
resp.CollectionID = c.collID resp.CollectionID = c.collID
resp.UpdateTimestamp = c.updateTimestamp resp.UpdateTimestamp = c.updateTimestamp
resp.UpdateTimestampStr = fmt.Sprintf("%d", c.updateTimestamp) resp.UpdateTimestampStr = fmt.Sprintf("%d", c.updateTimestamp)

View File

@ -388,11 +388,6 @@ func (t *createCollectionTask) PreExecute(ctx context.Context) error {
return err return err
} }
// validate whether field names duplicates
if err := validateDuplicatedFieldName(t.schema); err != nil {
return err
}
// validate primary key definition // validate primary key definition
if err := validatePrimaryKey(t.schema); err != nil { if err := validatePrimaryKey(t.schema); err != nil {
return err return err
@ -440,6 +435,18 @@ func (t *createCollectionTask) PreExecute(ctx context.Context) error {
} }
} }
// Transform struct field names to ensure global uniqueness
// This allows different structs to have fields with the same name
err = transformStructFieldNames(t.schema)
if err != nil {
return fmt.Errorf("failed to transform struct field names: %v", err)
}
// validate whether field names duplicates (after transformation)
if err := validateDuplicatedFieldName(t.schema); err != nil {
return err
}
if err := validateMultipleVectorFields(t.schema); err != nil { if err := validateMultipleVectorFields(t.schema); err != nil {
return err return err
} }
@ -890,6 +897,11 @@ func (t *describeCollectionTask) Execute(ctx context.Context) error {
for _, function := range result.Schema.Functions { for _, function := range result.Schema.Functions {
t.result.Schema.Functions = append(t.result.Schema.Functions, proto.Clone(function).(*schemapb.FunctionSchema)) t.result.Schema.Functions = append(t.result.Schema.Functions, proto.Clone(function).(*schemapb.FunctionSchema))
} }
if err := restoreStructFieldNames(t.result.Schema); err != nil {
return fmt.Errorf("failed to restore struct field names: %v", err)
}
return nil return nil
} }

View File

@ -591,62 +591,6 @@ func (t *queryTask) Execute(ctx context.Context) error {
return nil return nil
} }
// reconstructStructFieldData rebuilds struct array fields from the flattened
// FieldsData produced by query execution: sub-fields belonging to the same
// struct are grouped back under a single ArrayOfStruct entry, while regular
// fields pass through unchanged. OutputFields is rewritten to match.
func reconstructStructFieldData(results *milvuspb.QueryResults, schema *schemapb.CollectionSchema) {
	// count(*) queries carry no per-field data to reconstruct.
	if len(results.OutputFields) == 1 && results.OutputFields[0] == "count(*)" {
		return
	}
	// Nothing to do when the collection declares no struct array fields.
	if len(schema.StructArrayFields) == 0 {
		return
	}

	// Index regular (non-struct) fields and map each sub-field ID to the
	// struct that owns it.
	regularFieldIDs := make(map[int64]interface{}, len(schema.Fields))
	for _, f := range schema.Fields {
		regularFieldIDs[f.GetFieldID()] = nil
	}
	subFieldToStructMap := make(map[int64]int64)
	structFieldNames := make(map[int64]string, len(schema.StructArrayFields))
	for _, sf := range schema.StructArrayFields {
		structFieldNames[sf.GetFieldID()] = sf.GetName()
		for _, sub := range sf.GetFields() {
			subFieldToStructMap[sub.GetFieldID()] = sf.GetFieldID()
		}
	}

	// Partition the flattened results: regular fields are emitted in order,
	// struct sub-fields are collected per owning struct.
	groupedStructFields := make(map[int64][]*schemapb.FieldData)
	fieldsData := make([]*schemapb.FieldData, 0, len(results.FieldsData))
	reconstructedOutputFields := make([]string, 0, len(results.FieldsData))
	for _, field := range results.FieldsData {
		id := field.GetFieldId()
		if _, isRegular := regularFieldIDs[id]; isRegular {
			fieldsData = append(fieldsData, field)
			reconstructedOutputFields = append(reconstructedOutputFields, field.GetFieldName())
			continue
		}
		owner := subFieldToStructMap[id]
		groupedStructFields[owner] = append(groupedStructFields[owner], field)
	}

	// Emit one ArrayOfStruct entry per grouped struct.
	for structFieldID, fields := range groupedStructFields {
		fieldsData = append(fieldsData, &schemapb.FieldData{
			FieldName: structFieldNames[structFieldID],
			FieldId:   structFieldID,
			Type:      schemapb.DataType_ArrayOfStruct,
			Field:     &schemapb.FieldData_StructArrays{StructArrays: &schemapb.StructArrayField{Fields: fields}},
		})
		reconstructedOutputFields = append(reconstructedOutputFields, structFieldNames[structFieldID])
	}

	results.FieldsData = fieldsData
	results.OutputFields = reconstructedOutputFields
}
func (t *queryTask) PostExecute(ctx context.Context) error { func (t *queryTask) PostExecute(ctx context.Context) error {
tr := timerecord.NewTimeRecorder("queryTask PostExecute") tr := timerecord.NewTimeRecorder("queryTask PostExecute")
defer func() { defer func() {

View File

@ -5229,21 +5229,22 @@ func TestCreateCollectionTaskWithStructArrayField(t *testing.T) {
assert.Equal(t, testStructArrayField, structArrayField.Name) assert.Equal(t, testStructArrayField, structArrayField.Name)
assert.Len(t, structArrayField.Fields, 3) assert.Len(t, structArrayField.Fields, 3)
// Verify sub-fields in StructArrayField // Verify sub-fields in StructArrayField have been transformed to structName[fieldName] format
subFields := structArrayField.Fields subFields := structArrayField.Fields
// sub_text_array // After PreExecute, field names should be transformed to structName[fieldName] format
assert.Equal(t, "sub_text_array", subFields[0].Name) // sub_text_array -> testStructArrayField[sub_text_array]
assert.Equal(t, fmt.Sprintf("%s[sub_text_array]", testStructArrayField), subFields[0].Name)
assert.Equal(t, schemapb.DataType_Array, subFields[0].DataType) assert.Equal(t, schemapb.DataType_Array, subFields[0].DataType)
assert.Equal(t, schemapb.DataType_VarChar, subFields[0].ElementType) assert.Equal(t, schemapb.DataType_VarChar, subFields[0].ElementType)
// sub_int_array // sub_int_array -> testStructArrayField[sub_int_array]
assert.Equal(t, "sub_int_array", subFields[1].Name) assert.Equal(t, fmt.Sprintf("%s[sub_int_array]", testStructArrayField), subFields[1].Name)
assert.Equal(t, schemapb.DataType_Array, subFields[1].DataType) assert.Equal(t, schemapb.DataType_Array, subFields[1].DataType)
assert.Equal(t, schemapb.DataType_Int32, subFields[1].ElementType) assert.Equal(t, schemapb.DataType_Int32, subFields[1].ElementType)
// sub_float_vector_array // sub_float_vector_array -> testStructArrayField[sub_float_vector_array]
assert.Equal(t, "sub_float_vector_array", subFields[2].Name) assert.Equal(t, fmt.Sprintf("%s[sub_float_vector_array]", testStructArrayField), subFields[2].Name)
assert.Equal(t, schemapb.DataType_ArrayOfVector, subFields[2].DataType) assert.Equal(t, schemapb.DataType_ArrayOfVector, subFields[2].DataType)
assert.Equal(t, schemapb.DataType_FloatVector, subFields[2].ElementType) assert.Equal(t, schemapb.DataType_FloatVector, subFields[2].ElementType)
@ -5255,6 +5256,130 @@ func TestCreateCollectionTaskWithStructArrayField(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
}) })
t.Run("test struct field name transformation with duplicate field names", func(t *testing.T) {
// Create a schema with multiple structs having the same sub-field names
schemaWithDuplicateNames := &schemapb.CollectionSchema{
Name: collectionName + "_duplicate_names",
Fields: []*schemapb.FieldSchema{
{
FieldID: 100,
Name: testInt64Field,
IsPrimaryKey: true,
DataType: schemapb.DataType_Int64,
AutoID: false,
},
{
FieldID: 101,
Name: testFloatVecField,
DataType: schemapb.DataType_FloatVector,
TypeParams: []*commonpb.KeyValuePair{
{
Key: common.DimKey,
Value: strconv.Itoa(testVecDim),
},
},
},
},
StructArrayFields: []*schemapb.StructArrayFieldSchema{
{
FieldID: 102,
Name: "struct1",
Description: "first struct",
Fields: []*schemapb.FieldSchema{
{
FieldID: 1021,
Name: "field_name", // Duplicate name
DataType: schemapb.DataType_Array,
ElementType: schemapb.DataType_Int32,
},
{
FieldID: 1022,
Name: "common_field", // Another duplicate name
DataType: schemapb.DataType_Array,
ElementType: schemapb.DataType_VarChar,
TypeParams: []*commonpb.KeyValuePair{
{
Key: common.MaxLengthKey,
Value: "100",
},
},
},
},
},
{
FieldID: 103,
Name: "struct2",
Description: "second struct",
Fields: []*schemapb.FieldSchema{
{
FieldID: 1031,
Name: "field_name", // Same name as struct1's field
DataType: schemapb.DataType_Array,
ElementType: schemapb.DataType_Float,
},
{
FieldID: 1032,
Name: "common_field", // Same name as struct1's field
DataType: schemapb.DataType_Array,
ElementType: schemapb.DataType_Bool,
},
},
},
},
}
marshaledDuplicateSchema, err := proto.Marshal(schemaWithDuplicateNames)
assert.NoError(t, err)
duplicateTask := &createCollectionTask{
Condition: NewTaskCondition(ctx),
CreateCollectionRequest: &milvuspb.CreateCollectionRequest{
Base: nil,
DbName: dbName,
CollectionName: collectionName + "_duplicate_names",
Schema: marshaledDuplicateSchema,
ShardsNum: shardsNum,
},
ctx: ctx,
mixCoord: mix,
result: nil,
schema: nil,
}
err = duplicateTask.OnEnqueue()
assert.NoError(t, err)
// PreExecute should succeed and transform the field names
err = duplicateTask.PreExecute(ctx)
assert.NoError(t, err)
// Verify that field names have been transformed to avoid conflicts
assert.NotNil(t, duplicateTask.schema)
assert.Len(t, duplicateTask.schema.StructArrayFields, 2)
// Check struct1's fields
struct1 := duplicateTask.schema.StructArrayFields[0]
assert.Equal(t, "struct1", struct1.Name)
assert.Equal(t, "struct1[field_name]", struct1.Fields[0].Name)
assert.Equal(t, "struct1[common_field]", struct1.Fields[1].Name)
// Check struct2's fields
struct2 := duplicateTask.schema.StructArrayFields[1]
assert.Equal(t, "struct2", struct2.Name)
assert.Equal(t, "struct2[field_name]", struct2.Fields[0].Name)
assert.Equal(t, "struct2[common_field]", struct2.Fields[1].Name)
// Verify that despite having the same original names, the transformed names are unique
allFieldNames := make(map[string]bool)
for _, structField := range duplicateTask.schema.StructArrayFields {
for _, field := range structField.Fields {
// Each transformed name should be unique
assert.False(t, allFieldNames[field.Name], "Duplicate field name found: %s", field.Name)
allFieldNames[field.Name] = true
}
}
})
t.Run("validate struct array field constraints", func(t *testing.T) { t.Run("validate struct array field constraints", func(t *testing.T) {
// Test invalid sub-field in StructArrayField // Test invalid sub-field in StructArrayField
invalidSchema := constructCollectionSchemaWithStructArrayField(collectionName+"_invalid", testStructArrayField, false) invalidSchema := constructCollectionSchemaWithStructArrayField(collectionName+"_invalid", testStructArrayField, false)

View File

@ -83,6 +83,66 @@ const (
var logger = log.L().WithOptions(zap.Fields(zap.String("role", typeutil.ProxyRole))) var logger = log.L().WithOptions(zap.Fields(zap.String("role", typeutil.ProxyRole)))
// ConcatStructFieldName builds the internal, globally-unique name of a struct
// sub-field by embedding the struct name: "structName[fieldName]".
func ConcatStructFieldName(structName string, fieldName string) string {
	return structName + "[" + fieldName + "]"
}
// transformStructFieldNames rewrites every struct sub-field name in schema to
// the internal "structName[fieldName]" form, making names globally unique so
// that different structs may declare identically-named sub-fields.
// It never fails today; the error return is kept for interface stability.
func transformStructFieldNames(schema *schemapb.CollectionSchema) error {
	for _, structArrayField := range schema.StructArrayFields {
		for _, subField := range structArrayField.Fields {
			subField.Name = ConcatStructFieldName(structArrayField.Name, subField.Name)
		}
	}
	return nil
}
// restoreStructFieldNames undoes transformStructFieldNames: sub-field names in
// the internal "structName[fieldName]" form are rewritten back to the bare
// field name before the schema is returned to users (e.g. describe collection).
// Names that do not carry the expected "structName[...]" wrapper are left
// untouched. It never fails today; the error return is kept for interface
// stability.
func restoreStructFieldNames(schema *schemapb.CollectionSchema) error {
	for _, structArrayField := range schema.StructArrayFields {
		prefix := structArrayField.Name + "["
		for _, subField := range structArrayField.Fields {
			name := subField.Name
			if !strings.HasPrefix(name, prefix) || !strings.HasSuffix(name, "]") {
				continue
			}
			// Strip the "structName[" wrapper and the trailing "]".
			subField.Name = strings.TrimSuffix(strings.TrimPrefix(name, prefix), "]")
		}
	}
	return nil
}
// extractOriginalFieldName recovers the user-visible field name from an
// internal "structName[fieldName]" name produced by ConcatStructFieldName.
// It returns an error when the input is not in that format: no opening
// bracket, no trailing closing bracket, an empty struct name, or an empty
// field name. This function should only be called on transformed struct
// field names.
func extractOriginalFieldName(transformedName string) (string, error) {
	structName, rest, found := strings.Cut(transformedName, "[")
	switch {
	case !found:
		return "", fmt.Errorf("not a transformed struct field name: %s", transformedName)
	case !strings.HasSuffix(transformedName, "]"):
		return "", fmt.Errorf("invalid struct field format: %s, missing closing bracket", transformedName)
	case structName == "":
		return "", fmt.Errorf("invalid struct field format: %s, missing struct name", transformedName)
	}
	// Everything between the first "[" and the final "]" is the field name.
	fieldName := strings.TrimSuffix(rest, "]")
	if fieldName == "" {
		return "", fmt.Errorf("invalid struct field format: %s, empty field name", transformedName)
	}
	return fieldName, nil
}
// isAlpha check if c is alpha. // isAlpha check if c is alpha.
func isAlpha(c uint8) bool { func isAlpha(c uint8) bool {
if (c < 'A' || c > 'Z') && (c < 'a' || c > 'z') { if (c < 'A' || c > 'Z') && (c < 'a' || c > 'z') {
@ -1828,7 +1888,8 @@ func checkAndFlattenStructFieldData(schema *schemapb.CollectionSchema, insertMsg
continue continue
} }
structSchema, ok := structSchemaMap[fieldData.FieldName] structName := fieldData.FieldName
structSchema, ok := structSchemaMap[structName]
if !ok { if !ok {
return fmt.Errorf("fieldName %v not exist in collection schema, fieldType %v, fieldId %v", fieldData.FieldName, fieldData.Type, fieldData.FieldId) return fmt.Errorf("fieldName %v not exist in collection schema, fieldType %v, fieldId %v", fieldData.FieldName, fieldData.Type, fieldData.FieldId)
} }
@ -1837,13 +1898,13 @@ func checkAndFlattenStructFieldData(schema *schemapb.CollectionSchema, insertMsg
structArrays, ok := fieldData.Field.(*schemapb.FieldData_StructArrays) structArrays, ok := fieldData.Field.(*schemapb.FieldData_StructArrays)
if !ok { if !ok {
return fmt.Errorf("field convert FieldData_StructArrays fail in fieldData, fieldName: %s,"+ return fmt.Errorf("field convert FieldData_StructArrays fail in fieldData, fieldName: %s,"+
" collectionName:%s", fieldData.FieldName, schema.Name) " collectionName:%s", structName, schema.Name)
} }
if len(structArrays.StructArrays.Fields) != len(structSchema.GetFields()) { if len(structArrays.StructArrays.Fields) != len(structSchema.GetFields()) {
return fmt.Errorf("length of fields of struct field mismatch length of the fields in schema, fieldName: %s,"+ return fmt.Errorf("length of fields of struct field mismatch length of the fields in schema, fieldName: %s,"+
" collectionName:%s, fieldData fields length:%d, schema fields length:%d", " collectionName:%s, fieldData fields length:%d, schema fields length:%d",
fieldData.FieldName, schema.Name, len(structArrays.StructArrays.Fields), len(structSchema.GetFields())) structName, schema.Name, len(structArrays.StructArrays.Fields), len(structSchema.GetFields()))
} }
// Check the array length of the struct array field data // Check the array length of the struct array field data
@ -1857,27 +1918,36 @@ func checkAndFlattenStructFieldData(schema *schemapb.CollectionSchema, insertMsg
currentArrayLen = len(scalarArray.Data) currentArrayLen = len(scalarArray.Data)
} else { } else {
return fmt.Errorf("scalar array data is nil in struct field '%s', sub-field '%s'", return fmt.Errorf("scalar array data is nil in struct field '%s', sub-field '%s'",
fieldData.FieldName, subField.FieldName) structName, subField.FieldName)
} }
case *schemapb.FieldData_Vectors: case *schemapb.FieldData_Vectors:
if vectorArray := subFieldData.Vectors.GetVectorArray(); vectorArray != nil { if vectorArray := subFieldData.Vectors.GetVectorArray(); vectorArray != nil {
currentArrayLen = len(vectorArray.Data) currentArrayLen = len(vectorArray.Data)
} else { } else {
return fmt.Errorf("vector array data is nil in struct field '%s', sub-field '%s'", return fmt.Errorf("vector array data is nil in struct field '%s', sub-field '%s'",
fieldData.FieldName, subField.FieldName) structName, subField.FieldName)
} }
default: default:
return fmt.Errorf("unexpected field data type in struct array field, fieldName: %s", fieldData.FieldName) return fmt.Errorf("unexpected field data type in struct array field, fieldName: %s", structName)
} }
if expectedArrayLen == -1 { if expectedArrayLen == -1 {
expectedArrayLen = currentArrayLen expectedArrayLen = currentArrayLen
} else if currentArrayLen != expectedArrayLen { } else if currentArrayLen != expectedArrayLen {
return fmt.Errorf("inconsistent array length in struct field '%s': expected %d, got %d for sub-field '%s'", return fmt.Errorf("inconsistent array length in struct field '%s': expected %d, got %d for sub-field '%s'",
fieldData.FieldName, expectedArrayLen, currentArrayLen, subField.FieldName) structName, expectedArrayLen, currentArrayLen, subField.FieldName)
} }
flattenedFields = append(flattenedFields, subField) transformedFieldName := ConcatStructFieldName(structName, subField.FieldName)
subFieldCopy := &schemapb.FieldData{
FieldName: transformedFieldName,
FieldId: subField.FieldId,
Type: subField.Type,
Field: subField.Field,
IsDynamic: subField.IsDynamic,
}
flattenedFields = append(flattenedFields, subFieldCopy)
} }
} }
@ -2739,11 +2809,29 @@ func reconstructStructFieldDataCommon(
} }
for structFieldID, fields := range groupedStructFields { for structFieldID, fields := range groupedStructFields {
// Create deep copies of fields to avoid modifying original data
// and restore original field names for user-facing response
copiedFields := make([]*schemapb.FieldData, len(fields))
for i, field := range fields {
copiedFields[i] = proto.Clone(field).(*schemapb.FieldData)
// Extract original field name from structName[fieldName] format
originalName, err := extractOriginalFieldName(copiedFields[i].FieldName)
if err != nil {
// This should not happen in normal operation - indicates a bug
log.Error("failed to extract original field name from struct field",
zap.String("fieldName", copiedFields[i].FieldName),
zap.Error(err))
// Keep the transformed name to avoid data corruption
} else {
copiedFields[i].FieldName = originalName
}
}
fieldData := &schemapb.FieldData{ fieldData := &schemapb.FieldData{
FieldName: structFieldNames[structFieldID], FieldName: structFieldNames[structFieldID],
FieldId: structFieldID, FieldId: structFieldID,
Type: schemapb.DataType_ArrayOfStruct, Type: schemapb.DataType_ArrayOfStruct,
Field: &schemapb.FieldData_StructArrays{StructArrays: &schemapb.StructArrayField{Fields: fields}}, Field: &schemapb.FieldData_StructArrays{StructArrays: &schemapb.StructArrayField{Fields: copiedFields}},
} }
newFieldsData = append(newFieldsData, fieldData) newFieldsData = append(newFieldsData, fieldData)
reconstructedOutputFields = append(reconstructedOutputFields, structFieldNames[structFieldID]) reconstructedOutputFields = append(reconstructedOutputFields, structFieldNames[structFieldID])

View File

@ -3484,8 +3484,8 @@ func TestCheckAndFlattenStructFieldData(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
assert.Len(t, insertMsg.FieldsData, 2) assert.Len(t, insertMsg.FieldsData, 2)
assert.Equal(t, "age_array", insertMsg.FieldsData[0].FieldName) assert.Equal(t, "user_info[age_array]", insertMsg.FieldsData[0].FieldName)
assert.Equal(t, "score_array", insertMsg.FieldsData[1].FieldName) assert.Equal(t, "user_info[score_array]", insertMsg.FieldsData[1].FieldName)
}) })
t.Run("success - valid struct array field with vector arrays", func(t *testing.T) { t.Run("success - valid struct array field with vector arrays", func(t *testing.T) {
@ -3513,7 +3513,7 @@ func TestCheckAndFlattenStructFieldData(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
assert.Len(t, insertMsg.FieldsData, 1) assert.Len(t, insertMsg.FieldsData, 1)
assert.Equal(t, "embeddings", insertMsg.FieldsData[0].FieldName) assert.Equal(t, "embedding_info[embeddings]", insertMsg.FieldsData[0].FieldName)
assert.Equal(t, schemapb.DataType_ArrayOfVector, insertMsg.FieldsData[0].Type) assert.Equal(t, schemapb.DataType_ArrayOfVector, insertMsg.FieldsData[0].Type)
}) })
@ -3556,9 +3556,9 @@ func TestCheckAndFlattenStructFieldData(t *testing.T) {
for i, field := range insertMsg.FieldsData { for i, field := range insertMsg.FieldsData {
fieldNames[i] = field.FieldName fieldNames[i] = field.FieldName
} }
assert.Contains(t, fieldNames, "field1") assert.Contains(t, fieldNames, "struct1[field1]")
assert.Contains(t, fieldNames, "field2") assert.Contains(t, fieldNames, "struct2[field2]")
assert.Contains(t, fieldNames, "field3") assert.Contains(t, fieldNames, "struct2[field3]")
}) })
t.Run("success - mixed normal and struct fields", func(t *testing.T) { t.Run("success - mixed normal and struct fields", func(t *testing.T) {
@ -3591,7 +3591,7 @@ func TestCheckAndFlattenStructFieldData(t *testing.T) {
fieldNames[i] = field.FieldName fieldNames[i] = field.FieldName
} }
assert.Contains(t, fieldNames, "id") assert.Contains(t, fieldNames, "id")
assert.Contains(t, fieldNames, "tags") assert.Contains(t, fieldNames, "metadata[tags]")
}) })
t.Run("success - empty struct array fields", func(t *testing.T) { t.Run("success - empty struct array fields", func(t *testing.T) {
@ -3836,7 +3836,7 @@ func TestCheckAndFlattenStructFieldData(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
assert.Len(t, insertMsg.FieldsData, 1) assert.Len(t, insertMsg.FieldsData, 1)
assert.Equal(t, "single_field", insertMsg.FieldsData[0].FieldName) assert.Equal(t, "single_element_struct[single_field]", insertMsg.FieldsData[0].FieldName)
}) })
} }
@ -4112,7 +4112,7 @@ func Test_reconstructStructFieldDataCommon(t *testing.T) {
Fields: []*schemapb.FieldSchema{ Fields: []*schemapb.FieldSchema{
{ {
FieldID: 1021, FieldID: 1021,
Name: "sub_field", Name: "test_struct[sub_field]",
DataType: schemapb.DataType_Array, DataType: schemapb.DataType_Array,
ElementType: schemapb.DataType_Int32, ElementType: schemapb.DataType_Int32,
}, },
@ -4133,6 +4133,66 @@ func Test_reconstructStructFieldDataCommon(t *testing.T) {
assert.Equal(t, originalOutputFields, resultOutputFields) assert.Equal(t, originalOutputFields, resultOutputFields)
}) })
t.Run("struct field query - should reconstruct struct field", func(t *testing.T) {
fieldsData := []*schemapb.FieldData{
{
FieldName: "test_struct[sub_field]",
FieldId: 1021, // Use the correct field ID that matches the schema
Type: schemapb.DataType_Array,
Field: &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_ArrayData{
ArrayData: &schemapb.ArrayArray{
ElementType: schemapb.DataType_Int32,
Data: []*schemapb.ScalarField{
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{Data: []int32{1, 2, 3}},
},
},
},
},
},
},
},
},
}
outputFields := []string{"test_struct[sub_field]"}
schema := &schemapb.CollectionSchema{
StructArrayFields: []*schemapb.StructArrayFieldSchema{
{
FieldID: 102,
Name: "test_struct",
Fields: []*schemapb.FieldSchema{
{
FieldID: 1021,
Name: "test_struct[sub_field]",
DataType: schemapb.DataType_Array,
ElementType: schemapb.DataType_Int32,
},
},
},
},
}
resultFieldsData, resultOutputFields := reconstructStructFieldDataCommon(fieldsData, outputFields, schema)
// Should reconstruct the struct field with the restored field name
assert.Len(t, resultFieldsData, 1)
assert.Equal(t, "test_struct", resultFieldsData[0].FieldName)
assert.Equal(t, int64(102), resultFieldsData[0].FieldId)
assert.Equal(t, schemapb.DataType_ArrayOfStruct, resultFieldsData[0].Type)
// Check that the sub-field name has been restored
structArrayField := resultFieldsData[0].GetStructArrays()
assert.NotNil(t, structArrayField)
assert.Len(t, structArrayField.Fields, 1)
assert.Equal(t, "sub_field", structArrayField.Fields[0].FieldName) // Name should be restored
assert.Equal(t, []string{"test_struct"}, resultOutputFields)
})
t.Run("no struct array fields - should return early", func(t *testing.T) { t.Run("no struct array fields - should return early", func(t *testing.T) {
fieldsData := []*schemapb.FieldData{ fieldsData := []*schemapb.FieldData{
{ {
@ -4165,9 +4225,9 @@ func Test_reconstructStructFieldDataCommon(t *testing.T) {
}) })
t.Run("reconstruct single struct field", func(t *testing.T) { t.Run("reconstruct single struct field", func(t *testing.T) {
// Create mock data // Create mock data with transformed field names (as they would be internally)
subField1Data := &schemapb.FieldData{ subField1Data := &schemapb.FieldData{
FieldName: "sub_int_array", FieldName: "test_struct[sub_int_array]",
FieldId: 1021, FieldId: 1021,
Type: schemapb.DataType_Array, Type: schemapb.DataType_Array,
Field: &schemapb.FieldData_Scalars{ Field: &schemapb.FieldData_Scalars{
@ -4189,7 +4249,7 @@ func Test_reconstructStructFieldDataCommon(t *testing.T) {
} }
subField2Data := &schemapb.FieldData{ subField2Data := &schemapb.FieldData{
FieldName: "sub_text_array", FieldName: "test_struct[sub_text_array]",
FieldId: 1022, FieldId: 1022,
Type: schemapb.DataType_Array, Type: schemapb.DataType_Array,
Field: &schemapb.FieldData_Scalars{ Field: &schemapb.FieldData_Scalars{
@ -4211,7 +4271,7 @@ func Test_reconstructStructFieldDataCommon(t *testing.T) {
} }
fieldsData := []*schemapb.FieldData{subField1Data, subField2Data} fieldsData := []*schemapb.FieldData{subField1Data, subField2Data}
outputFields := []string{"sub_int_array", "sub_text_array"} outputFields := []string{"test_struct[sub_int_array]", "test_struct[sub_text_array]"}
schema := &schemapb.CollectionSchema{ schema := &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{ Fields: []*schemapb.FieldSchema{
@ -4229,13 +4289,13 @@ func Test_reconstructStructFieldDataCommon(t *testing.T) {
Fields: []*schemapb.FieldSchema{ Fields: []*schemapb.FieldSchema{
{ {
FieldID: 1021, FieldID: 1021,
Name: "sub_int_array", Name: "test_struct[sub_int_array]",
DataType: schemapb.DataType_Array, DataType: schemapb.DataType_Array,
ElementType: schemapb.DataType_Int32, ElementType: schemapb.DataType_Int32,
}, },
{ {
FieldID: 1022, FieldID: 1022,
Name: "sub_text_array", Name: "test_struct[sub_text_array]",
DataType: schemapb.DataType_Array, DataType: schemapb.DataType_Array,
ElementType: schemapb.DataType_VarChar, ElementType: schemapb.DataType_VarChar,
}, },
@ -4294,9 +4354,9 @@ func Test_reconstructStructFieldDataCommon(t *testing.T) {
}, },
} }
// Create struct sub field data // Create struct sub field data with transformed name
subFieldData := &schemapb.FieldData{ subFieldData := &schemapb.FieldData{
FieldName: "sub_field", FieldName: "test_struct[sub_field]",
FieldId: 1021, FieldId: 1021,
Type: schemapb.DataType_Array, Type: schemapb.DataType_Array,
Field: &schemapb.FieldData_Scalars{ Field: &schemapb.FieldData_Scalars{
@ -4318,7 +4378,7 @@ func Test_reconstructStructFieldDataCommon(t *testing.T) {
} }
fieldsData := []*schemapb.FieldData{regularField, subFieldData} fieldsData := []*schemapb.FieldData{regularField, subFieldData}
outputFields := []string{"regular_field", "sub_field"} outputFields := []string{"regular_field", "test_struct[sub_field]"}
schema := &schemapb.CollectionSchema{ schema := &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{ Fields: []*schemapb.FieldSchema{
@ -4335,7 +4395,7 @@ func Test_reconstructStructFieldDataCommon(t *testing.T) {
Fields: []*schemapb.FieldSchema{ Fields: []*schemapb.FieldSchema{
{ {
FieldID: 1021, FieldID: 1021,
Name: "sub_field", Name: "test_struct[sub_field]",
DataType: schemapb.DataType_Array, DataType: schemapb.DataType_Array,
ElementType: schemapb.DataType_Int32, ElementType: schemapb.DataType_Int32,
}, },
@ -4373,7 +4433,7 @@ func Test_reconstructStructFieldDataCommon(t *testing.T) {
t.Run("multiple struct fields", func(t *testing.T) { t.Run("multiple struct fields", func(t *testing.T) {
// Create sub field for first struct // Create sub field for first struct
struct1SubField := &schemapb.FieldData{ struct1SubField := &schemapb.FieldData{
FieldName: "struct1_sub", FieldName: "struct1[struct1_sub]",
FieldId: 1021, FieldId: 1021,
Type: schemapb.DataType_Array, Type: schemapb.DataType_Array,
Field: &schemapb.FieldData_Scalars{ Field: &schemapb.FieldData_Scalars{
@ -4390,7 +4450,7 @@ func Test_reconstructStructFieldDataCommon(t *testing.T) {
// Create sub fields for second struct // Create sub fields for second struct
struct2SubField1 := &schemapb.FieldData{ struct2SubField1 := &schemapb.FieldData{
FieldName: "struct2_sub1", FieldName: "struct2[struct2_sub1]",
FieldId: 1031, FieldId: 1031,
Type: schemapb.DataType_Array, Type: schemapb.DataType_Array,
Field: &schemapb.FieldData_Scalars{ Field: &schemapb.FieldData_Scalars{
@ -4406,7 +4466,7 @@ func Test_reconstructStructFieldDataCommon(t *testing.T) {
} }
struct2SubField2 := &schemapb.FieldData{ struct2SubField2 := &schemapb.FieldData{
FieldName: "struct2_sub2", FieldName: "struct2[struct2_sub2]",
FieldId: 1032, FieldId: 1032,
Type: schemapb.DataType_VarChar, Type: schemapb.DataType_VarChar,
Field: &schemapb.FieldData_Scalars{ Field: &schemapb.FieldData_Scalars{
@ -4419,7 +4479,7 @@ func Test_reconstructStructFieldDataCommon(t *testing.T) {
} }
fieldsData := []*schemapb.FieldData{struct1SubField, struct2SubField1, struct2SubField2} fieldsData := []*schemapb.FieldData{struct1SubField, struct2SubField1, struct2SubField2}
outputFields := []string{"struct1_sub", "struct2_sub1", "struct2_sub2"} outputFields := []string{"struct1[struct1_sub]", "struct2[struct2_sub1]", "struct2[struct2_sub2]"}
schema := &schemapb.CollectionSchema{ schema := &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{ Fields: []*schemapb.FieldSchema{
@ -4437,7 +4497,7 @@ func Test_reconstructStructFieldDataCommon(t *testing.T) {
Fields: []*schemapb.FieldSchema{ Fields: []*schemapb.FieldSchema{
{ {
FieldID: 1021, FieldID: 1021,
Name: "struct1_sub", Name: "struct1[struct1_sub]",
DataType: schemapb.DataType_Array, DataType: schemapb.DataType_Array,
ElementType: schemapb.DataType_Int32, ElementType: schemapb.DataType_Int32,
}, },
@ -4449,13 +4509,13 @@ func Test_reconstructStructFieldDataCommon(t *testing.T) {
Fields: []*schemapb.FieldSchema{ Fields: []*schemapb.FieldSchema{
{ {
FieldID: 1031, FieldID: 1031,
Name: "struct2_sub1", Name: "struct2[struct2_sub1]",
DataType: schemapb.DataType_Array, DataType: schemapb.DataType_Array,
ElementType: schemapb.DataType_Int32, ElementType: schemapb.DataType_Int32,
}, },
{ {
FieldID: 1032, FieldID: 1032,
Name: "struct2_sub2", Name: "struct2[struct2_sub2]",
DataType: schemapb.DataType_VarChar, DataType: schemapb.DataType_VarChar,
}, },
}, },
@ -4492,6 +4552,115 @@ func Test_reconstructStructFieldDataCommon(t *testing.T) {
assert.True(t, foundStruct1, "Should find struct1") assert.True(t, foundStruct1, "Should find struct1")
assert.True(t, foundStruct2, "Should find struct2") assert.True(t, foundStruct2, "Should find struct2")
}) })
t.Run("partial struct fields query - only return queried fields", func(t *testing.T) {
// Create a struct with 3 fields, but only query 2 of them
// This tests that we only return what the user requested
// Create mock data for only 2 out of 3 struct fields
clipStrData := &schemapb.FieldData{
FieldName: "clip[str]",
FieldId: 2001,
Type: schemapb.DataType_VarChar,
Field: &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_StringData{
StringData: &schemapb.StringArray{Data: []string{"text1", "text2"}},
},
},
},
}
clipIntData := &schemapb.FieldData{
FieldName: "clip[int]",
FieldId: 2002,
Type: schemapb.DataType_Int32,
Field: &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{Data: []int32{100, 200}},
},
},
},
}
// Note: clip[embedding] is NOT included in query results
fieldsData := []*schemapb.FieldData{clipStrData, clipIntData}
outputFields := []string{"clip[str]", "clip[int]"}
schema := &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{
FieldID: 100,
Name: "pk",
IsPrimaryKey: true,
DataType: schemapb.DataType_Int64,
},
},
StructArrayFields: []*schemapb.StructArrayFieldSchema{
{
FieldID: 200,
Name: "clip",
Fields: []*schemapb.FieldSchema{
{
FieldID: 2001,
Name: "clip[str]",
DataType: schemapb.DataType_VarChar,
},
{
FieldID: 2002,
Name: "clip[int]",
DataType: schemapb.DataType_Int32,
},
{
FieldID: 2003,
Name: "clip[embedding]",
DataType: schemapb.DataType_FloatVector,
TypeParams: []*commonpb.KeyValuePair{
{Key: "dim", Value: "128"},
},
},
},
},
},
}
resultFieldsData, resultOutputFields := reconstructStructFieldDataCommon(fieldsData, outputFields, schema)
// Check result
assert.Len(t, resultFieldsData, 1, "Should have one reconstructed struct field")
assert.Len(t, resultOutputFields, 1, "Output fields should have one")
structField := resultFieldsData[0]
assert.Equal(t, "clip", structField.FieldName)
assert.Equal(t, int64(200), structField.FieldId)
assert.Equal(t, schemapb.DataType_ArrayOfStruct, structField.Type)
assert.Equal(t, "clip", resultOutputFields[0])
// Check that struct only contains the 2 queried fields, NOT the embedding field
structArrays := structField.GetStructArrays()
assert.NotNil(t, structArrays)
assert.Len(t, structArrays.Fields, 2, "Struct should only contain 2 queried fields, not 3")
// Verify the field names have been restored to original names
var foundStr, foundInt bool
for _, field := range structArrays.Fields {
switch field.FieldId {
case 2001:
assert.Equal(t, "str", field.FieldName, "Field name should be restored to original")
assert.Equal(t, schemapb.DataType_VarChar, field.Type)
foundStr = true
case 2002:
assert.Equal(t, "int", field.FieldName, "Field name should be restored to original")
assert.Equal(t, schemapb.DataType_Int32, field.Type)
foundInt = true
case 2003:
assert.Fail(t, "Should not include embedding field as it was not queried")
}
}
assert.True(t, foundStr, "Should find str field")
assert.True(t, foundInt, "Should find int field")
})
} }
func TestLackOfFieldsDataBySchema(t *testing.T) { func TestLackOfFieldsDataBySchema(t *testing.T) {

View File

@ -595,8 +595,8 @@ func TestCreateCollectionDuplicateField(t *testing.T) {
mc := hp.CreateDefaultMilvusClient(ctx, t) mc := hp.CreateDefaultMilvusClient(ctx, t)
// duplicate field // duplicate field
pkField := entity.NewField().WithName("id").WithDataType(entity.FieldTypeVarChar).WithIsPrimaryKey(true) pkField := entity.NewField().WithName("id").WithDataType(entity.FieldTypeVarChar).WithIsPrimaryKey(true).WithMaxLength(1000)
pkField2 := entity.NewField().WithName("id").WithDataType(entity.FieldTypeVarChar) pkField2 := entity.NewField().WithName("id").WithDataType(entity.FieldTypeVarChar).WithMaxLength(1000)
vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim)
// two vector fields have same name // two vector fields have same name

View File

@ -28,6 +28,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proxy"
"github.com/milvus-io/milvus/pkg/v2/common" "github.com/milvus-io/milvus/pkg/v2/common"
"github.com/milvus-io/milvus/pkg/v2/log" "github.com/milvus-io/milvus/pkg/v2/log"
"github.com/milvus-io/milvus/pkg/v2/util/merr" "github.com/milvus-io/milvus/pkg/v2/util/merr"
@ -169,16 +170,17 @@ func (s *ArrayStructDataNodeSuite) loadCollection(collectionName string) {
log.Info("=========================Index created for float vector=========================") log.Info("=========================Index created for float vector=========================")
s.WaitForIndexBuilt(context.TODO(), collectionName, integration.FloatVecField) s.WaitForIndexBuilt(context.TODO(), collectionName, integration.FloatVecField)
subFieldName := proxy.ConcatStructFieldName(integration.StructArrayField, integration.StructSubFloatVecField)
createIndexResult, err := c.MilvusClient.CreateIndex(context.TODO(), &milvuspb.CreateIndexRequest{ createIndexResult, err := c.MilvusClient.CreateIndex(context.TODO(), &milvuspb.CreateIndexRequest{
DbName: dbName, DbName: dbName,
CollectionName: collectionName, CollectionName: collectionName,
FieldName: integration.StructSubFloatVecField, FieldName: subFieldName,
IndexName: "array_of_vector_index", IndexName: "array_of_vector_index",
ExtraParams: integration.ConstructIndexParam(s.dim, integration.IndexEmbListHNSW, metric.MaxSim), ExtraParams: integration.ConstructIndexParam(s.dim, integration.IndexEmbListHNSW, metric.MaxSim),
}) })
s.NoError(err) s.NoError(err)
s.Require().Equal(createIndexResult.GetErrorCode(), commonpb.ErrorCode_Success) s.Require().Equal(createIndexResult.GetErrorCode(), commonpb.ErrorCode_Success)
s.WaitForIndexBuilt(context.TODO(), collectionName, integration.StructSubFloatVecField) s.WaitForIndexBuilt(context.TODO(), collectionName, subFieldName)
log.Info("=========================Index created for array of vector=========================") log.Info("=========================Index created for array of vector=========================")
@ -315,9 +317,10 @@ func (s *ArrayStructDataNodeSuite) query(collectionName string) {
topk := 10 topk := 10
roundDecimal := -1 roundDecimal := -1
subFieldName := proxy.ConcatStructFieldName(integration.StructArrayField, integration.StructSubFloatVecField)
params := integration.GetSearchParams(integration.IndexEmbListHNSW, metric.MaxSim) params := integration.GetSearchParams(integration.IndexEmbListHNSW, metric.MaxSim)
searchReq := integration.ConstructEmbeddingListSearchRequest("", collectionName, expr, searchReq := integration.ConstructEmbeddingListSearchRequest("", collectionName, expr,
integration.StructSubFloatVecField, schemapb.DataType_FloatVector, []string{integration.StructArrayField}, metric.MaxSim, params, nq, s.dim, topk, roundDecimal) subFieldName, schemapb.DataType_FloatVector, []string{integration.StructArrayField}, metric.MaxSim, params, nq, s.dim, topk, roundDecimal)
searchResult, _ := c.MilvusClient.Search(context.TODO(), searchReq) searchResult, _ := c.MilvusClient.Search(context.TODO(), searchReq)

View File

@ -27,6 +27,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proxy"
"github.com/milvus-io/milvus/pkg/v2/common" "github.com/milvus-io/milvus/pkg/v2/common"
"github.com/milvus-io/milvus/pkg/v2/util/funcutil" "github.com/milvus-io/milvus/pkg/v2/util/funcutil"
"github.com/milvus-io/milvus/pkg/v2/util/metric" "github.com/milvus-io/milvus/pkg/v2/util/metric"
@ -198,18 +199,19 @@ func (s *TestArrayStructSuite) run() {
s.WaitForIndexBuiltWithDB(ctx, s.dbName, collection, vecFieldName) s.WaitForIndexBuiltWithDB(ctx, s.dbName, collection, vecFieldName)
subFieldName := proxy.ConcatStructFieldName(structFieldName, structSubVecFieldName)
// create index for struct sub-vector field // create index for struct sub-vector field
createIndexResult, err := s.Cluster.MilvusClient.CreateIndex(ctx, &milvuspb.CreateIndexRequest{ createIndexResult, err := s.Cluster.MilvusClient.CreateIndex(ctx, &milvuspb.CreateIndexRequest{
DbName: s.dbName, DbName: s.dbName,
CollectionName: collection, CollectionName: collection,
FieldName: structSubVecFieldName, FieldName: subFieldName,
IndexName: "array_of_vector_index", IndexName: "array_of_vector_index",
ExtraParams: integration.ConstructIndexParam(dim, s.indexType, s.metricType), ExtraParams: integration.ConstructIndexParam(dim, s.indexType, s.metricType),
}) })
s.Require().NoError(err) s.Require().NoError(err)
s.Require().Equal(createIndexResult.GetErrorCode(), commonpb.ErrorCode_Success) s.Require().Equal(createIndexResult.GetErrorCode(), commonpb.ErrorCode_Success)
s.WaitForIndexBuiltWithDB(ctx, s.dbName, collection, structSubVecFieldName) s.WaitForIndexBuiltWithDB(ctx, s.dbName, collection, subFieldName)
// load // load
_, err = s.Cluster.MilvusClient.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{ _, err = s.Cluster.MilvusClient.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{
@ -224,10 +226,10 @@ func (s *TestArrayStructSuite) run() {
nq := s.nq nq := s.nq
topk := s.topK topk := s.topK
outputFields := []string{structSubVecFieldName} outputFields := []string{subFieldName}
params := integration.GetSearchParams(s.indexType, s.metricType) params := integration.GetSearchParams(s.indexType, s.metricType)
searchReq := integration.ConstructEmbeddingListSearchRequest(s.dbName, collection, "", searchReq := integration.ConstructEmbeddingListSearchRequest(s.dbName, collection, "",
structSubVecFieldName, s.vecType, outputFields, s.metricType, params, nq, dim, topk, -1) subFieldName, s.vecType, outputFields, s.metricType, params, nq, dim, topk, -1)
searchResp, err := s.Cluster.MilvusClient.Search(ctx, searchReq) searchResp, err := s.Cluster.MilvusClient.Search(ctx, searchReq)
s.Require().NoError(err) s.Require().NoError(err)
@ -251,6 +253,6 @@ func (s *TestArrayStructSuite) TestGetVector_ArrayStruct_FloatVector() {
} }
func TestGetVectorArrayStruct(t *testing.T) { func TestGetVectorArrayStruct(t *testing.T) {
t.Skip("Skip integration test, need to refactor integration test framework.") // t.Skip("Skip integration test, need to refactor integration test framework.")
suite.Run(t, new(TestArrayStructSuite)) suite.Run(t, new(TestArrayStructSuite))
} }

View File

@ -30,6 +30,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proxy"
"github.com/milvus-io/milvus/internal/util/importutilv2" "github.com/milvus-io/milvus/internal/util/importutilv2"
"github.com/milvus-io/milvus/pkg/v2/common" "github.com/milvus-io/milvus/pkg/v2/common"
"github.com/milvus-io/milvus/pkg/v2/log" "github.com/milvus-io/milvus/pkg/v2/log"
@ -96,15 +97,16 @@ func (s *BulkInsertSuite) PrepareSourceCollection(dim int, dmlGroup *DMLGroup) *
s.NoError(merr.CheckRPCCall(createIndexStatus, err)) s.NoError(merr.CheckRPCCall(createIndexStatus, err))
s.WaitForIndexBuilt(ctx, collectionName, integration.FloatVecField) s.WaitForIndexBuilt(ctx, collectionName, integration.FloatVecField)
name := proxy.ConcatStructFieldName(integration.StructArrayField, integration.StructSubFloatVecField)
createIndexResult, err := c.MilvusClient.CreateIndex(context.TODO(), &milvuspb.CreateIndexRequest{ createIndexResult, err := c.MilvusClient.CreateIndex(context.TODO(), &milvuspb.CreateIndexRequest{
CollectionName: collectionName, CollectionName: collectionName,
FieldName: integration.StructSubFloatVecField, FieldName: name,
IndexName: "array_of_vector_index", IndexName: "array_of_vector_index",
ExtraParams: integration.ConstructIndexParam(dim, integration.IndexEmbListHNSW, metric.MaxSim), ExtraParams: integration.ConstructIndexParam(dim, integration.IndexEmbListHNSW, metric.MaxSim),
}) })
s.NoError(err) s.NoError(err)
s.Require().Equal(createIndexResult.GetErrorCode(), commonpb.ErrorCode_Success) s.Require().Equal(createIndexResult.GetErrorCode(), commonpb.ErrorCode_Success)
s.WaitForIndexBuilt(context.TODO(), collectionName, integration.StructSubFloatVecField) s.WaitForIndexBuilt(context.TODO(), collectionName, name)
// load // load
loadStatus, err := c.MilvusClient.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{ loadStatus, err := c.MilvusClient.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{

View File

@ -200,6 +200,10 @@ func (s *BulkInsertSuite) runForStructArray() {
s.NoError(err) s.NoError(err)
s.Equal(int32(0), createCollectionStatus.GetCode()) s.Equal(int32(0), createCollectionStatus.GetCode())
// adjust struct field name
schema.StructArrayFields[0].Fields[0].Name = "struct_with_vector_array[vector_array_field]"
schema.StructArrayFields[0].Fields[1].Name = "struct_with_vector_array[scalar_array_field]"
var files []*internalpb.ImportFile var files []*internalpb.ImportFile
options := []*commonpb.KeyValuePair{} options := []*commonpb.KeyValuePair{}
@ -257,7 +261,7 @@ func (s *BulkInsertSuite) runForStructArray() {
// Create index for vector array field // Create index for vector array field
createIndexStatus, err := c.MilvusClient.CreateIndex(ctx, &milvuspb.CreateIndexRequest{ createIndexStatus, err := c.MilvusClient.CreateIndex(ctx, &milvuspb.CreateIndexRequest{
CollectionName: collectionName, CollectionName: collectionName,
FieldName: "vector_array_field", FieldName: "struct_with_vector_array[vector_array_field]",
IndexName: "_default_idx", IndexName: "_default_idx",
ExtraParams: integration.ConstructIndexParam(dim, s.indexType, s.metricType), ExtraParams: integration.ConstructIndexParam(dim, s.indexType, s.metricType),
}) })
@ -277,10 +281,10 @@ func (s *BulkInsertSuite) runForStructArray() {
nq := 10 nq := 10
topk := 10 topk := 10
outputFields := []string{"vector_array_field"} outputFields := []string{"struct_with_vector_array[vector_array_field]"}
params := integration.GetSearchParams(s.indexType, s.metricType) params := integration.GetSearchParams(s.indexType, s.metricType)
searchReq := integration.ConstructEmbeddingListSearchRequest("", collectionName, "", searchReq := integration.ConstructEmbeddingListSearchRequest("", collectionName, "",
"vector_array_field", s.vecType, outputFields, s.metricType, params, nq, dim, topk, -1) "struct_with_vector_array[vector_array_field]", s.vecType, outputFields, s.metricType, params, nq, dim, topk, -1)
searchResp, err := s.Cluster.MilvusClient.Search(ctx, searchReq) searchResp, err := s.Cluster.MilvusClient.Search(ctx, searchReq)
s.Require().NoError(err) s.Require().NoError(err)

View File

@ -559,8 +559,9 @@ class TestMilvusClientCollectionInvalid(TestMilvusClientV2Base):
collection_name = cf.gen_collection_name_by_testcase_name() collection_name = cf.gen_collection_name_by_testcase_name()
# Create schema with duplicate field names # Create schema with duplicate field names
schema = self.create_schema(client, enable_dynamic_field=False)[0] schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field("int64_field", DataType.INT64, is_primary=True, auto_id=False) schema.add_field("int64_field", DataType.INT64, is_primary=True, auto_id=False, max_length=1000)
schema.add_field("int64_field", DataType.INT64) schema.add_field("float_field", DataType.FLOAT, max_length=1000)
schema.add_field("float_field", DataType.INT64, max_length=1000)
schema.add_field("vector_field", DataType.FLOAT_VECTOR, dim=default_dim) schema.add_field("vector_field", DataType.FLOAT_VECTOR, dim=default_dim)
error = {ct.err_code: 1100, ct.err_msg: "duplicated field name"} error = {ct.err_code: 1100, ct.err_msg: "duplicated field name"}