fix: Fix a bug where bulkimport cannot handle an empty struct list (#45693)

issue: https://github.com/milvus-io/milvus/issues/42148

Signed-off-by: yhmo <yihua.mo@zilliz.com>
This commit is contained in:
groot 2025-11-25 17:21:06 +08:00 committed by GitHub
parent 7078f403f1
commit a545ebc702
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 31 additions and 11 deletions

View File

@ -189,6 +189,15 @@ func (r *rowParser) reconstructArrayForStructArray(structName string, subFieldsM
flatStructs[fieldName] = append(flatStructs[fieldName], value)
}
}
// struct list can be empty, len(structs) can be zero
// fill flatStructs with empty list for each sub field if len(structs) is zero
if len(structs) == 0 {
for subFieldName := range subFieldsMap {
flatStructs[subFieldName] = make([]any, 0)
}
}
return flatStructs, nil
}

View File

@ -45,7 +45,7 @@ type rowParser struct {
dynamicField *schemapb.FieldSchema
functionOutputFields map[string]int64
structArrays map[string]interface{}
structArrays map[string][]string
allowInsertAutoID bool
timezone string
@ -89,10 +89,14 @@ func NewRowParser(schema *schemapb.CollectionSchema) (RowParser, error) {
)
allowInsertAutoID, _ := pkgcommon.IsAllowInsertAutoID(schema.GetProperties()...)
sturctArrays := lo.SliceToMap(
structArrays := lo.SliceToMap(
schema.GetStructArrayFields(),
func(sa *schemapb.StructArrayFieldSchema) (string, interface{}) {
return sa.GetName(), nil
func(sa *schemapb.StructArrayFieldSchema) (string, []string) {
subFieldNames := lo.Map(sa.Fields, func(field *schemapb.FieldSchema, _ int) string {
fieldName, _ := typeutil.ExtractStructFieldName(field.GetName())
return fieldName
})
return sa.GetName(), subFieldNames
},
)
@ -103,7 +107,7 @@ func NewRowParser(schema *schemapb.CollectionSchema) (RowParser, error) {
pkField: pkField,
dynamicField: dynamicField,
functionOutputFields: functionOutputFields,
structArrays: sturctArrays,
structArrays: structArrays,
allowInsertAutoID: allowInsertAutoID,
timezone: common.GetSchemaTimezone(schema),
}, nil
@ -221,12 +225,20 @@ func (r *rowParser) Parse(raw any) (Row, error) {
return nil, merr.WrapErrImportFailed(fmt.Sprintf("the field '%s' is output by function, no need to provide", key))
}
if _, ok := r.structArrays[key]; ok {
if subFieldNames, ok := r.structArrays[key]; ok {
values, err := reconstructArrayForStructArray(value)
if err != nil {
return nil, err
}
// a struct list can be empty, the values could be an empty map
// make an empty list for each sub field
if len(values) == 0 {
for i := 0; i < len(subFieldNames); i++ {
values[subFieldNames[i]] = make([]any, 0)
}
}
for subKey, subValue := range values {
// Pass struct name for sub-fields
if err := handleField(key, subKey, subValue); err != nil {

View File

@ -103,10 +103,7 @@ func (r *StructFieldReader) Next(count int64) (any, any, error) {
}
func (r *StructFieldReader) toScalarField(data []interface{}) (*schemapb.ScalarField, error) {
if len(data) == 0 {
return nil, nil
}
// struct list can be empty, len(data) can be zero, build an empty ScalarField if len(data) is zero
switch r.field.GetElementType() {
case schemapb.DataType_Bool:
boolData := make([]bool, len(data))
@ -320,7 +317,9 @@ func (r *StructFieldReader) readArrayOfVectorField(chunked *arrow.Chunked) (any,
}
}
}
if len(allVectors) > 0 {
// struct list could be empty, len(allVectors) can be zero
// build an empty VectorField if len(allVectors) is zero
if len(allVectors) >= 0 {
vectorField := &schemapb.VectorField{
Dim: int64(r.dim),
Data: &schemapb.VectorField_FloatVector{