From a545ebc70241699c98543bb71b0a8831a25bbed7 Mon Sep 17 00:00:00 2001 From: groot Date: Tue, 25 Nov 2025 17:21:06 +0800 Subject: [PATCH] fix: Fix a bug that bulkimport cannot handle empty struct list (#45693) issue: https://github.com/milvus-io/milvus/issues/42148 Signed-off-by: yhmo --- internal/util/importutilv2/csv/row_parser.go | 9 +++++++ internal/util/importutilv2/json/row_parser.go | 24 ++++++++++++++----- .../parquet/struct_field_reader.go | 9 ++++--- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/internal/util/importutilv2/csv/row_parser.go b/internal/util/importutilv2/csv/row_parser.go index b5f81e4454..69f6a2b2c2 100644 --- a/internal/util/importutilv2/csv/row_parser.go +++ b/internal/util/importutilv2/csv/row_parser.go @@ -189,6 +189,15 @@ func (r *rowParser) reconstructArrayForStructArray(structName string, subFieldsM flatStructs[fieldName] = append(flatStructs[fieldName], value) } } + + // struct list can be empty, len(structs) can be zero + // fill flatStructs with empty list for each sub field if len(structs) is zero + if len(structs) == 0 { + for subFieldName := range subFieldsMap { + flatStructs[subFieldName] = make([]any, 0) + } + } + return flatStructs, nil } diff --git a/internal/util/importutilv2/json/row_parser.go b/internal/util/importutilv2/json/row_parser.go index 05f172cdb4..201291db5e 100644 --- a/internal/util/importutilv2/json/row_parser.go +++ b/internal/util/importutilv2/json/row_parser.go @@ -45,7 +45,7 @@ type rowParser struct { dynamicField *schemapb.FieldSchema functionOutputFields map[string]int64 - structArrays map[string]interface{} + structArrays map[string][]string allowInsertAutoID bool timezone string @@ -89,10 +89,14 @@ func NewRowParser(schema *schemapb.CollectionSchema) (RowParser, error) { ) allowInsertAutoID, _ := pkgcommon.IsAllowInsertAutoID(schema.GetProperties()...) - sturctArrays := lo.SliceToMap( + structArrays := lo.SliceToMap( schema.GetStructArrayFields(), - func(sa *schemapb.StructArrayFieldSchema) (string, interface{}) { - return sa.GetName(), nil + func(sa *schemapb.StructArrayFieldSchema) (string, []string) { + subFieldNames := lo.Map(sa.Fields, func(field *schemapb.FieldSchema, _ int) string { + fieldName, _ := typeutil.ExtractStructFieldName(field.GetName()) + return fieldName + }) + return sa.GetName(), subFieldNames }, ) @@ -103,7 +107,7 @@ func NewRowParser(schema *schemapb.CollectionSchema) (RowParser, error) { pkField: pkField, dynamicField: dynamicField, functionOutputFields: functionOutputFields, - structArrays: sturctArrays, + structArrays: structArrays, allowInsertAutoID: allowInsertAutoID, timezone: common.GetSchemaTimezone(schema), }, nil @@ -221,12 +225,20 @@ func (r *rowParser) Parse(raw any) (Row, error) { return nil, merr.WrapErrImportFailed(fmt.Sprintf("the field '%s' is output by function, no need to provide", key)) } - if _, ok := r.structArrays[key]; ok { + if subFieldNames, ok := r.structArrays[key]; ok { values, err := reconstructArrayForStructArray(value) if err != nil { return nil, err } + // a struct list can be empty, the values could be an empty map + // make an empty list for each sub field + if len(values) == 0 { + for i := 0; i < len(subFieldNames); i++ { + values[subFieldNames[i]] = make([]any, 0) + } + } + for subKey, subValue := range values { // Pass struct name for sub-fields if err := handleField(key, subKey, subValue); err != nil { diff --git a/internal/util/importutilv2/parquet/struct_field_reader.go b/internal/util/importutilv2/parquet/struct_field_reader.go index 90fe12c179..ca607bebf9 100644 --- a/internal/util/importutilv2/parquet/struct_field_reader.go +++ b/internal/util/importutilv2/parquet/struct_field_reader.go @@ -103,10 +103,7 @@ func (r *StructFieldReader) Next(count int64) (any, any, error) { } func (r *StructFieldReader) toScalarField(data []interface{}) (*schemapb.ScalarField, error) { - if len(data) == 0 { - return nil, nil - } - + // struct list can be empty, len(data) can be zero, build an empty ScalarField if len(data) is zero switch r.field.GetElementType() { case schemapb.DataType_Bool: boolData := make([]bool, len(data)) @@ -320,7 +317,9 @@ func (r *StructFieldReader) readArrayOfVectorField(chunked *arrow.Chunked) (any, } } } - if len(allVectors) > 0 { + // struct list could be empty, len(allVectors) can be zero + // build an empty VectorField if len(allVectors) is zero + if len(allVectors) >= 0 { vectorField := &schemapb.VectorField{ Dim: int64(r.dim), Data: &schemapb.VectorField_FloatVector{