milvus/internal/storage/schema.go
congqixia ae256c52ae
enhance: Resolve issues integrating loon FFI (#45918)
Related to #44956

- Update milvus-storage version to ba7df7b for chunk reader fix
- Pass manifest path to index build request in DataCoord/DataNode
- Add null chunk assertion with detailed debug info in
ManifestGroupTranslator
- Fix memory corruption by removing premature transaction handle
destruction
- Clean up log message in ChunkedSegmentSealedImpl

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
2025-11-28 18:41:08 +08:00

87 lines
2.8 KiB
Go

package storage
import (
"fmt"
"strconv"
"github.com/apache/arrow/go/v17/arrow"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/storagev2/packed"
"github.com/milvus-io/milvus/pkg/v2/util/merr"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
// ConvertToArrowSchema builds an Arrow schema from a collection schema.
//
// Top-level fields are converted first, in declaration order, followed by the
// sub-fields of every struct-array field, flattened in declaration order.
// useFieldID is forwarded to ConvertToArrowField, which uses it when choosing
// the Arrow column name.
//
// An error is returned when a field's data type has no Arrow type constructor
// registered in serdeMap, or when the dimension of a dense-vector or
// array-of-vector field cannot be read from its type params.
func ConvertToArrowSchema(schema *schemapb.CollectionSchema, useFieldID bool) (*arrow.Schema, error) {
	arrowFields := make([]arrow.Field, 0, typeutil.GetTotalFieldsNum(schema))

	// appendArrowField converts one field and appends it to arrowFields.
	// Generated getters are used throughout because they tolerate a nil
	// receiver, so a nil entry in the schema does not cause a nil dereference
	// in this closure.
	appendArrowField := func(field *schemapb.FieldSchema) error {
		dataType := field.GetDataType()

		// Look the constructor up once; a missing map entry yields the zero
		// value, whose arrowType is nil.
		newArrowType := serdeMap[dataType].arrowType
		if newArrowType == nil {
			return merr.WrapErrParameterInvalidMsg("unknown field data type [%s] for field [%s]", dataType, field.GetName())
		}

		// Only the vector types listed below carry a dimension; every other
		// type keeps dim at 0.
		dim := 0
		switch dataType {
		case schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector,
			schemapb.DataType_Int8Vector, schemapb.DataType_FloatVector, schemapb.DataType_ArrayOfVector:
			parsedDim, err := GetDimFromParams(field.GetTypeParams())
			if err != nil {
				// Keep the underlying cause so that a malformed dim can be
				// told apart from a missing one.
				return merr.WrapErrParameterInvalidMsg("dim not found in field [%s] params: %v", field.GetName(), err)
			}
			dim = parsedDim
		}

		// The element type is only forwarded for ArrayOfVector fields.
		isArrayOfVector := dataType == schemapb.DataType_ArrayOfVector
		elementType := schemapb.DataType_None
		if isArrayOfVector {
			elementType = field.GetElementType()
		}

		arrowField := ConvertToArrowField(field, newArrowType(dim, elementType), useFieldID)

		// ArrayOfVector replaces the default metadata with one that also
		// records the element type and the dimension next to the field ID.
		if isArrayOfVector {
			arrowField.Metadata = arrow.NewMetadata(
				[]string{packed.ArrowFieldIdMetadataKey, "elementType", "dim"},
				[]string{strconv.Itoa(int(field.GetFieldID())), strconv.Itoa(int(elementType)), strconv.Itoa(dim)},
			)
		}

		arrowFields = append(arrowFields, arrowField)
		return nil
	}

	for _, field := range schema.GetFields() {
		if err := appendArrowField(field); err != nil {
			return nil, err
		}
	}

	// Struct-array sub-fields are flattened into the same top-level column list.
	for _, structField := range schema.GetStructArrayFields() {
		for _, field := range structField.GetFields() {
			if err := appendArrowField(field); err != nil {
				return nil, err
			}
		}
	}

	return arrow.NewSchema(arrowFields, nil), nil
}
// ConvertToArrowField builds the Arrow field for a single schema field using
// the supplied Arrow data type.
//
// The Arrow field name is chosen in this order:
//  1. the field's external field name, when it is non-empty;
//  2. the decimal field ID, when useFieldID is true;
//  3. the field's own name otherwise.
//
// The field ID is always stored in the metadata under
// packed.ArrowFieldIdMetadataKey, and nullability is copied from the schema field.
func ConvertToArrowField(field *schemapb.FieldSchema, dataType arrow.DataType, useFieldID bool) arrow.Field {
	fieldID := field.GetFieldID()

	var name string
	switch externalName := field.GetExternalField(); {
	case externalName != "":
		// An external field name takes precedence over everything else.
		name = externalName
	case useFieldID:
		// The caller asked for ID-based column names.
		name = fmt.Sprintf("%d", fieldID)
	default:
		name = field.GetName()
	}

	return arrow.Field{
		Name:     name,
		Type:     dataType,
		Nullable: field.GetNullable(),
		Metadata: arrow.NewMetadata(
			[]string{packed.ArrowFieldIdMetadataKey},
			[]string{strconv.Itoa(int(fieldID))},
		),
	}
}