milvus/internal/storage/schema.go
sthuang d7df78a6c9
feat: Storage v2 compaction (#40667)
- Feat: Support Mix compaction. Covering tests include compatibility and
rollback ability.
  - Read v1 segments and compact with v2 format.
  - Read both v1 and v2 segments and compact with v2 format.
  - Read v2 segments and compact with v2 format.
  - Compact with duplicate primary key test.
  - Compact with bm25 segments.
  - Compact with merge sort segments.
  - Compact with no expiration segments.
  - Compact segments with missing binlogs.
  - Compact with nullable field segments.
- Feat: Support Clustering compaction. Covering tests include
compatibility and rollback ability.
  - Read v1 segments and compact with v2 format.
  - Read both v1 and v2 segments and compact with v2 format.
  - Read v2 segments and compact with v2 format.
  - Compact bm25 segments with v2 format.
  - Compact with memory limit.
- Enhance: Use serdeMap for serialization in the BuildRecord function to support all
Milvus data types.
related: #39173

Signed-off-by: shaoting-huang <shaoting.huang@zilliz.com>
2025-03-21 10:16:12 +08:00

44 lines
1.4 KiB
Go

package storage
import (
"strconv"
"github.com/apache/arrow/go/v17/arrow"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/v2/util/merr"
)
// ConvertToArrowSchema builds an Arrow schema containing one Arrow field per
// entry of fields, in the same order.
//
// The Arrow type of each field is produced by the arrowType constructor that
// serdeMap holds for the field's data type. For binary, float16, bfloat16,
// int8 and float vector fields the dimension is read from the field's type
// params through GetDimFromParams and handed to that constructor; every other
// data type is built with a dimension of 0.
//
// A parameter-invalid error is returned when serdeMap has no arrowType
// constructor for a field's data type, or when the dimension of one of the
// vector types listed above cannot be obtained from the type params. The
// schema is created without schema-level metadata.
func ConvertToArrowSchema(fields []*schemapb.FieldSchema) (*arrow.Schema, error) {
	arrowFields := make([]arrow.Field, 0, len(fields))
	for _, field := range fields {
		// Use the generated getters (as ConvertToArrowField does) rather than
		// direct field access, so a nil entry is reported as an unknown data
		// type instead of dereferencing a nil pointer.
		dataType := field.GetDataType()

		// Look the serde entry up once and reuse it for both the validity
		// check and the type construction below.
		entry := serdeMap[dataType]
		if entry.arrowType == nil {
			return nil, merr.WrapErrParameterInvalidMsg("unknown field data type [%s] for field [%s]", dataType, field.GetName())
		}

		// Only the vector types below carry a dimension; everything else
		// keeps the zero value.
		dim := 0
		switch dataType {
		case schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector,
			schemapb.DataType_Int8Vector, schemapb.DataType_FloatVector:
			var err error
			dim, err = GetDimFromParams(field.GetTypeParams())
			if err != nil {
				// Keep the underlying cause in the message so the caller can
				// see why the dimension lookup failed.
				return nil, merr.WrapErrParameterInvalidMsg("dim not found in field [%s] params: %v", field.GetName(), err)
			}
		}

		arrowFields = append(arrowFields, ConvertToArrowField(field, entry.arrowType(dim)))
	}
	return arrow.NewSchema(arrowFields, nil), nil
}
// ConvertToArrowField describes field as an arrow.Field of the given Arrow
// data type.
//
// The Arrow field takes its name and nullability from field, and carries the
// field ID as a decimal string under the "FieldID" metadata key.
func ConvertToArrowField(field *schemapb.FieldSchema, dataType arrow.DataType) arrow.Field {
	// Format the ID as a 64-bit integer directly: going through int first
	// would truncate large IDs on platforms where int is 32 bits wide.
	fieldID := strconv.FormatInt(field.GetFieldID(), 10)

	return arrow.Field{
		Name:     field.GetName(),
		Type:     dataType,
		Metadata: arrow.NewMetadata([]string{"FieldID"}, []string{fieldID}),
		Nullable: field.GetNullable(),
	}
}