mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 01:28:27 +08:00
enhance: support default value in import (#36700)
https://github.com/milvus-io/milvus/issues/31728 Signed-off-by: lixinguo <xinguo.li@zilliz.com> Co-authored-by: lixinguo <xinguo.li@zilliz.com>
This commit is contained in:
parent
1375d690dd
commit
463c47ced1
@ -9,6 +9,7 @@ import (
|
|||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
|
"github.com/milvus-io/milvus/internal/util/nullutil"
|
||||||
"github.com/milvus-io/milvus/pkg/common"
|
"github.com/milvus-io/milvus/pkg/common"
|
||||||
"github.com/milvus-io/milvus/pkg/log"
|
"github.com/milvus-io/milvus/pkg/log"
|
||||||
"github.com/milvus-io/milvus/pkg/util/funcutil"
|
"github.com/milvus-io/milvus/pkg/util/funcutil"
|
||||||
@ -296,7 +297,7 @@ func (v *validateUtil) fillWithValue(data []*schemapb.FieldData, schema *typeuti
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (v *validateUtil) fillWithNullValue(field *schemapb.FieldData, fieldSchema *schemapb.FieldSchema, numRows int) error {
|
func (v *validateUtil) fillWithNullValue(field *schemapb.FieldData, fieldSchema *schemapb.FieldSchema, numRows int) error {
|
||||||
err := checkValidData(field, fieldSchema, numRows)
|
err := nullutil.CheckValidData(field.GetValidData(), fieldSchema, numRows)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -491,7 +492,7 @@ func (v *validateUtil) fillWithDefaultValue(field *schemapb.FieldData, fieldSche
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
err = checkValidData(field, fieldSchema, numRows)
|
err = nullutil.CheckValidData(field.GetValidData(), fieldSchema, numRows)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -499,19 +500,6 @@ func (v *validateUtil) fillWithDefaultValue(field *schemapb.FieldData, fieldSche
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func checkValidData(data *schemapb.FieldData, schema *schemapb.FieldSchema, numRows int) error {
|
|
||||||
expectedNum := 0
|
|
||||||
// if nullable, the length of ValidData is numRows
|
|
||||||
if schema.GetNullable() {
|
|
||||||
expectedNum = numRows
|
|
||||||
}
|
|
||||||
if len(data.GetValidData()) != expectedNum {
|
|
||||||
msg := fmt.Sprintf("the length of valid_data of field(%s) is wrong", data.GetFieldName())
|
|
||||||
return merr.WrapErrParameterInvalid(expectedNum, len(data.GetValidData()), msg)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func fillWithNullValueImpl[T any](array []T, validData []bool) ([]T, error) {
|
func fillWithNullValueImpl[T any](array []T, validData []bool) ([]T, error) {
|
||||||
n := getValidNumber(validData)
|
n := getValidNumber(validData)
|
||||||
if len(array) != n {
|
if len(array) != n {
|
||||||
|
|||||||
@ -31,6 +31,7 @@ import (
|
|||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
"github.com/milvus-io/milvus/internal/mocks"
|
"github.com/milvus-io/milvus/internal/mocks"
|
||||||
"github.com/milvus-io/milvus/internal/storage"
|
"github.com/milvus-io/milvus/internal/storage"
|
||||||
|
"github.com/milvus-io/milvus/internal/util/nullutil"
|
||||||
"github.com/milvus-io/milvus/internal/util/testutil"
|
"github.com/milvus-io/milvus/internal/util/testutil"
|
||||||
"github.com/milvus-io/milvus/pkg/common"
|
"github.com/milvus-io/milvus/pkg/common"
|
||||||
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
||||||
@ -160,6 +161,84 @@ func (suite *ReaderSuite) run(dataType schemapb.DataType, elemType schemapb.Data
|
|||||||
checkFn(res, 0, suite.numRows)
|
checkFn(res, 0, suite.numRows)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (suite *ReaderSuite) runWithDefaultValue(dataType schemapb.DataType, elemType schemapb.DataType) {
|
||||||
|
schema := &schemapb.CollectionSchema{
|
||||||
|
Fields: []*schemapb.FieldSchema{
|
||||||
|
{
|
||||||
|
FieldID: 100,
|
||||||
|
Name: "pk",
|
||||||
|
IsPrimaryKey: true,
|
||||||
|
DataType: suite.pkDataType,
|
||||||
|
TypeParams: []*commonpb.KeyValuePair{
|
||||||
|
{
|
||||||
|
Key: common.MaxLengthKey,
|
||||||
|
Value: "128",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
FieldID: 101,
|
||||||
|
Name: "vec",
|
||||||
|
DataType: suite.vecDataType,
|
||||||
|
TypeParams: []*commonpb.KeyValuePair{
|
||||||
|
{
|
||||||
|
Key: common.DimKey,
|
||||||
|
Value: "8",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
// here always set nullable==true just for test, insertData will store validData only nullable==true
|
||||||
|
// jsonBytes Marshal from rows, if expectInsertData is nulls and set default value
|
||||||
|
// actualData will be default_value
|
||||||
|
fieldSchema, err := testutil.CreateFieldWithDefaultValue(dataType, 102, true)
|
||||||
|
suite.NoError(err)
|
||||||
|
schema.Fields = append(schema.Fields, fieldSchema)
|
||||||
|
|
||||||
|
insertData, err := testutil.CreateInsertData(schema, suite.numRows)
|
||||||
|
suite.NoError(err)
|
||||||
|
|
||||||
|
rows, err := testutil.CreateInsertDataRowsForJSON(schema, insertData)
|
||||||
|
suite.NoError(err)
|
||||||
|
|
||||||
|
jsonBytes, err := json.Marshal(rows)
|
||||||
|
suite.NoError(err)
|
||||||
|
|
||||||
|
type mockReader struct {
|
||||||
|
io.Reader
|
||||||
|
io.Closer
|
||||||
|
io.ReaderAt
|
||||||
|
io.Seeker
|
||||||
|
}
|
||||||
|
cm := mocks.NewChunkManager(suite.T())
|
||||||
|
cm.EXPECT().Reader(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, s string) (storage.FileReader, error) {
|
||||||
|
r := &mockReader{Reader: strings.NewReader(string(jsonBytes))}
|
||||||
|
return r, nil
|
||||||
|
})
|
||||||
|
reader, err := NewReader(context.Background(), cm, schema, "mockPath", math.MaxInt)
|
||||||
|
suite.NoError(err)
|
||||||
|
|
||||||
|
checkFn := func(actualInsertData *storage.InsertData, offsetBegin, expectRows int) {
|
||||||
|
expectInsertData := insertData
|
||||||
|
for fieldID, data := range actualInsertData.Data {
|
||||||
|
suite.Equal(expectRows, data.RowNum())
|
||||||
|
for i := 0; i < expectRows; i++ {
|
||||||
|
expect := expectInsertData.Data[fieldID].GetRow(i + offsetBegin)
|
||||||
|
actual := data.GetRow(i)
|
||||||
|
if expect == nil {
|
||||||
|
expect, err = nullutil.GetDefaultValue(fieldSchema)
|
||||||
|
suite.NoError(err)
|
||||||
|
}
|
||||||
|
suite.Equal(expect, actual)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
res, err := reader.Read()
|
||||||
|
suite.NoError(err)
|
||||||
|
checkFn(res, 0, suite.numRows)
|
||||||
|
}
|
||||||
|
|
||||||
func (suite *ReaderSuite) TestReadScalarFields() {
|
func (suite *ReaderSuite) TestReadScalarFields() {
|
||||||
suite.run(schemapb.DataType_Bool, schemapb.DataType_None, false)
|
suite.run(schemapb.DataType_Bool, schemapb.DataType_None, false)
|
||||||
suite.run(schemapb.DataType_Int8, schemapb.DataType_None, false)
|
suite.run(schemapb.DataType_Int8, schemapb.DataType_None, false)
|
||||||
@ -202,6 +281,18 @@ func (suite *ReaderSuite) TestReadScalarFields() {
|
|||||||
suite.run(schemapb.DataType_Array, schemapb.DataType_String, true)
|
suite.run(schemapb.DataType_Array, schemapb.DataType_String, true)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (suite *ReaderSuite) TestReadScalarFieldsWithDefaultValue() {
|
||||||
|
suite.runWithDefaultValue(schemapb.DataType_Bool, schemapb.DataType_None)
|
||||||
|
suite.runWithDefaultValue(schemapb.DataType_Int8, schemapb.DataType_None)
|
||||||
|
suite.runWithDefaultValue(schemapb.DataType_Int16, schemapb.DataType_None)
|
||||||
|
suite.runWithDefaultValue(schemapb.DataType_Int32, schemapb.DataType_None)
|
||||||
|
suite.runWithDefaultValue(schemapb.DataType_Int64, schemapb.DataType_None)
|
||||||
|
suite.runWithDefaultValue(schemapb.DataType_Float, schemapb.DataType_None)
|
||||||
|
suite.runWithDefaultValue(schemapb.DataType_Double, schemapb.DataType_None)
|
||||||
|
suite.runWithDefaultValue(schemapb.DataType_String, schemapb.DataType_None)
|
||||||
|
suite.runWithDefaultValue(schemapb.DataType_VarChar, schemapb.DataType_None)
|
||||||
|
}
|
||||||
|
|
||||||
func (suite *ReaderSuite) TestStringPK() {
|
func (suite *ReaderSuite) TestStringPK() {
|
||||||
suite.pkDataType = schemapb.DataType_VarChar
|
suite.pkDataType = schemapb.DataType_VarChar
|
||||||
suite.run(schemapb.DataType_Int32, schemapb.DataType_None, false)
|
suite.run(schemapb.DataType_Int32, schemapb.DataType_None, false)
|
||||||
|
|||||||
@ -25,6 +25,7 @@ import (
|
|||||||
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
"github.com/milvus-io/milvus/internal/util/importutilv2/common"
|
"github.com/milvus-io/milvus/internal/util/importutilv2/common"
|
||||||
|
"github.com/milvus-io/milvus/internal/util/nullutil"
|
||||||
"github.com/milvus-io/milvus/pkg/util/merr"
|
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||||
"github.com/milvus-io/milvus/pkg/util/parameterutil"
|
"github.com/milvus-io/milvus/pkg/util/parameterutil"
|
||||||
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
||||||
@ -195,6 +196,9 @@ func (r *rowParser) combineDynamicRow(dynamicValues map[string]any, row Row) err
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (r *rowParser) parseEntity(fieldID int64, obj any) (any, error) {
|
func (r *rowParser) parseEntity(fieldID int64, obj any) (any, error) {
|
||||||
|
if r.id2Field[fieldID].GetDefaultValue() != nil && obj == nil {
|
||||||
|
return nullutil.GetDefaultValue(r.id2Field[fieldID])
|
||||||
|
}
|
||||||
if r.id2Field[fieldID].GetNullable() {
|
if r.id2Field[fieldID].GetNullable() {
|
||||||
return r.parseNullableEntity(fieldID, obj)
|
return r.parseNullableEntity(fieldID, obj)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -30,6 +30,7 @@ import (
|
|||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
"github.com/milvus-io/milvus/internal/storage"
|
"github.com/milvus-io/milvus/internal/storage"
|
||||||
"github.com/milvus-io/milvus/internal/util/importutilv2/common"
|
"github.com/milvus-io/milvus/internal/util/importutilv2/common"
|
||||||
|
"github.com/milvus-io/milvus/internal/util/nullutil"
|
||||||
"github.com/milvus-io/milvus/pkg/util/merr"
|
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||||
"github.com/milvus-io/milvus/pkg/util/parameterutil"
|
"github.com/milvus-io/milvus/pkg/util/parameterutil"
|
||||||
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
||||||
@ -69,37 +70,37 @@ func NewFieldReader(ctx context.Context, reader *pqarrow.FileReader, columnIndex
|
|||||||
func (c *FieldReader) Next(count int64) (any, any, error) {
|
func (c *FieldReader) Next(count int64) (any, any, error) {
|
||||||
switch c.field.GetDataType() {
|
switch c.field.GetDataType() {
|
||||||
case schemapb.DataType_Bool:
|
case schemapb.DataType_Bool:
|
||||||
if c.field.GetNullable() {
|
if c.field.GetNullable() || c.field.GetDefaultValue() != nil {
|
||||||
return ReadNullableBoolData(c, count)
|
return ReadNullableBoolData(c, count)
|
||||||
}
|
}
|
||||||
data, err := ReadBoolData(c, count)
|
data, err := ReadBoolData(c, count)
|
||||||
return data, nil, err
|
return data, nil, err
|
||||||
case schemapb.DataType_Int8:
|
case schemapb.DataType_Int8:
|
||||||
if c.field.GetNullable() {
|
if c.field.GetNullable() || c.field.GetDefaultValue() != nil {
|
||||||
return ReadNullableIntegerOrFloatData[int8](c, count)
|
return ReadNullableIntegerOrFloatData[int8](c, count)
|
||||||
}
|
}
|
||||||
data, err := ReadIntegerOrFloatData[int8](c, count)
|
data, err := ReadIntegerOrFloatData[int8](c, count)
|
||||||
return data, nil, err
|
return data, nil, err
|
||||||
case schemapb.DataType_Int16:
|
case schemapb.DataType_Int16:
|
||||||
if c.field.GetNullable() {
|
if c.field.GetNullable() || c.field.GetDefaultValue() != nil {
|
||||||
return ReadNullableIntegerOrFloatData[int16](c, count)
|
return ReadNullableIntegerOrFloatData[int16](c, count)
|
||||||
}
|
}
|
||||||
data, err := ReadIntegerOrFloatData[int16](c, count)
|
data, err := ReadIntegerOrFloatData[int16](c, count)
|
||||||
return data, nil, err
|
return data, nil, err
|
||||||
case schemapb.DataType_Int32:
|
case schemapb.DataType_Int32:
|
||||||
if c.field.GetNullable() {
|
if c.field.GetNullable() || c.field.GetDefaultValue() != nil {
|
||||||
return ReadNullableIntegerOrFloatData[int32](c, count)
|
return ReadNullableIntegerOrFloatData[int32](c, count)
|
||||||
}
|
}
|
||||||
data, err := ReadIntegerOrFloatData[int32](c, count)
|
data, err := ReadIntegerOrFloatData[int32](c, count)
|
||||||
return data, nil, err
|
return data, nil, err
|
||||||
case schemapb.DataType_Int64:
|
case schemapb.DataType_Int64:
|
||||||
if c.field.GetNullable() {
|
if c.field.GetNullable() || c.field.GetDefaultValue() != nil {
|
||||||
return ReadNullableIntegerOrFloatData[int64](c, count)
|
return ReadNullableIntegerOrFloatData[int64](c, count)
|
||||||
}
|
}
|
||||||
data, err := ReadIntegerOrFloatData[int64](c, count)
|
data, err := ReadIntegerOrFloatData[int64](c, count)
|
||||||
return data, nil, err
|
return data, nil, err
|
||||||
case schemapb.DataType_Float:
|
case schemapb.DataType_Float:
|
||||||
if c.field.GetNullable() {
|
if c.field.GetNullable() || c.field.GetDefaultValue() != nil {
|
||||||
data, validData, err := ReadNullableIntegerOrFloatData[float32](c, count)
|
data, validData, err := ReadNullableIntegerOrFloatData[float32](c, count)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
@ -118,7 +119,7 @@ func (c *FieldReader) Next(count int64) (any, any, error) {
|
|||||||
}
|
}
|
||||||
return data, nil, typeutil.VerifyFloats32(data.([]float32))
|
return data, nil, typeutil.VerifyFloats32(data.([]float32))
|
||||||
case schemapb.DataType_Double:
|
case schemapb.DataType_Double:
|
||||||
if c.field.GetNullable() {
|
if c.field.GetNullable() || c.field.GetDefaultValue() != nil {
|
||||||
data, validData, err := ReadNullableIntegerOrFloatData[float64](c, count)
|
data, validData, err := ReadNullableIntegerOrFloatData[float64](c, count)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
@ -137,18 +138,20 @@ func (c *FieldReader) Next(count int64) (any, any, error) {
|
|||||||
}
|
}
|
||||||
return data, nil, typeutil.VerifyFloats64(data.([]float64))
|
return data, nil, typeutil.VerifyFloats64(data.([]float64))
|
||||||
case schemapb.DataType_VarChar, schemapb.DataType_String:
|
case schemapb.DataType_VarChar, schemapb.DataType_String:
|
||||||
if c.field.GetNullable() {
|
if c.field.GetNullable() || c.field.GetDefaultValue() != nil {
|
||||||
return ReadNullableVarcharData(c, count)
|
return ReadNullableVarcharData(c, count)
|
||||||
}
|
}
|
||||||
data, err := ReadVarcharData(c, count)
|
data, err := ReadVarcharData(c, count)
|
||||||
return data, nil, err
|
return data, nil, err
|
||||||
case schemapb.DataType_JSON:
|
case schemapb.DataType_JSON:
|
||||||
|
// json has not support default_value
|
||||||
if c.field.GetNullable() {
|
if c.field.GetNullable() {
|
||||||
return ReadNullableJSONData(c, count)
|
return ReadNullableJSONData(c, count)
|
||||||
}
|
}
|
||||||
data, err := ReadJSONData(c, count)
|
data, err := ReadJSONData(c, count)
|
||||||
return data, nil, err
|
return data, nil, err
|
||||||
case schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector:
|
case schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector:
|
||||||
|
// vector not support default_value
|
||||||
if c.field.GetNullable() {
|
if c.field.GetNullable() {
|
||||||
return nil, nil, merr.WrapErrParameterInvalidMsg("not support nullable in vector")
|
return nil, nil, merr.WrapErrParameterInvalidMsg("not support nullable in vector")
|
||||||
}
|
}
|
||||||
@ -174,6 +177,7 @@ func (c *FieldReader) Next(count int64) (any, any, error) {
|
|||||||
data, err := ReadSparseFloatVectorData(c, count)
|
data, err := ReadSparseFloatVectorData(c, count)
|
||||||
return data, nil, err
|
return data, nil, err
|
||||||
case schemapb.DataType_Array:
|
case schemapb.DataType_Array:
|
||||||
|
// array has not support default_value
|
||||||
if c.field.GetNullable() {
|
if c.field.GetNullable() {
|
||||||
return ReadNullableArrayData(c, count)
|
return ReadNullableArrayData(c, count)
|
||||||
}
|
}
|
||||||
@ -212,6 +216,30 @@ func ReadBoolData(pcr *FieldReader, count int64) (any, error) {
|
|||||||
return data, nil
|
return data, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func fillWithDefaultValueImpl[T any](array []T, value T, validData []bool, field *schemapb.FieldSchema) (any, []bool, error) {
|
||||||
|
rowNum := len(validData)
|
||||||
|
for i, v := range validData {
|
||||||
|
if !v {
|
||||||
|
array[i] = value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !typeutil.IsVectorType(field.GetDataType()) {
|
||||||
|
if field.GetNullable() {
|
||||||
|
for i := range validData {
|
||||||
|
validData[i] = true
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
validData = []bool{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
err := nullutil.CheckValidData(validData, field, rowNum)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
return array, validData, nil
|
||||||
|
}
|
||||||
|
|
||||||
func ReadNullableBoolData(pcr *FieldReader, count int64) (any, []bool, error) {
|
func ReadNullableBoolData(pcr *FieldReader, count int64) (any, []bool, error) {
|
||||||
chunked, err := pcr.columnReader.NextBatch(count)
|
chunked, err := pcr.columnReader.NextBatch(count)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -237,11 +265,15 @@ func ReadNullableBoolData(pcr *FieldReader, count int64) (any, []bool, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if len(data) != len(validData) {
|
||||||
|
return nil, nil, merr.WrapErrParameterInvalid(len(data), len(validData), "length of data is not equal to length of valid_data")
|
||||||
|
}
|
||||||
if len(data) == 0 {
|
if len(data) == 0 {
|
||||||
return nil, nil, nil
|
return nil, nil, nil
|
||||||
}
|
}
|
||||||
if len(data) != len(validData) {
|
if pcr.field.GetDefaultValue() != nil {
|
||||||
return nil, nil, merr.WrapErrParameterInvalid(len(data), len(validData), "length of data is not equal to length of valid_data")
|
defaultValue := pcr.field.GetDefaultValue().GetBoolData()
|
||||||
|
return fillWithDefaultValueImpl(data, defaultValue, validData, pcr.field)
|
||||||
}
|
}
|
||||||
return data, validData, nil
|
return data, validData, nil
|
||||||
}
|
}
|
||||||
@ -367,11 +399,19 @@ func ReadNullableIntegerOrFloatData[T constraints.Integer | constraints.Float](p
|
|||||||
return nil, nil, WrapTypeErr("integer|float|null", chunk.DataType().Name(), pcr.field)
|
return nil, nil, WrapTypeErr("integer|float|null", chunk.DataType().Name(), pcr.field)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if len(data) != len(validData) {
|
||||||
|
return nil, nil, merr.WrapErrParameterInvalid(len(data), len(validData), "length of data is not equal to length of valid_data")
|
||||||
|
}
|
||||||
if len(data) == 0 {
|
if len(data) == 0 {
|
||||||
return nil, nil, nil
|
return nil, nil, nil
|
||||||
}
|
}
|
||||||
if len(data) != len(validData) {
|
if pcr.field.GetDefaultValue() != nil {
|
||||||
return nil, nil, merr.WrapErrParameterInvalid(len(data), len(validData), "length of data is not equal to length of valid_data")
|
defaultValue, err := nullutil.GetDefaultValue(pcr.field)
|
||||||
|
if err != nil {
|
||||||
|
// won't happen
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
return fillWithDefaultValueImpl(data, defaultValue.(T), validData, pcr.field)
|
||||||
}
|
}
|
||||||
return data, validData, nil
|
return data, validData, nil
|
||||||
}
|
}
|
||||||
@ -430,12 +470,12 @@ func ReadNullableStringData(pcr *FieldReader, count int64) (any, []bool, error)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if len(data) == 0 {
|
|
||||||
return nil, nil, nil
|
|
||||||
}
|
|
||||||
if len(data) != len(validData) {
|
if len(data) != len(validData) {
|
||||||
return nil, nil, merr.WrapErrParameterInvalid(len(data), len(validData), "length of data is not equal to length of valid_data")
|
return nil, nil, merr.WrapErrParameterInvalid(len(data), len(validData), "length of data is not equal to length of valid_data")
|
||||||
}
|
}
|
||||||
|
if len(data) == 0 {
|
||||||
|
return nil, nil, nil
|
||||||
|
}
|
||||||
return data, validData, nil
|
return data, validData, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -507,11 +547,15 @@ func ReadNullableVarcharData(pcr *FieldReader, count int64) (any, []bool, error)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if len(data) != len(validData) {
|
||||||
|
return nil, nil, merr.WrapErrParameterInvalid(len(data), len(validData), "length of data is not equal to length of valid_data")
|
||||||
|
}
|
||||||
if len(data) == 0 {
|
if len(data) == 0 {
|
||||||
return nil, nil, nil
|
return nil, nil, nil
|
||||||
}
|
}
|
||||||
if len(data) != len(validData) {
|
if pcr.field.GetDefaultValue() != nil {
|
||||||
return nil, nil, merr.WrapErrParameterInvalid(len(data), len(validData), "length of data is not equal to length of valid_data")
|
defaultValue := pcr.field.GetDefaultValue().GetStringData()
|
||||||
|
return fillWithDefaultValueImpl(data, defaultValue, validData, pcr.field)
|
||||||
}
|
}
|
||||||
return data, validData, nil
|
return data, validData, nil
|
||||||
}
|
}
|
||||||
@ -739,12 +783,12 @@ func ReadNullableBoolArrayData(pcr *FieldReader, count int64) (any, []bool, erro
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if len(data) == 0 {
|
|
||||||
return nil, nil, nil
|
|
||||||
}
|
|
||||||
if len(data) != len(validData) {
|
if len(data) != len(validData) {
|
||||||
return nil, nil, merr.WrapErrParameterInvalid(len(data), len(validData), "length of data is not equal to length of valid_data")
|
return nil, nil, merr.WrapErrParameterInvalid(len(data), len(validData), "length of data is not equal to length of valid_data")
|
||||||
}
|
}
|
||||||
|
if len(data) == 0 {
|
||||||
|
return nil, nil, nil
|
||||||
|
}
|
||||||
return data, validData, nil
|
return data, validData, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -898,12 +942,12 @@ func ReadNullableIntegerOrFloatArrayData[T constraints.Integer | constraints.Flo
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if len(data) == 0 {
|
|
||||||
return nil, nil, nil
|
|
||||||
}
|
|
||||||
if len(data) != len(validData) {
|
if len(data) != len(validData) {
|
||||||
return nil, nil, merr.WrapErrParameterInvalid(len(data), len(validData), "length of data is not equal to length of valid_data")
|
return nil, nil, merr.WrapErrParameterInvalid(len(data), len(validData), "length of data is not equal to length of valid_data")
|
||||||
}
|
}
|
||||||
|
if len(data) == 0 {
|
||||||
|
return nil, nil, nil
|
||||||
|
}
|
||||||
return data, validData, nil
|
return data, validData, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -977,12 +1021,12 @@ func ReadNullableStringArrayData(pcr *FieldReader, count int64) (any, []bool, er
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if len(data) == 0 {
|
|
||||||
return nil, nil, nil
|
|
||||||
}
|
|
||||||
if len(data) != len(validData) {
|
if len(data) != len(validData) {
|
||||||
return nil, nil, merr.WrapErrParameterInvalid(len(data), len(validData), "length of data is not equal to length of valid_data")
|
return nil, nil, merr.WrapErrParameterInvalid(len(data), len(validData), "length of data is not equal to length of valid_data")
|
||||||
}
|
}
|
||||||
|
if len(data) == 0 {
|
||||||
|
return nil, nil, nil
|
||||||
|
}
|
||||||
return data, validData, nil
|
return data, validData, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -34,6 +34,7 @@ import (
|
|||||||
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
"github.com/milvus-io/milvus/internal/storage"
|
"github.com/milvus-io/milvus/internal/storage"
|
||||||
|
"github.com/milvus-io/milvus/internal/util/nullutil"
|
||||||
"github.com/milvus-io/milvus/internal/util/testutil"
|
"github.com/milvus-io/milvus/internal/util/testutil"
|
||||||
"github.com/milvus-io/milvus/pkg/common"
|
"github.com/milvus-io/milvus/pkg/common"
|
||||||
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
||||||
@ -285,6 +286,135 @@ func (s *ReaderSuite) failRun(dt schemapb.DataType, isDynamic bool) {
|
|||||||
s.Error(err)
|
s.Error(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *ReaderSuite) runWithDefaultValue(dataType schemapb.DataType, elemType schemapb.DataType, nullable bool, nullPercent int) {
|
||||||
|
schema := &schemapb.CollectionSchema{
|
||||||
|
Fields: []*schemapb.FieldSchema{
|
||||||
|
{
|
||||||
|
FieldID: 100,
|
||||||
|
Name: "pk",
|
||||||
|
IsPrimaryKey: true,
|
||||||
|
DataType: s.pkDataType,
|
||||||
|
TypeParams: []*commonpb.KeyValuePair{
|
||||||
|
{
|
||||||
|
Key: "max_length",
|
||||||
|
Value: "256",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
FieldID: 101,
|
||||||
|
Name: "vec",
|
||||||
|
DataType: s.vecDataType,
|
||||||
|
TypeParams: []*commonpb.KeyValuePair{
|
||||||
|
{
|
||||||
|
Key: common.DimKey,
|
||||||
|
Value: "8",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
// here always set nullable==true just for test, insertData will store validData only nullable==true
|
||||||
|
// if expectInsertData is nulls and set default value
|
||||||
|
// actualData will be default_value
|
||||||
|
fieldSchema, err := testutil.CreateFieldWithDefaultValue(dataType, 102, true)
|
||||||
|
s.NoError(err)
|
||||||
|
schema.Fields = append(schema.Fields, fieldSchema)
|
||||||
|
|
||||||
|
filePath := fmt.Sprintf("/tmp/test_%d_reader.parquet", rand.Int())
|
||||||
|
defer os.Remove(filePath)
|
||||||
|
wf, err := os.OpenFile(filePath, os.O_RDWR|os.O_CREATE, 0o666)
|
||||||
|
assert.NoError(s.T(), err)
|
||||||
|
insertData, err := writeParquet(wf, schema, s.numRows, nullPercent)
|
||||||
|
assert.NoError(s.T(), err)
|
||||||
|
|
||||||
|
ctx := context.Background()
|
||||||
|
f := storage.NewChunkManagerFactory("local", storage.RootPath("/tmp/milvus_test/test_parquet_reader/"))
|
||||||
|
cm, err := f.NewPersistentStorageChunkManager(ctx)
|
||||||
|
assert.NoError(s.T(), err)
|
||||||
|
schema.Fields[2].Nullable = nullable
|
||||||
|
reader, err := NewReader(ctx, cm, schema, filePath, 64*1024*1024)
|
||||||
|
s.NoError(err)
|
||||||
|
|
||||||
|
checkFn := func(actualInsertData *storage.InsertData, offsetBegin, expectRows int) {
|
||||||
|
expectInsertData := insertData
|
||||||
|
for fieldID, data := range actualInsertData.Data {
|
||||||
|
s.Equal(expectRows, data.RowNum())
|
||||||
|
for i := 0; i < expectRows; i++ {
|
||||||
|
expect := expectInsertData.Data[fieldID].GetRow(i + offsetBegin)
|
||||||
|
actual := data.GetRow(i)
|
||||||
|
if expect == nil {
|
||||||
|
expect, err = nullutil.GetDefaultValue(fieldSchema)
|
||||||
|
s.NoError(err)
|
||||||
|
}
|
||||||
|
s.Equal(expect, actual)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
res, err := reader.Read()
|
||||||
|
s.NoError(err)
|
||||||
|
checkFn(res, 0, s.numRows)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *ReaderSuite) TestReadScalarFieldsWithDefaultValue() {
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Bool, schemapb.DataType_None, true, 0)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int8, schemapb.DataType_None, true, 0)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int16, schemapb.DataType_None, true, 0)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int32, schemapb.DataType_None, true, 0)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int64, schemapb.DataType_None, true, 0)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Float, schemapb.DataType_None, true, 0)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Double, schemapb.DataType_None, true, 0)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_String, schemapb.DataType_None, true, 0)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_VarChar, schemapb.DataType_None, true, 0)
|
||||||
|
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Bool, schemapb.DataType_None, true, 50)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int8, schemapb.DataType_None, true, 50)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int16, schemapb.DataType_None, true, 50)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int32, schemapb.DataType_None, true, 50)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int64, schemapb.DataType_None, true, 50)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Float, schemapb.DataType_None, true, 50)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_String, schemapb.DataType_None, true, 50)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_VarChar, schemapb.DataType_None, true, 50)
|
||||||
|
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Bool, schemapb.DataType_None, true, 100)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int8, schemapb.DataType_None, true, 100)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int16, schemapb.DataType_None, true, 100)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int32, schemapb.DataType_None, true, 100)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int64, schemapb.DataType_None, true, 100)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Float, schemapb.DataType_None, true, 100)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_String, schemapb.DataType_None, true, 100)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_VarChar, schemapb.DataType_None, true, 100)
|
||||||
|
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Bool, schemapb.DataType_None, false, 0)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int8, schemapb.DataType_None, false, 0)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int16, schemapb.DataType_None, false, 0)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int32, schemapb.DataType_None, false, 0)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int64, schemapb.DataType_None, false, 0)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Float, schemapb.DataType_None, false, 0)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Double, schemapb.DataType_None, false, 0)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_String, schemapb.DataType_None, false, 0)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_VarChar, schemapb.DataType_None, false, 0)
|
||||||
|
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Bool, schemapb.DataType_None, false, 50)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int8, schemapb.DataType_None, false, 50)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int16, schemapb.DataType_None, false, 50)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int32, schemapb.DataType_None, false, 50)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int64, schemapb.DataType_None, false, 50)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Float, schemapb.DataType_None, false, 50)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_String, schemapb.DataType_None, false, 50)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_VarChar, schemapb.DataType_None, false, 50)
|
||||||
|
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Bool, schemapb.DataType_None, false, 100)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int8, schemapb.DataType_None, false, 100)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int16, schemapb.DataType_None, false, 100)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int32, schemapb.DataType_None, false, 100)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Int64, schemapb.DataType_None, false, 100)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_Float, schemapb.DataType_None, false, 100)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_String, schemapb.DataType_None, false, 100)
|
||||||
|
s.runWithDefaultValue(schemapb.DataType_VarChar, schemapb.DataType_None, false, 100)
|
||||||
|
}
|
||||||
|
|
||||||
func (s *ReaderSuite) TestReadScalarFields() {
|
func (s *ReaderSuite) TestReadScalarFields() {
|
||||||
s.run(schemapb.DataType_Bool, schemapb.DataType_None, false, 0)
|
s.run(schemapb.DataType_Bool, schemapb.DataType_None, false, 0)
|
||||||
s.run(schemapb.DataType_Int8, schemapb.DataType_None, false, 0)
|
s.run(schemapb.DataType_Int8, schemapb.DataType_None, false, 0)
|
||||||
|
|||||||
@ -115,7 +115,7 @@ func isArrowArithmeticType(dataType arrow.Type) bool {
|
|||||||
return isArrowIntegerType(dataType) || isArrowFloatingType(dataType)
|
return isArrowIntegerType(dataType) || isArrowFloatingType(dataType)
|
||||||
}
|
}
|
||||||
|
|
||||||
func isArrowDataTypeConvertible(src arrow.DataType, dst arrow.DataType, nullable bool) bool {
|
func isArrowDataTypeConvertible(src arrow.DataType, dst arrow.DataType, field *schemapb.FieldSchema) bool {
|
||||||
srcType := src.ID()
|
srcType := src.ID()
|
||||||
dstType := dst.ID()
|
dstType := dst.ID()
|
||||||
switch srcType {
|
switch srcType {
|
||||||
@ -142,9 +142,10 @@ func isArrowDataTypeConvertible(src arrow.DataType, dst arrow.DataType, nullable
|
|||||||
case arrow.BINARY:
|
case arrow.BINARY:
|
||||||
return dstType == arrow.LIST && dst.(*arrow.ListType).Elem().ID() == arrow.UINT8
|
return dstType == arrow.LIST && dst.(*arrow.ListType).Elem().ID() == arrow.UINT8
|
||||||
case arrow.LIST:
|
case arrow.LIST:
|
||||||
return dstType == arrow.LIST && isArrowDataTypeConvertible(src.(*arrow.ListType).Elem(), dst.(*arrow.ListType).Elem(), nullable)
|
return dstType == arrow.LIST && isArrowDataTypeConvertible(src.(*arrow.ListType).Elem(), dst.(*arrow.ListType).Elem(), field)
|
||||||
case arrow.NULL:
|
case arrow.NULL:
|
||||||
return nullable
|
// if nullable==true or has set default_value, can use null type
|
||||||
|
return field.GetNullable() || field.GetDefaultValue() != nil
|
||||||
default:
|
default:
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
@ -218,13 +219,18 @@ func ConvertToArrowSchema(schema *schemapb.CollectionSchema, useNullType bool) (
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
nullable := field.GetNullable()
|
||||||
if field.GetNullable() && useNullType {
|
if field.GetNullable() && useNullType {
|
||||||
arrDataType = arrow.Null
|
arrDataType = arrow.Null
|
||||||
}
|
}
|
||||||
|
if field.GetDefaultValue() != nil && useNullType {
|
||||||
|
arrDataType = arrow.Null
|
||||||
|
nullable = true
|
||||||
|
}
|
||||||
arrFields = append(arrFields, arrow.Field{
|
arrFields = append(arrFields, arrow.Field{
|
||||||
Name: field.GetName(),
|
Name: field.GetName(),
|
||||||
Type: arrDataType,
|
Type: arrDataType,
|
||||||
Nullable: field.GetNullable(),
|
Nullable: nullable,
|
||||||
Metadata: arrow.Metadata{},
|
Metadata: arrow.Metadata{},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -250,7 +256,7 @@ func isSchemaEqual(schema *schemapb.CollectionSchema, arrSchema *arrow.Schema) e
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if !isArrowDataTypeConvertible(arrField.Type, toArrDataType, field.GetNullable()) {
|
if !isArrowDataTypeConvertible(arrField.Type, toArrDataType, field) {
|
||||||
return merr.WrapErrImportFailed(fmt.Sprintf("field '%s' type mis-match, milvus data type '%s', arrow data type get '%s'",
|
return merr.WrapErrImportFailed(fmt.Sprintf("field '%s' type mis-match, milvus data type '%s', arrow data type get '%s'",
|
||||||
field.Name, field.DataType.String(), arrField.Type.String()))
|
field.Name, field.DataType.String(), arrField.Type.String()))
|
||||||
}
|
}
|
||||||
|
|||||||
64
internal/util/nullutil/nullutil.go
Normal file
64
internal/util/nullutil/nullutil.go
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
// Licensed to the LF AI & Data foundation under one
|
||||||
|
// or more contributor license agreements. See the NOTICE file
|
||||||
|
// distributed with this work for additional information
|
||||||
|
// regarding copyright ownership. The ASF licenses this file
|
||||||
|
// to you under the Apache License, Version 2.0 (the
|
||||||
|
// "License"); you may not use this file except in compliance
|
||||||
|
// with the License. You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package nullutil
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
|
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||||
|
)
|
||||||
|
|
||||||
|
func CheckValidData(validData []bool, schema *schemapb.FieldSchema, numRows int) error {
|
||||||
|
expectedNum := 0
|
||||||
|
// if nullable, the length of ValidData is numRows
|
||||||
|
if schema.GetNullable() {
|
||||||
|
expectedNum = numRows
|
||||||
|
}
|
||||||
|
if len(validData) != expectedNum {
|
||||||
|
msg := fmt.Sprintf("the length of valid_data of field(%s) is wrong", schema.GetName())
|
||||||
|
return merr.WrapErrParameterInvalid(expectedNum, len(validData), msg)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetDefaultValue(field *schemapb.FieldSchema) (any, error) {
|
||||||
|
if field.GetDefaultValue() != nil {
|
||||||
|
switch field.GetDataType() {
|
||||||
|
case schemapb.DataType_Bool:
|
||||||
|
return field.GetDefaultValue().GetBoolData(), nil
|
||||||
|
case schemapb.DataType_Int8:
|
||||||
|
return int8(field.GetDefaultValue().GetIntData()), nil
|
||||||
|
case schemapb.DataType_Int16:
|
||||||
|
return int16(field.GetDefaultValue().GetIntData()), nil
|
||||||
|
case schemapb.DataType_Int32:
|
||||||
|
return field.GetDefaultValue().GetIntData(), nil
|
||||||
|
case schemapb.DataType_Int64:
|
||||||
|
return field.GetDefaultValue().GetLongData(), nil
|
||||||
|
case schemapb.DataType_Float:
|
||||||
|
return field.GetDefaultValue().GetFloatData(), nil
|
||||||
|
case schemapb.DataType_Double:
|
||||||
|
return field.GetDefaultValue().GetDoubleData(), nil
|
||||||
|
case schemapb.DataType_String, schemapb.DataType_VarChar:
|
||||||
|
return field.GetDefaultValue().GetStringData(), nil
|
||||||
|
default:
|
||||||
|
msg := fmt.Sprintf("type (%s) not support default_value", field.GetDataType().String())
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg(msg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
@ -201,14 +201,82 @@ func CreateInsertData(schema *schemapb.CollectionSchema, rows int, nullPercent .
|
|||||||
insertData.Data[f.FieldID].AppendValidDataRows(testutils.GenerateBoolArray(rows))
|
insertData.Data[f.FieldID].AppendValidDataRows(testutils.GenerateBoolArray(rows))
|
||||||
} else if len(nullPercent) == 1 && nullPercent[0] == 100 {
|
} else if len(nullPercent) == 1 && nullPercent[0] == 100 {
|
||||||
insertData.Data[f.FieldID].AppendValidDataRows(make([]bool, rows))
|
insertData.Data[f.FieldID].AppendValidDataRows(make([]bool, rows))
|
||||||
|
} else if len(nullPercent) == 1 && nullPercent[0] == 0 {
|
||||||
|
validData := make([]bool, rows)
|
||||||
|
for i := range validData {
|
||||||
|
validData[i] = true
|
||||||
|
}
|
||||||
|
insertData.Data[f.FieldID].AppendValidDataRows(validData)
|
||||||
} else {
|
} else {
|
||||||
return nil, merr.WrapErrParameterInvalidMsg("not support the number of nullPercent")
|
return nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("not support the number of nullPercent(%d)", nullPercent))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return insertData, nil
|
return insertData, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func CreateFieldWithDefaultValue(dataType schemapb.DataType, id int64, nullable bool) (*schemapb.FieldSchema, error) {
|
||||||
|
field := &schemapb.FieldSchema{
|
||||||
|
FieldID: 102,
|
||||||
|
Name: dataType.String(),
|
||||||
|
DataType: dataType,
|
||||||
|
TypeParams: []*commonpb.KeyValuePair{
|
||||||
|
{
|
||||||
|
Key: common.MaxLengthKey,
|
||||||
|
Value: "128",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Key: common.MaxCapacityKey,
|
||||||
|
Value: "128",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Nullable: nullable,
|
||||||
|
}
|
||||||
|
|
||||||
|
switch field.GetDataType() {
|
||||||
|
case schemapb.DataType_Bool:
|
||||||
|
field.DefaultValue = &schemapb.ValueField{
|
||||||
|
Data: &schemapb.ValueField_BoolData{
|
||||||
|
BoolData: ([]bool{true, false})[rand.Intn(2)],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
case schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32:
|
||||||
|
field.DefaultValue = &schemapb.ValueField{
|
||||||
|
Data: &schemapb.ValueField_IntData{
|
||||||
|
IntData: ([]int32{1, 10, 100, 1000})[rand.Intn(4)],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
case schemapb.DataType_Int64:
|
||||||
|
field.DefaultValue = &schemapb.ValueField{
|
||||||
|
Data: &schemapb.ValueField_LongData{
|
||||||
|
LongData: rand.Int63(),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
case schemapb.DataType_Float:
|
||||||
|
field.DefaultValue = &schemapb.ValueField{
|
||||||
|
Data: &schemapb.ValueField_FloatData{
|
||||||
|
FloatData: rand.Float32(),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
case schemapb.DataType_Double:
|
||||||
|
field.DefaultValue = &schemapb.ValueField{
|
||||||
|
Data: &schemapb.ValueField_DoubleData{
|
||||||
|
DoubleData: rand.Float64(),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
case schemapb.DataType_String, schemapb.DataType_VarChar:
|
||||||
|
field.DefaultValue = &schemapb.ValueField{
|
||||||
|
Data: &schemapb.ValueField_StringData{
|
||||||
|
StringData: randomString(10),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
msg := fmt.Sprintf("type (%s) not support default_value", field.GetDataType().String())
|
||||||
|
return nil, merr.WrapErrParameterInvalidMsg(msg)
|
||||||
|
}
|
||||||
|
return field, nil
|
||||||
|
}
|
||||||
|
|
||||||
func BuildArrayData(schema *schemapb.CollectionSchema, insertData *storage.InsertData, useNullType bool) ([]arrow.Array, error) {
|
func BuildArrayData(schema *schemapb.CollectionSchema, insertData *storage.InsertData, useNullType bool) ([]arrow.Array, error) {
|
||||||
mem := memory.NewGoAllocator()
|
mem := memory.NewGoAllocator()
|
||||||
columns := make([]arrow.Array, 0, len(schema.Fields))
|
columns := make([]arrow.Array, 0, len(schema.Fields))
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user