enhance: support null in go payload (#32296)

#31728

---------

Signed-off-by: lixinguo <xinguo.li@zilliz.com>
Co-authored-by: lixinguo <xinguo.li@zilliz.com>
This commit is contained in:
smellthemoon 2024-06-19 17:08:00 +08:00 committed by GitHub
parent eb3197eb1e
commit 2a1356985d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
35 changed files with 3048 additions and 1195 deletions

View File

@ -34,7 +34,7 @@ GetFixPartSize(DescriptorEventData& data) {
sizeof(data.fix_part.segment_id) + sizeof(data.fix_part.field_id) + sizeof(data.fix_part.segment_id) + sizeof(data.fix_part.field_id) +
sizeof(data.fix_part.start_timestamp) + sizeof(data.fix_part.start_timestamp) +
sizeof(data.fix_part.end_timestamp) + sizeof(data.fix_part.end_timestamp) +
sizeof(data.fix_part.data_type); sizeof(data.fix_part.data_type) + sizeof(data.fix_part.nullable);
} }
int int
GetFixPartSize(BaseEventData& data) { GetFixPartSize(BaseEventData& data) {
@ -107,6 +107,8 @@ DescriptorEventDataFixPart::DescriptorEventDataFixPart(BinlogReaderPtr reader) {
assert(ast.ok()); assert(ast.ok());
ast = reader->Read(sizeof(field_id), &field_id); ast = reader->Read(sizeof(field_id), &field_id);
assert(ast.ok()); assert(ast.ok());
ast = reader->Read(sizeof(nullable), &nullable);
assert(ast.ok());
ast = reader->Read(sizeof(start_timestamp), &start_timestamp); ast = reader->Read(sizeof(start_timestamp), &start_timestamp);
assert(ast.ok()); assert(ast.ok());
ast = reader->Read(sizeof(end_timestamp), &end_timestamp); ast = reader->Read(sizeof(end_timestamp), &end_timestamp);
@ -120,7 +122,7 @@ DescriptorEventDataFixPart::Serialize() {
auto fix_part_size = sizeof(collection_id) + sizeof(partition_id) + auto fix_part_size = sizeof(collection_id) + sizeof(partition_id) +
sizeof(segment_id) + sizeof(field_id) + sizeof(segment_id) + sizeof(field_id) +
sizeof(start_timestamp) + sizeof(end_timestamp) + sizeof(start_timestamp) + sizeof(end_timestamp) +
sizeof(data_type); sizeof(data_type) + sizeof(nullable);
std::vector<uint8_t> res(fix_part_size); std::vector<uint8_t> res(fix_part_size);
int offset = 0; int offset = 0;
memcpy(res.data() + offset, &collection_id, sizeof(collection_id)); memcpy(res.data() + offset, &collection_id, sizeof(collection_id));
@ -131,6 +133,8 @@ DescriptorEventDataFixPart::Serialize() {
offset += sizeof(segment_id); offset += sizeof(segment_id);
memcpy(res.data() + offset, &field_id, sizeof(field_id)); memcpy(res.data() + offset, &field_id, sizeof(field_id));
offset += sizeof(field_id); offset += sizeof(field_id);
memcpy(res.data() + offset, &nullable, sizeof(nullable));
offset += sizeof(nullable);
memcpy(res.data() + offset, &start_timestamp, sizeof(start_timestamp)); memcpy(res.data() + offset, &start_timestamp, sizeof(start_timestamp));
offset += sizeof(start_timestamp); offset += sizeof(start_timestamp);
memcpy(res.data() + offset, &end_timestamp, sizeof(end_timestamp)); memcpy(res.data() + offset, &end_timestamp, sizeof(end_timestamp));

View File

@ -46,6 +46,8 @@ struct DescriptorEventDataFixPart {
int64_t partition_id; int64_t partition_id;
int64_t segment_id; int64_t segment_id;
int64_t field_id; int64_t field_id;
// TODO(smellthemoon): nullable is set to false temporarily; will be changed later.
bool nullable = false;
Timestamp start_timestamp; Timestamp start_timestamp;
Timestamp end_timestamp; Timestamp end_timestamp;
milvus::proto::schema::DataType data_type; milvus::proto::schema::DataType data_type;

View File

@ -61,6 +61,8 @@ InsertData::serialize_to_remote_file() {
des_fix_part.start_timestamp = time_range_.first; des_fix_part.start_timestamp = time_range_.first;
des_fix_part.end_timestamp = time_range_.second; des_fix_part.end_timestamp = time_range_.second;
des_fix_part.data_type = milvus::proto::schema::DataType(data_type); des_fix_part.data_type = milvus::proto::schema::DataType(data_type);
// TODO(smellthemoon): nullable is set to false temporarily; will be changed later.
des_fix_part.nullable = false;
for (auto i = int8_t(EventType::DescriptorEvent); for (auto i = int8_t(EventType::DescriptorEvent);
i < int8_t(EventType::EventTypeEnd); i < int8_t(EventType::EventTypeEnd);
i++) { i++) {

View File

@ -77,6 +77,7 @@ PayloadReader::init(std::shared_ptr<arrow::io::BufferReader> input) {
*rb_reader) { *rb_reader) {
AssertInfo(maybe_batch.ok(), "get batch record success"); AssertInfo(maybe_batch.ok(), "get batch record success");
auto array = maybe_batch.ValueOrDie()->column(column_index); auto array = maybe_batch.ValueOrDie()->column(column_index);
// to read
field_data_->FillFieldData(array); field_data_->FillFieldData(array);
} }
AssertInfo(field_data_->IsFull(), "field data hasn't been filled done"); AssertInfo(field_data_->IsFull(), "field data hasn't been filled done");

View File

@ -321,6 +321,81 @@ func (s *SyncTaskSuiteV2) TestBuildRecord() {
s.EqualValues(2, b.NewRecord().NumRows()) s.EqualValues(2, b.NewRecord().NumRows())
} }
// TestBuildRecordNullable verifies that BuildRecord accepts InsertData whose
// scalar fields carry ValidData (per-row null bitmaps). Every scalar field is
// given ValidData of all-true; vector fields (binary/float/float16) do not
// support nullability and therefore carry no ValidData. The resulting Arrow
// record must still contain the expected two rows.
func (s *SyncTaskSuiteV2) TestBuildRecordNullable() {
	fieldSchemas := []*schemapb.FieldSchema{
		{FieldID: 1, Name: "field0", DataType: schemapb.DataType_Bool},
		{FieldID: 2, Name: "field1", DataType: schemapb.DataType_Int8},
		{FieldID: 3, Name: "field2", DataType: schemapb.DataType_Int16},
		{FieldID: 4, Name: "field3", DataType: schemapb.DataType_Int32},
		{FieldID: 5, Name: "field4", DataType: schemapb.DataType_Int64},
		{FieldID: 6, Name: "field5", DataType: schemapb.DataType_Float},
		{FieldID: 7, Name: "field6", DataType: schemapb.DataType_Double},
		{FieldID: 8, Name: "field7", DataType: schemapb.DataType_String},
		{FieldID: 9, Name: "field8", DataType: schemapb.DataType_VarChar},
		{FieldID: 10, Name: "field9", DataType: schemapb.DataType_BinaryVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "8"}}},
		{FieldID: 11, Name: "field10", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}},
		{FieldID: 12, Name: "field11", DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int32},
		{FieldID: 13, Name: "field12", DataType: schemapb.DataType_JSON},
		// NOTE(review): was a copy-paste duplicate of "field12"; renamed to keep
		// the field0..fieldN naming sequence unique and consecutive.
		{FieldID: 14, Name: "field13", DataType: schemapb.DataType_Float16Vector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}},
	}
	schema, err := typeutil.ConvertToArrowSchema(fieldSchemas)
	s.NoError(err)

	b := array.NewRecordBuilder(memory.NewGoAllocator(), schema)
	defer b.Release()

	// Two rows per field; scalar fields mark both rows valid (non-null).
	data := &storage.InsertData{
		Data: map[int64]storage.FieldData{
			1: &storage.BoolFieldData{Data: []bool{true, false}, ValidData: []bool{true, true}},
			2: &storage.Int8FieldData{Data: []int8{3, 4}, ValidData: []bool{true, true}},
			3: &storage.Int16FieldData{Data: []int16{3, 4}, ValidData: []bool{true, true}},
			4: &storage.Int32FieldData{Data: []int32{3, 4}, ValidData: []bool{true, true}},
			5: &storage.Int64FieldData{Data: []int64{3, 4}, ValidData: []bool{true, true}},
			6: &storage.FloatFieldData{Data: []float32{3, 4}, ValidData: []bool{true, true}},
			7: &storage.DoubleFieldData{Data: []float64{3, 4}, ValidData: []bool{true, true}},
			8: &storage.StringFieldData{Data: []string{"3", "4"}, ValidData: []bool{true, true}},
			9: &storage.StringFieldData{Data: []string{"3", "4"}, ValidData: []bool{true, true}},
			10: &storage.BinaryVectorFieldData{Data: []byte{0, 255}, Dim: 8},
			11: &storage.FloatVectorFieldData{
				Data: []float32{4, 5, 6, 7, 4, 5, 6, 7},
				Dim:  4,
			},
			12: &storage.ArrayFieldData{
				ElementType: schemapb.DataType_Int32,
				Data: []*schemapb.ScalarField{
					{
						Data: &schemapb.ScalarField_IntData{
							IntData: &schemapb.IntArray{Data: []int32{3, 2, 1}},
						},
					},
					{
						Data: &schemapb.ScalarField_IntData{
							IntData: &schemapb.IntArray{Data: []int32{6, 5, 4}},
						},
					},
				},
				ValidData: []bool{true, true},
			},
			13: &storage.JSONFieldData{
				Data: [][]byte{
					[]byte(`{"batch":2}`),
					[]byte(`{"key":"world"}`),
				},
				ValidData: []bool{true, true},
			},
			14: &storage.Float16VectorFieldData{
				Data: []byte{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255},
				Dim:  4,
			},
		},
	}

	err = typeutil.BuildRecord(b, data, fieldSchemas)
	s.NoError(err)
	s.EqualValues(2, b.NewRecord().NumRows())
}
func TestSyncTaskV2(t *testing.T) { func TestSyncTaskV2(t *testing.T) {
suite.Run(t, new(SyncTaskSuiteV2)) suite.Run(t, new(SyncTaskSuiteV2))
} }

View File

@ -632,7 +632,7 @@ func SaveDeltaLog(collectionID int64,
for i := int64(0); i < dData.RowCount; i++ { for i := int64(0); i < dData.RowCount; i++ {
int64PkValue := dData.Pks[i].(*storage.Int64PrimaryKey).Value int64PkValue := dData.Pks[i].(*storage.Int64PrimaryKey).Value
ts := dData.Tss[i] ts := dData.Tss[i]
eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,%d", int64PkValue, ts)) eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,%d", int64PkValue, ts), true)
sizeTotal += binary.Size(int64PkValue) sizeTotal += binary.Size(int64PkValue)
sizeTotal += binary.Size(ts) sizeTotal += binary.Size(ts)
} }

View File

@ -1451,7 +1451,7 @@ func (loader *segmentLoader) patchEntryNumber(ctx context.Context, segment *Loca
return err return err
} }
rowIDs, err := er.GetInt64FromPayload() rowIDs, _, err := er.GetInt64FromPayload()
if err != nil { if err != nil {
return err return err
} }

View File

@ -50,7 +50,7 @@ func (reader *BinlogReader) NextEventReader() (*EventReader, error) {
reader.eventReader.Close() reader.eventReader.Close()
} }
var err error var err error
reader.eventReader, err = newEventReader(reader.descriptorEvent.PayloadDataType, reader.buffer) reader.eventReader, err = newEventReader(reader.descriptorEvent.PayloadDataType, reader.buffer, reader.descriptorEvent.Nullable)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -37,25 +37,25 @@ import (
/* #nosec G103 */ /* #nosec G103 */
func TestInsertBinlog(t *testing.T) { func TestInsertBinlog(t *testing.T) {
w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40) w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false)
e1, err := w.NextInsertEventWriter() e1, err := w.NextInsertEventWriter(false)
assert.NoError(t, err) assert.NoError(t, err)
err = e1.AddDataToPayload([]int64{1, 2, 3}) err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = e1.AddDataToPayload([]int32{4, 5, 6}) err = e1.AddDataToPayload([]int32{4, 5, 6}, nil)
assert.Error(t, err) assert.Error(t, err)
err = e1.AddDataToPayload([]int64{4, 5, 6}) err = e1.AddDataToPayload([]int64{4, 5, 6}, nil)
assert.NoError(t, err) assert.NoError(t, err)
e1.SetEventTimestamp(100, 200) e1.SetEventTimestamp(100, 200)
e2, err := w.NextInsertEventWriter() e2, err := w.NextInsertEventWriter(false)
assert.NoError(t, err) assert.NoError(t, err)
err = e2.AddDataToPayload([]int64{7, 8, 9}) err = e2.AddDataToPayload([]int64{7, 8, 9}, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = e2.AddDataToPayload([]bool{true, false, true}) err = e2.AddDataToPayload([]bool{true, false, true}, nil)
assert.Error(t, err) assert.Error(t, err)
err = e2.AddDataToPayload([]int64{10, 11, 12}) err = e2.AddDataToPayload([]int64{10, 11, 12}, nil)
assert.NoError(t, err) assert.NoError(t, err)
e2.SetEventTimestamp(300, 400) e2.SetEventTimestamp(300, 400)
@ -123,6 +123,11 @@ func TestInsertBinlog(t *testing.T) {
assert.Equal(t, fieldID, int64(40)) assert.Equal(t, fieldID, int64(40))
pos += int(unsafe.Sizeof(fieldID)) pos += int(unsafe.Sizeof(fieldID))
// descriptor data fix, nullable
nullable := UnsafeReadBool(buf, pos)
assert.Equal(t, nullable, false)
pos += int(unsafe.Sizeof(nullable))
// descriptor data fix, start time stamp // descriptor data fix, start time stamp
startts := UnsafeReadInt64(buf, pos) startts := UnsafeReadInt64(buf, pos)
assert.Equal(t, startts, int64(1000)) assert.Equal(t, startts, int64(1000))
@ -201,11 +206,12 @@ func TestInsertBinlog(t *testing.T) {
// insert e1, payload // insert e1, payload
e1Payload := buf[pos:e1NxtPos] e1Payload := buf[pos:e1NxtPos]
e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload) e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload, false)
assert.NoError(t, err) assert.NoError(t, err)
e1a, err := e1r.GetInt64FromPayload() e1a, valids, err := e1r.GetInt64FromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, e1a, []int64{1, 2, 3, 4, 5, 6}) assert.Equal(t, e1a, []int64{1, 2, 3, 4, 5, 6})
assert.Nil(t, valids)
e1r.Close() e1r.Close()
// start of e2 // start of e2
@ -243,11 +249,12 @@ func TestInsertBinlog(t *testing.T) {
// insert e2, payload // insert e2, payload
e2Payload := buf[pos:] e2Payload := buf[pos:]
e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload) e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload, false)
assert.NoError(t, err) assert.NoError(t, err)
e2a, err := e2r.GetInt64FromPayload() e2a, valids, err := e2r.GetInt64FromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, e2a, []int64{7, 8, 9, 10, 11, 12}) assert.Equal(t, e2a, []int64{7, 8, 9, 10, 11, 12})
assert.Nil(t, valids)
e2r.Close() e2r.Close()
assert.Equal(t, int(e2NxtPos), len(buf)) assert.Equal(t, int(e2NxtPos), len(buf))
@ -258,8 +265,9 @@ func TestInsertBinlog(t *testing.T) {
event1, err := r.NextEventReader() event1, err := r.NextEventReader()
assert.NoError(t, err) assert.NoError(t, err)
assert.NotNil(t, event1) assert.NotNil(t, event1)
p1, err := event1.GetInt64FromPayload() p1, valids, err := event1.GetInt64FromPayload()
assert.Equal(t, p1, []int64{1, 2, 3, 4, 5, 6}) assert.Equal(t, p1, []int64{1, 2, 3, 4, 5, 6})
assert.Nil(t, valids)
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, event1.TypeCode, InsertEventType) assert.Equal(t, event1.TypeCode, InsertEventType)
ed1, ok := (event1.eventData).(*insertEventData) ed1, ok := (event1.eventData).(*insertEventData)
@ -270,9 +278,10 @@ func TestInsertBinlog(t *testing.T) {
event2, err := r.NextEventReader() event2, err := r.NextEventReader()
assert.NoError(t, err) assert.NoError(t, err)
assert.NotNil(t, event2) assert.NotNil(t, event2)
p2, err := event2.GetInt64FromPayload() p2, valids, err := event2.GetInt64FromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, p2, []int64{7, 8, 9, 10, 11, 12}) assert.Equal(t, p2, []int64{7, 8, 9, 10, 11, 12})
assert.Nil(t, valids)
assert.Equal(t, event2.TypeCode, InsertEventType) assert.Equal(t, event2.TypeCode, InsertEventType)
ed2, ok := (event2.eventData).(*insertEventData) ed2, ok := (event2.eventData).(*insertEventData)
assert.True(t, ok) assert.True(t, ok)
@ -288,21 +297,21 @@ func TestDeleteBinlog(t *testing.T) {
e1, err := w.NextDeleteEventWriter() e1, err := w.NextDeleteEventWriter()
assert.NoError(t, err) assert.NoError(t, err)
err = e1.AddDataToPayload([]int64{1, 2, 3}) err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = e1.AddDataToPayload([]int32{4, 5, 6}) err = e1.AddDataToPayload([]int32{4, 5, 6}, nil)
assert.Error(t, err) assert.Error(t, err)
err = e1.AddDataToPayload([]int64{4, 5, 6}) err = e1.AddDataToPayload([]int64{4, 5, 6}, nil)
assert.NoError(t, err) assert.NoError(t, err)
e1.SetEventTimestamp(100, 200) e1.SetEventTimestamp(100, 200)
e2, err := w.NextDeleteEventWriter() e2, err := w.NextDeleteEventWriter()
assert.NoError(t, err) assert.NoError(t, err)
err = e2.AddDataToPayload([]int64{7, 8, 9}) err = e2.AddDataToPayload([]int64{7, 8, 9}, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = e2.AddDataToPayload([]bool{true, false, true}) err = e2.AddDataToPayload([]bool{true, false, true}, nil)
assert.Error(t, err) assert.Error(t, err)
err = e2.AddDataToPayload([]int64{10, 11, 12}) err = e2.AddDataToPayload([]int64{10, 11, 12}, nil)
assert.NoError(t, err) assert.NoError(t, err)
e2.SetEventTimestamp(300, 400) e2.SetEventTimestamp(300, 400)
@ -370,6 +379,11 @@ func TestDeleteBinlog(t *testing.T) {
assert.Equal(t, fieldID, int64(-1)) assert.Equal(t, fieldID, int64(-1))
pos += int(unsafe.Sizeof(fieldID)) pos += int(unsafe.Sizeof(fieldID))
// descriptor data fix, nullable
nullable := UnsafeReadBool(buf, pos)
assert.Equal(t, nullable, false)
pos += int(unsafe.Sizeof(nullable))
// descriptor data fix, start time stamp // descriptor data fix, start time stamp
startts := UnsafeReadInt64(buf, pos) startts := UnsafeReadInt64(buf, pos)
assert.Equal(t, startts, int64(1000)) assert.Equal(t, startts, int64(1000))
@ -448,11 +462,12 @@ func TestDeleteBinlog(t *testing.T) {
// insert e1, payload // insert e1, payload
e1Payload := buf[pos:e1NxtPos] e1Payload := buf[pos:e1NxtPos]
e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload) e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload, false)
assert.NoError(t, err) assert.NoError(t, err)
e1a, err := e1r.GetInt64FromPayload() e1a, valids, err := e1r.GetInt64FromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, e1a, []int64{1, 2, 3, 4, 5, 6}) assert.Equal(t, e1a, []int64{1, 2, 3, 4, 5, 6})
assert.Nil(t, valids)
e1r.Close() e1r.Close()
// start of e2 // start of e2
@ -490,10 +505,11 @@ func TestDeleteBinlog(t *testing.T) {
// insert e2, payload // insert e2, payload
e2Payload := buf[pos:] e2Payload := buf[pos:]
e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload) e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload, false)
assert.NoError(t, err) assert.NoError(t, err)
e2a, err := e2r.GetInt64FromPayload() e2a, valids, err := e2r.GetInt64FromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Nil(t, valids)
assert.Equal(t, e2a, []int64{7, 8, 9, 10, 11, 12}) assert.Equal(t, e2a, []int64{7, 8, 9, 10, 11, 12})
e2r.Close() e2r.Close()
@ -505,7 +521,8 @@ func TestDeleteBinlog(t *testing.T) {
event1, err := r.NextEventReader() event1, err := r.NextEventReader()
assert.NoError(t, err) assert.NoError(t, err)
assert.NotNil(t, event1) assert.NotNil(t, event1)
p1, err := event1.GetInt64FromPayload() p1, valids, err := event1.GetInt64FromPayload()
assert.Nil(t, valids)
assert.Equal(t, p1, []int64{1, 2, 3, 4, 5, 6}) assert.Equal(t, p1, []int64{1, 2, 3, 4, 5, 6})
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, event1.TypeCode, DeleteEventType) assert.Equal(t, event1.TypeCode, DeleteEventType)
@ -517,7 +534,8 @@ func TestDeleteBinlog(t *testing.T) {
event2, err := r.NextEventReader() event2, err := r.NextEventReader()
assert.NoError(t, err) assert.NoError(t, err)
assert.NotNil(t, event2) assert.NotNil(t, event2)
p2, err := event2.GetInt64FromPayload() p2, valids, err := event2.GetInt64FromPayload()
assert.Nil(t, valids)
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, p2, []int64{7, 8, 9, 10, 11, 12}) assert.Equal(t, p2, []int64{7, 8, 9, 10, 11, 12})
assert.Equal(t, event2.TypeCode, DeleteEventType) assert.Equal(t, event2.TypeCode, DeleteEventType)
@ -535,21 +553,21 @@ func TestDDLBinlog1(t *testing.T) {
e1, err := w.NextCreateCollectionEventWriter() e1, err := w.NextCreateCollectionEventWriter()
assert.NoError(t, err) assert.NoError(t, err)
err = e1.AddDataToPayload([]int64{1, 2, 3}) err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = e1.AddDataToPayload([]int32{4, 5, 6}) err = e1.AddDataToPayload([]int32{4, 5, 6}, nil)
assert.Error(t, err) assert.Error(t, err)
err = e1.AddDataToPayload([]int64{4, 5, 6}) err = e1.AddDataToPayload([]int64{4, 5, 6}, nil)
assert.NoError(t, err) assert.NoError(t, err)
e1.SetEventTimestamp(100, 200) e1.SetEventTimestamp(100, 200)
e2, err := w.NextDropCollectionEventWriter() e2, err := w.NextDropCollectionEventWriter()
assert.NoError(t, err) assert.NoError(t, err)
err = e2.AddDataToPayload([]int64{7, 8, 9}) err = e2.AddDataToPayload([]int64{7, 8, 9}, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = e2.AddDataToPayload([]bool{true, false, true}) err = e2.AddDataToPayload([]bool{true, false, true}, nil)
assert.Error(t, err) assert.Error(t, err)
err = e2.AddDataToPayload([]int64{10, 11, 12}) err = e2.AddDataToPayload([]int64{10, 11, 12}, nil)
assert.NoError(t, err) assert.NoError(t, err)
e2.SetEventTimestamp(300, 400) e2.SetEventTimestamp(300, 400)
@ -617,6 +635,11 @@ func TestDDLBinlog1(t *testing.T) {
assert.Equal(t, fieldID, int64(-1)) assert.Equal(t, fieldID, int64(-1))
pos += int(unsafe.Sizeof(fieldID)) pos += int(unsafe.Sizeof(fieldID))
// descriptor data fix, nullable
nullable := UnsafeReadBool(buf, pos)
assert.Equal(t, nullable, false)
pos += int(unsafe.Sizeof(nullable))
// descriptor data fix, start time stamp // descriptor data fix, start time stamp
startts := UnsafeReadInt64(buf, pos) startts := UnsafeReadInt64(buf, pos)
assert.Equal(t, startts, int64(1000)) assert.Equal(t, startts, int64(1000))
@ -695,9 +718,10 @@ func TestDDLBinlog1(t *testing.T) {
// insert e1, payload // insert e1, payload
e1Payload := buf[pos:e1NxtPos] e1Payload := buf[pos:e1NxtPos]
e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload) e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload, false)
assert.NoError(t, err) assert.NoError(t, err)
e1a, err := e1r.GetInt64FromPayload() e1a, valids, err := e1r.GetInt64FromPayload()
assert.Nil(t, valids)
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, e1a, []int64{1, 2, 3, 4, 5, 6}) assert.Equal(t, e1a, []int64{1, 2, 3, 4, 5, 6})
e1r.Close() e1r.Close()
@ -737,9 +761,10 @@ func TestDDLBinlog1(t *testing.T) {
// insert e2, payload // insert e2, payload
e2Payload := buf[pos:] e2Payload := buf[pos:]
e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload) e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload, false)
assert.NoError(t, err) assert.NoError(t, err)
e2a, err := e2r.GetInt64FromPayload() e2a, valids, err := e2r.GetInt64FromPayload()
assert.Nil(t, valids)
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, e2a, []int64{7, 8, 9, 10, 11, 12}) assert.Equal(t, e2a, []int64{7, 8, 9, 10, 11, 12})
e2r.Close() e2r.Close()
@ -752,7 +777,8 @@ func TestDDLBinlog1(t *testing.T) {
event1, err := r.NextEventReader() event1, err := r.NextEventReader()
assert.NoError(t, err) assert.NoError(t, err)
assert.NotNil(t, event1) assert.NotNil(t, event1)
p1, err := event1.GetInt64FromPayload() p1, valids, err := event1.GetInt64FromPayload()
assert.Nil(t, valids)
assert.Equal(t, p1, []int64{1, 2, 3, 4, 5, 6}) assert.Equal(t, p1, []int64{1, 2, 3, 4, 5, 6})
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, event1.TypeCode, CreateCollectionEventType) assert.Equal(t, event1.TypeCode, CreateCollectionEventType)
@ -764,7 +790,8 @@ func TestDDLBinlog1(t *testing.T) {
event2, err := r.NextEventReader() event2, err := r.NextEventReader()
assert.NoError(t, err) assert.NoError(t, err)
assert.NotNil(t, event2) assert.NotNil(t, event2)
p2, err := event2.GetInt64FromPayload() p2, valids, err := event2.GetInt64FromPayload()
assert.Nil(t, valids)
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, p2, []int64{7, 8, 9, 10, 11, 12}) assert.Equal(t, p2, []int64{7, 8, 9, 10, 11, 12})
assert.Equal(t, event2.TypeCode, DropCollectionEventType) assert.Equal(t, event2.TypeCode, DropCollectionEventType)
@ -782,21 +809,21 @@ func TestDDLBinlog2(t *testing.T) {
e1, err := w.NextCreatePartitionEventWriter() e1, err := w.NextCreatePartitionEventWriter()
assert.NoError(t, err) assert.NoError(t, err)
err = e1.AddDataToPayload([]int64{1, 2, 3}) err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = e1.AddDataToPayload([]int32{4, 5, 6}) err = e1.AddDataToPayload([]int32{4, 5, 6}, nil)
assert.Error(t, err) assert.Error(t, err)
err = e1.AddDataToPayload([]int64{4, 5, 6}) err = e1.AddDataToPayload([]int64{4, 5, 6}, nil)
assert.NoError(t, err) assert.NoError(t, err)
e1.SetEventTimestamp(100, 200) e1.SetEventTimestamp(100, 200)
e2, err := w.NextDropPartitionEventWriter() e2, err := w.NextDropPartitionEventWriter()
assert.NoError(t, err) assert.NoError(t, err)
err = e2.AddDataToPayload([]int64{7, 8, 9}) err = e2.AddDataToPayload([]int64{7, 8, 9}, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = e2.AddDataToPayload([]bool{true, false, true}) err = e2.AddDataToPayload([]bool{true, false, true}, nil)
assert.Error(t, err) assert.Error(t, err)
err = e2.AddDataToPayload([]int64{10, 11, 12}) err = e2.AddDataToPayload([]int64{10, 11, 12}, nil)
assert.NoError(t, err) assert.NoError(t, err)
e2.SetEventTimestamp(300, 400) e2.SetEventTimestamp(300, 400)
@ -863,6 +890,11 @@ func TestDDLBinlog2(t *testing.T) {
assert.Equal(t, fieldID, int64(-1)) assert.Equal(t, fieldID, int64(-1))
pos += int(unsafe.Sizeof(fieldID)) pos += int(unsafe.Sizeof(fieldID))
// descriptor data fix, nullable
nullable := UnsafeReadBool(buf, pos)
assert.Equal(t, nullable, false)
pos += int(unsafe.Sizeof(nullable))
// descriptor data fix, start time stamp // descriptor data fix, start time stamp
startts := UnsafeReadInt64(buf, pos) startts := UnsafeReadInt64(buf, pos)
assert.Equal(t, startts, int64(1000)) assert.Equal(t, startts, int64(1000))
@ -941,9 +973,10 @@ func TestDDLBinlog2(t *testing.T) {
// insert e1, payload // insert e1, payload
e1Payload := buf[pos:e1NxtPos] e1Payload := buf[pos:e1NxtPos]
e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload) e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload, false)
assert.NoError(t, err) assert.NoError(t, err)
e1a, err := e1r.GetInt64FromPayload() e1a, valids, err := e1r.GetInt64FromPayload()
assert.Nil(t, valids)
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, e1a, []int64{1, 2, 3, 4, 5, 6}) assert.Equal(t, e1a, []int64{1, 2, 3, 4, 5, 6})
e1r.Close() e1r.Close()
@ -983,9 +1016,10 @@ func TestDDLBinlog2(t *testing.T) {
// insert e2, payload // insert e2, payload
e2Payload := buf[pos:] e2Payload := buf[pos:]
e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload) e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload, false)
assert.NoError(t, err) assert.NoError(t, err)
e2a, err := e2r.GetInt64FromPayload() e2a, valids, err := e2r.GetInt64FromPayload()
assert.Nil(t, valids)
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, e2a, []int64{7, 8, 9, 10, 11, 12}) assert.Equal(t, e2a, []int64{7, 8, 9, 10, 11, 12})
e2r.Close() e2r.Close()
@ -998,7 +1032,8 @@ func TestDDLBinlog2(t *testing.T) {
event1, err := r.NextEventReader() event1, err := r.NextEventReader()
assert.NoError(t, err) assert.NoError(t, err)
assert.NotNil(t, event1) assert.NotNil(t, event1)
p1, err := event1.GetInt64FromPayload() p1, valids, err := event1.GetInt64FromPayload()
assert.Nil(t, valids)
assert.Equal(t, p1, []int64{1, 2, 3, 4, 5, 6}) assert.Equal(t, p1, []int64{1, 2, 3, 4, 5, 6})
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, event1.TypeCode, CreatePartitionEventType) assert.Equal(t, event1.TypeCode, CreatePartitionEventType)
@ -1010,7 +1045,8 @@ func TestDDLBinlog2(t *testing.T) {
event2, err := r.NextEventReader() event2, err := r.NextEventReader()
assert.NoError(t, err) assert.NoError(t, err)
assert.NotNil(t, event2) assert.NotNil(t, event2)
p2, err := event2.GetInt64FromPayload() p2, valids, err := event2.GetInt64FromPayload()
assert.Nil(t, valids)
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, p2, []int64{7, 8, 9, 10, 11, 12}) assert.Equal(t, p2, []int64{7, 8, 9, 10, 11, 12})
assert.Equal(t, event2.TypeCode, DropPartitionEventType) assert.Equal(t, event2.TypeCode, DropPartitionEventType)
@ -1042,7 +1078,7 @@ func TestIndexFileBinlog(t *testing.T) {
e, err := w.NextIndexFileEventWriter() e, err := w.NextIndexFileEventWriter()
assert.NoError(t, err) assert.NoError(t, err)
err = e.AddByteToPayload(payload) err = e.AddByteToPayload(payload, nil)
assert.NoError(t, err) assert.NoError(t, err)
e.SetEventTimestamp(timestamp, timestamp) e.SetEventTimestamp(timestamp, timestamp)
@ -1104,6 +1140,11 @@ func TestIndexFileBinlog(t *testing.T) {
assert.Equal(t, fieldID, fID) assert.Equal(t, fieldID, fID)
pos += int(unsafe.Sizeof(fID)) pos += int(unsafe.Sizeof(fID))
// descriptor data fix, nullable
nullable := UnsafeReadBool(buf, pos)
assert.Equal(t, nullable, false)
pos += int(unsafe.Sizeof(nullable))
// descriptor data fix, start time stamp // descriptor data fix, start time stamp
startts := UnsafeReadInt64(buf, pos) startts := UnsafeReadInt64(buf, pos)
assert.Equal(t, startts, int64(timestamp)) assert.Equal(t, startts, int64(timestamp))
@ -1171,7 +1212,7 @@ func TestIndexFileBinlogV2(t *testing.T) {
e, err := w.NextIndexFileEventWriter() e, err := w.NextIndexFileEventWriter()
assert.NoError(t, err) assert.NoError(t, err)
err = e.AddOneStringToPayload(typeutil.UnsafeBytes2str(payload)) err = e.AddOneStringToPayload(typeutil.UnsafeBytes2str(payload), true)
assert.NoError(t, err) assert.NoError(t, err)
e.SetEventTimestamp(timestamp, timestamp) e.SetEventTimestamp(timestamp, timestamp)
@ -1233,6 +1274,11 @@ func TestIndexFileBinlogV2(t *testing.T) {
assert.Equal(t, fieldID, fID) assert.Equal(t, fieldID, fID)
pos += int(unsafe.Sizeof(fID)) pos += int(unsafe.Sizeof(fID))
// descriptor data fix, nullable
nullable := UnsafeReadBool(buf, pos)
assert.Equal(t, nullable, false)
pos += int(unsafe.Sizeof(nullable))
// descriptor data fix, start time stamp // descriptor data fix, start time stamp
startts := UnsafeReadInt64(buf, pos) startts := UnsafeReadInt64(buf, pos)
assert.Equal(t, startts, int64(timestamp)) assert.Equal(t, startts, int64(timestamp))
@ -1309,17 +1355,17 @@ func TestNewBinlogReaderError(t *testing.T) {
assert.Nil(t, reader) assert.Nil(t, reader)
assert.Error(t, err) assert.Error(t, err)
w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40) w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false)
w.SetEventTimeStamp(1000, 2000) w.SetEventTimeStamp(1000, 2000)
e1, err := w.NextInsertEventWriter() e1, err := w.NextInsertEventWriter(false)
assert.NoError(t, err) assert.NoError(t, err)
err = e1.AddDataToPayload([]int64{1, 2, 3}) err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = e1.AddDataToPayload([]int32{4, 5, 6}) err = e1.AddDataToPayload([]int32{4, 5, 6}, nil)
assert.Error(t, err) assert.Error(t, err)
err = e1.AddDataToPayload([]int64{4, 5, 6}) err = e1.AddDataToPayload([]int64{4, 5, 6}, nil)
assert.NoError(t, err) assert.NoError(t, err)
e1.SetEventTimestamp(100, 200) e1.SetEventTimestamp(100, 200)
@ -1348,7 +1394,7 @@ func TestNewBinlogReaderError(t *testing.T) {
} }
func TestNewBinlogWriterTsError(t *testing.T) { func TestNewBinlogWriterTsError(t *testing.T) {
w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40) w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false)
_, err := w.GetBuffer() _, err := w.GetBuffer()
assert.Error(t, err) assert.Error(t, err)
@ -1376,21 +1422,21 @@ func TestNewBinlogWriterTsError(t *testing.T) {
} }
func TestInsertBinlogWriterCloseError(t *testing.T) { func TestInsertBinlogWriterCloseError(t *testing.T) {
insertWriter := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40) insertWriter := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false)
e1, err := insertWriter.NextInsertEventWriter() e1, err := insertWriter.NextInsertEventWriter(false)
assert.NoError(t, err) assert.NoError(t, err)
sizeTotal := 2000000 sizeTotal := 2000000
insertWriter.baseBinlogWriter.descriptorEventData.AddExtra(originalSizeKey, fmt.Sprintf("%v", sizeTotal)) insertWriter.baseBinlogWriter.descriptorEventData.AddExtra(originalSizeKey, fmt.Sprintf("%v", sizeTotal))
err = e1.AddDataToPayload([]int64{1, 2, 3}) err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err) assert.NoError(t, err)
e1.SetEventTimestamp(100, 200) e1.SetEventTimestamp(100, 200)
insertWriter.SetEventTimeStamp(1000, 2000) insertWriter.SetEventTimeStamp(1000, 2000)
err = insertWriter.Finish() err = insertWriter.Finish()
assert.NoError(t, err) assert.NoError(t, err)
assert.NotNil(t, insertWriter.buffer) assert.NotNil(t, insertWriter.buffer)
insertEventWriter, err := insertWriter.NextInsertEventWriter() insertEventWriter, err := insertWriter.NextInsertEventWriter(false)
assert.Nil(t, insertEventWriter) assert.Nil(t, insertEventWriter)
assert.Error(t, err) assert.Error(t, err)
insertWriter.Close() insertWriter.Close()
@ -1402,7 +1448,7 @@ func TestDeleteBinlogWriteCloseError(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
sizeTotal := 2000000 sizeTotal := 2000000
deleteWriter.baseBinlogWriter.descriptorEventData.AddExtra(originalSizeKey, fmt.Sprintf("%v", sizeTotal)) deleteWriter.baseBinlogWriter.descriptorEventData.AddExtra(originalSizeKey, fmt.Sprintf("%v", sizeTotal))
err = e1.AddDataToPayload([]int64{1, 2, 3}) err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err) assert.NoError(t, err)
e1.SetEventTimestamp(100, 200) e1.SetEventTimestamp(100, 200)
deleteWriter.SetEventTimeStamp(1000, 2000) deleteWriter.SetEventTimeStamp(1000, 2000)
@ -1423,7 +1469,7 @@ func TestDDBinlogWriteCloseError(t *testing.T) {
sizeTotal := 2000000 sizeTotal := 2000000
ddBinlogWriter.baseBinlogWriter.descriptorEventData.AddExtra(originalSizeKey, fmt.Sprintf("%v", sizeTotal)) ddBinlogWriter.baseBinlogWriter.descriptorEventData.AddExtra(originalSizeKey, fmt.Sprintf("%v", sizeTotal))
err = e1.AddDataToPayload([]int64{1, 2, 3}) err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err) assert.NoError(t, err)
e1.SetEventTimestamp(100, 200) e1.SetEventTimestamp(100, 200)
@ -1499,7 +1545,7 @@ func (e *testEvent) SetOffset(offset int32) {
var _ EventWriter = (*testEvent)(nil) var _ EventWriter = (*testEvent)(nil)
func TestWriterListError(t *testing.T) { func TestWriterListError(t *testing.T) {
insertWriter := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40) insertWriter := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false)
sizeTotal := 2000000 sizeTotal := 2000000
insertWriter.baseBinlogWriter.descriptorEventData.AddExtra(originalSizeKey, fmt.Sprintf("%v", sizeTotal)) insertWriter.baseBinlogWriter.descriptorEventData.AddExtra(originalSizeKey, fmt.Sprintf("%v", sizeTotal))
errorEvent := &testEvent{} errorEvent := &testEvent{}

View File

@ -150,7 +150,7 @@ type InsertBinlogWriter struct {
} }
// NextInsertEventWriter returns an event writer to write insert data to an event. // NextInsertEventWriter returns an event writer to write insert data to an event.
func (writer *InsertBinlogWriter) NextInsertEventWriter(dim ...int) (*insertEventWriter, error) { func (writer *InsertBinlogWriter) NextInsertEventWriter(nullable bool, dim ...int) (*insertEventWriter, error) {
if writer.isClosed() { if writer.isClosed() {
return nil, fmt.Errorf("binlog has closed") return nil, fmt.Errorf("binlog has closed")
} }
@ -161,9 +161,9 @@ func (writer *InsertBinlogWriter) NextInsertEventWriter(dim ...int) (*insertEven
if len(dim) != 1 { if len(dim) != 1 {
return nil, fmt.Errorf("incorrect input numbers") return nil, fmt.Errorf("incorrect input numbers")
} }
event, err = newInsertEventWriter(writer.PayloadDataType, dim[0]) event, err = newInsertEventWriter(writer.PayloadDataType, nullable, dim[0])
} else { } else {
event, err = newInsertEventWriter(writer.PayloadDataType) event, err = newInsertEventWriter(writer.PayloadDataType, nullable)
} }
if err != nil { if err != nil {
return nil, err return nil, err
@ -271,13 +271,14 @@ func (writer *IndexFileBinlogWriter) NextIndexFileEventWriter() (*indexFileEvent
} }
// NewInsertBinlogWriter creates InsertBinlogWriter to write binlog file. // NewInsertBinlogWriter creates InsertBinlogWriter to write binlog file.
func NewInsertBinlogWriter(dataType schemapb.DataType, collectionID, partitionID, segmentID, FieldID int64) *InsertBinlogWriter { func NewInsertBinlogWriter(dataType schemapb.DataType, collectionID, partitionID, segmentID, FieldID int64, nullable bool) *InsertBinlogWriter {
descriptorEvent := newDescriptorEvent() descriptorEvent := newDescriptorEvent()
descriptorEvent.PayloadDataType = dataType descriptorEvent.PayloadDataType = dataType
descriptorEvent.CollectionID = collectionID descriptorEvent.CollectionID = collectionID
descriptorEvent.PartitionID = partitionID descriptorEvent.PartitionID = partitionID
descriptorEvent.SegmentID = segmentID descriptorEvent.SegmentID = segmentID
descriptorEvent.FieldID = FieldID descriptorEvent.FieldID = FieldID
descriptorEvent.Nullable = nullable
w := &InsertBinlogWriter{ w := &InsertBinlogWriter{
baseBinlogWriter: baseBinlogWriter{ baseBinlogWriter: baseBinlogWriter{

View File

@ -26,15 +26,15 @@ import (
) )
func TestBinlogWriterReader(t *testing.T) { func TestBinlogWriterReader(t *testing.T) {
binlogWriter := NewInsertBinlogWriter(schemapb.DataType_Int32, 10, 20, 30, 40) binlogWriter := NewInsertBinlogWriter(schemapb.DataType_Int32, 10, 20, 30, 40, false)
tp := binlogWriter.GetBinlogType() tp := binlogWriter.GetBinlogType()
assert.Equal(t, tp, InsertBinlog) assert.Equal(t, tp, InsertBinlog)
binlogWriter.SetEventTimeStamp(1000, 2000) binlogWriter.SetEventTimeStamp(1000, 2000)
defer binlogWriter.Close() defer binlogWriter.Close()
eventWriter, err := binlogWriter.NextInsertEventWriter() eventWriter, err := binlogWriter.NextInsertEventWriter(false)
assert.NoError(t, err) assert.NoError(t, err)
err = eventWriter.AddInt32ToPayload([]int32{1, 2, 3}) err = eventWriter.AddInt32ToPayload([]int32{1, 2, 3}, nil)
assert.NoError(t, err) assert.NoError(t, err)
_, err = binlogWriter.GetBuffer() _, err = binlogWriter.GetBuffer()
assert.Error(t, err) assert.Error(t, err)
@ -50,7 +50,7 @@ func TestBinlogWriterReader(t *testing.T) {
nums, err = binlogWriter.GetRowNums() nums, err = binlogWriter.GetRowNums()
assert.NoError(t, err) assert.NoError(t, err)
assert.EqualValues(t, 3, nums) assert.EqualValues(t, 3, nums)
err = eventWriter.AddInt32ToPayload([]int32{1, 2, 3}) err = eventWriter.AddInt32ToPayload([]int32{1, 2, 3}, nil)
assert.Error(t, err) assert.Error(t, err)
nums, err = binlogWriter.GetRowNums() nums, err = binlogWriter.GetRowNums()
assert.NoError(t, err) assert.NoError(t, err)
@ -64,9 +64,9 @@ func TestBinlogWriterReader(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
eventReader, err := binlogReader.NextEventReader() eventReader, err := binlogReader.NextEventReader()
assert.NoError(t, err) assert.NoError(t, err)
_, err = eventReader.GetInt8FromPayload() _, _, err = eventReader.GetInt8FromPayload()
assert.Error(t, err) assert.Error(t, err)
payload, err := eventReader.GetInt32FromPayload() payload, _, err := eventReader.GetInt32FromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.EqualValues(t, 3, len(payload)) assert.EqualValues(t, 3, len(payload))
assert.EqualValues(t, 1, payload[0]) assert.EqualValues(t, 1, payload[0])

View File

@ -247,11 +247,14 @@ func (insertCodec *InsertCodec) Serialize(partitionID UniqueID, segmentID Unique
for _, field := range insertCodec.Schema.Schema.Fields { for _, field := range insertCodec.Schema.Schema.Fields {
// encode fields // encode fields
writer = NewInsertBinlogWriter(field.DataType, insertCodec.Schema.ID, partitionID, segmentID, field.FieldID) writer = NewInsertBinlogWriter(field.DataType, insertCodec.Schema.ID, partitionID, segmentID, field.FieldID, field.GetNullable())
var eventWriter *insertEventWriter var eventWriter *insertEventWriter
var err error var err error
var dim int64 var dim int64
if typeutil.IsVectorType(field.DataType) { if typeutil.IsVectorType(field.DataType) {
if field.GetNullable() {
return nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("vectorType not support null, fieldName: %s", field.GetName()))
}
switch field.DataType { switch field.DataType {
case schemapb.DataType_FloatVector, case schemapb.DataType_FloatVector,
schemapb.DataType_BinaryVector, schemapb.DataType_BinaryVector,
@ -261,14 +264,14 @@ func (insertCodec *InsertCodec) Serialize(partitionID UniqueID, segmentID Unique
if err != nil { if err != nil {
return nil, err return nil, err
} }
eventWriter, err = writer.NextInsertEventWriter(int(dim)) eventWriter, err = writer.NextInsertEventWriter(field.GetNullable(), int(dim))
case schemapb.DataType_SparseFloatVector: case schemapb.DataType_SparseFloatVector:
eventWriter, err = writer.NextInsertEventWriter() eventWriter, err = writer.NextInsertEventWriter(field.GetNullable())
default: default:
return nil, fmt.Errorf("undefined data type %d", field.DataType) return nil, fmt.Errorf("undefined data type %d", field.DataType)
} }
} else { } else {
eventWriter, err = writer.NextInsertEventWriter() eventWriter, err = writer.NextInsertEventWriter(field.GetNullable())
} }
if err != nil { if err != nil {
writer.Close() writer.Close()
@ -323,48 +326,60 @@ func AddFieldDataToPayload(eventWriter *insertEventWriter, dataType schemapb.Dat
var err error var err error
switch dataType { switch dataType {
case schemapb.DataType_Bool: case schemapb.DataType_Bool:
if err = eventWriter.AddBoolToPayload(singleData.(*BoolFieldData).Data); err != nil { if err = eventWriter.AddBoolToPayload(singleData.(*BoolFieldData).Data, singleData.(*BoolFieldData).ValidData); err != nil {
return err return err
} }
case schemapb.DataType_Int8: case schemapb.DataType_Int8:
if err = eventWriter.AddInt8ToPayload(singleData.(*Int8FieldData).Data); err != nil { if err = eventWriter.AddInt8ToPayload(singleData.(*Int8FieldData).Data, singleData.(*Int8FieldData).ValidData); err != nil {
return err return err
} }
case schemapb.DataType_Int16: case schemapb.DataType_Int16:
if err = eventWriter.AddInt16ToPayload(singleData.(*Int16FieldData).Data); err != nil { if err = eventWriter.AddInt16ToPayload(singleData.(*Int16FieldData).Data, singleData.(*Int16FieldData).ValidData); err != nil {
return err return err
} }
case schemapb.DataType_Int32: case schemapb.DataType_Int32:
if err = eventWriter.AddInt32ToPayload(singleData.(*Int32FieldData).Data); err != nil { if err = eventWriter.AddInt32ToPayload(singleData.(*Int32FieldData).Data, singleData.(*Int32FieldData).ValidData); err != nil {
return err return err
} }
case schemapb.DataType_Int64: case schemapb.DataType_Int64:
if err = eventWriter.AddInt64ToPayload(singleData.(*Int64FieldData).Data); err != nil { if err = eventWriter.AddInt64ToPayload(singleData.(*Int64FieldData).Data, singleData.(*Int64FieldData).ValidData); err != nil {
return err return err
} }
case schemapb.DataType_Float: case schemapb.DataType_Float:
if err = eventWriter.AddFloatToPayload(singleData.(*FloatFieldData).Data); err != nil { if err = eventWriter.AddFloatToPayload(singleData.(*FloatFieldData).Data, singleData.(*FloatFieldData).ValidData); err != nil {
return err return err
} }
case schemapb.DataType_Double: case schemapb.DataType_Double:
if err = eventWriter.AddDoubleToPayload(singleData.(*DoubleFieldData).Data); err != nil { if err = eventWriter.AddDoubleToPayload(singleData.(*DoubleFieldData).Data, singleData.(*DoubleFieldData).ValidData); err != nil {
return err return err
} }
case schemapb.DataType_String, schemapb.DataType_VarChar: case schemapb.DataType_String, schemapb.DataType_VarChar:
for _, singleString := range singleData.(*StringFieldData).Data { for i, singleString := range singleData.(*StringFieldData).Data {
if err = eventWriter.AddOneStringToPayload(singleString); err != nil { isValid := true
if len(singleData.(*StringFieldData).ValidData) != 0 {
isValid = singleData.(*StringFieldData).ValidData[i]
}
if err = eventWriter.AddOneStringToPayload(singleString, isValid); err != nil {
return err return err
} }
} }
case schemapb.DataType_Array: case schemapb.DataType_Array:
for _, singleArray := range singleData.(*ArrayFieldData).Data { for i, singleArray := range singleData.(*ArrayFieldData).Data {
if err = eventWriter.AddOneArrayToPayload(singleArray); err != nil { isValid := true
if len(singleData.(*ArrayFieldData).ValidData) != 0 {
isValid = singleData.(*ArrayFieldData).ValidData[i]
}
if err = eventWriter.AddOneArrayToPayload(singleArray, isValid); err != nil {
return err return err
} }
} }
case schemapb.DataType_JSON: case schemapb.DataType_JSON:
for _, singleJSON := range singleData.(*JSONFieldData).Data { for i, singleJSON := range singleData.(*JSONFieldData).Data {
if err = eventWriter.AddOneJSONToPayload(singleJSON); err != nil { isValid := true
if len(singleData.(*JSONFieldData).ValidData) != 0 {
isValid = singleData.(*JSONFieldData).ValidData[i]
}
if err = eventWriter.AddOneJSONToPayload(singleJSON, isValid); err != nil {
return err return err
} }
} }
@ -448,7 +463,7 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
} }
switch dataType { switch dataType {
case schemapb.DataType_Bool: case schemapb.DataType_Bool:
singleData, err := eventReader.GetBoolFromPayload() singleData, validData, err := eventReader.GetBoolFromPayload()
if err != nil { if err != nil {
eventReader.Close() eventReader.Close()
binlogReader.Close() binlogReader.Close()
@ -463,11 +478,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
boolFieldData := insertData.Data[fieldID].(*BoolFieldData) boolFieldData := insertData.Data[fieldID].(*BoolFieldData)
boolFieldData.Data = append(boolFieldData.Data, singleData...) boolFieldData.Data = append(boolFieldData.Data, singleData...)
boolFieldData.ValidData = append(boolFieldData.ValidData, validData...)
totalLength += len(singleData) totalLength += len(singleData)
insertData.Data[fieldID] = boolFieldData insertData.Data[fieldID] = boolFieldData
case schemapb.DataType_Int8: case schemapb.DataType_Int8:
singleData, err := eventReader.GetInt8FromPayload() singleData, validData, err := eventReader.GetInt8FromPayload()
if err != nil { if err != nil {
eventReader.Close() eventReader.Close()
binlogReader.Close() binlogReader.Close()
@ -482,11 +498,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
int8FieldData := insertData.Data[fieldID].(*Int8FieldData) int8FieldData := insertData.Data[fieldID].(*Int8FieldData)
int8FieldData.Data = append(int8FieldData.Data, singleData...) int8FieldData.Data = append(int8FieldData.Data, singleData...)
int8FieldData.ValidData = append(int8FieldData.ValidData, validData...)
totalLength += len(singleData) totalLength += len(singleData)
insertData.Data[fieldID] = int8FieldData insertData.Data[fieldID] = int8FieldData
case schemapb.DataType_Int16: case schemapb.DataType_Int16:
singleData, err := eventReader.GetInt16FromPayload() singleData, validData, err := eventReader.GetInt16FromPayload()
if err != nil { if err != nil {
eventReader.Close() eventReader.Close()
binlogReader.Close() binlogReader.Close()
@ -501,11 +518,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
int16FieldData := insertData.Data[fieldID].(*Int16FieldData) int16FieldData := insertData.Data[fieldID].(*Int16FieldData)
int16FieldData.Data = append(int16FieldData.Data, singleData...) int16FieldData.Data = append(int16FieldData.Data, singleData...)
int16FieldData.ValidData = append(int16FieldData.ValidData, validData...)
totalLength += len(singleData) totalLength += len(singleData)
insertData.Data[fieldID] = int16FieldData insertData.Data[fieldID] = int16FieldData
case schemapb.DataType_Int32: case schemapb.DataType_Int32:
singleData, err := eventReader.GetInt32FromPayload() singleData, validData, err := eventReader.GetInt32FromPayload()
if err != nil { if err != nil {
eventReader.Close() eventReader.Close()
binlogReader.Close() binlogReader.Close()
@ -520,11 +538,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
int32FieldData := insertData.Data[fieldID].(*Int32FieldData) int32FieldData := insertData.Data[fieldID].(*Int32FieldData)
int32FieldData.Data = append(int32FieldData.Data, singleData...) int32FieldData.Data = append(int32FieldData.Data, singleData...)
int32FieldData.ValidData = append(int32FieldData.ValidData, validData...)
totalLength += len(singleData) totalLength += len(singleData)
insertData.Data[fieldID] = int32FieldData insertData.Data[fieldID] = int32FieldData
case schemapb.DataType_Int64: case schemapb.DataType_Int64:
singleData, err := eventReader.GetInt64FromPayload() singleData, validData, err := eventReader.GetInt64FromPayload()
if err != nil { if err != nil {
eventReader.Close() eventReader.Close()
binlogReader.Close() binlogReader.Close()
@ -539,11 +558,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
int64FieldData := insertData.Data[fieldID].(*Int64FieldData) int64FieldData := insertData.Data[fieldID].(*Int64FieldData)
int64FieldData.Data = append(int64FieldData.Data, singleData...) int64FieldData.Data = append(int64FieldData.Data, singleData...)
int64FieldData.ValidData = append(int64FieldData.ValidData, validData...)
totalLength += len(singleData) totalLength += len(singleData)
insertData.Data[fieldID] = int64FieldData insertData.Data[fieldID] = int64FieldData
case schemapb.DataType_Float: case schemapb.DataType_Float:
singleData, err := eventReader.GetFloatFromPayload() singleData, validData, err := eventReader.GetFloatFromPayload()
if err != nil { if err != nil {
eventReader.Close() eventReader.Close()
binlogReader.Close() binlogReader.Close()
@ -558,11 +578,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
floatFieldData := insertData.Data[fieldID].(*FloatFieldData) floatFieldData := insertData.Data[fieldID].(*FloatFieldData)
floatFieldData.Data = append(floatFieldData.Data, singleData...) floatFieldData.Data = append(floatFieldData.Data, singleData...)
floatFieldData.ValidData = append(floatFieldData.ValidData, validData...)
totalLength += len(singleData) totalLength += len(singleData)
insertData.Data[fieldID] = floatFieldData insertData.Data[fieldID] = floatFieldData
case schemapb.DataType_Double: case schemapb.DataType_Double:
singleData, err := eventReader.GetDoubleFromPayload() singleData, validData, err := eventReader.GetDoubleFromPayload()
if err != nil { if err != nil {
eventReader.Close() eventReader.Close()
binlogReader.Close() binlogReader.Close()
@ -577,11 +598,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
doubleFieldData := insertData.Data[fieldID].(*DoubleFieldData) doubleFieldData := insertData.Data[fieldID].(*DoubleFieldData)
doubleFieldData.Data = append(doubleFieldData.Data, singleData...) doubleFieldData.Data = append(doubleFieldData.Data, singleData...)
doubleFieldData.ValidData = append(doubleFieldData.ValidData, validData...)
totalLength += len(singleData) totalLength += len(singleData)
insertData.Data[fieldID] = doubleFieldData insertData.Data[fieldID] = doubleFieldData
case schemapb.DataType_String, schemapb.DataType_VarChar: case schemapb.DataType_String, schemapb.DataType_VarChar:
stringPayload, err := eventReader.GetStringFromPayload() stringPayload, validData, err := eventReader.GetStringFromPayload()
if err != nil { if err != nil {
eventReader.Close() eventReader.Close()
binlogReader.Close() binlogReader.Close()
@ -594,14 +616,15 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
} }
} }
stringFieldData := insertData.Data[fieldID].(*StringFieldData) stringFieldData := insertData.Data[fieldID].(*StringFieldData)
stringFieldData.DataType = dataType
stringFieldData.Data = append(stringFieldData.Data, stringPayload...) stringFieldData.Data = append(stringFieldData.Data, stringPayload...)
stringFieldData.DataType = dataType stringFieldData.ValidData = append(stringFieldData.ValidData, validData...)
totalLength += len(stringPayload) totalLength += len(stringPayload)
insertData.Data[fieldID] = stringFieldData insertData.Data[fieldID] = stringFieldData
case schemapb.DataType_Array: case schemapb.DataType_Array:
arrayPayload, err := eventReader.GetArrayFromPayload() arrayPayload, validData, err := eventReader.GetArrayFromPayload()
if err != nil { if err != nil {
eventReader.Close() eventReader.Close()
binlogReader.Close() binlogReader.Close()
@ -616,11 +639,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
arrayFieldData := insertData.Data[fieldID].(*ArrayFieldData) arrayFieldData := insertData.Data[fieldID].(*ArrayFieldData)
arrayFieldData.Data = append(arrayFieldData.Data, arrayPayload...) arrayFieldData.Data = append(arrayFieldData.Data, arrayPayload...)
arrayFieldData.ValidData = append(arrayFieldData.ValidData, validData...)
totalLength += len(arrayPayload) totalLength += len(arrayPayload)
insertData.Data[fieldID] = arrayFieldData insertData.Data[fieldID] = arrayFieldData
case schemapb.DataType_JSON: case schemapb.DataType_JSON:
jsonPayload, err := eventReader.GetJSONFromPayload() jsonPayload, validData, err := eventReader.GetJSONFromPayload()
if err != nil { if err != nil {
eventReader.Close() eventReader.Close()
binlogReader.Close() binlogReader.Close()
@ -635,6 +659,7 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
jsonFieldData := insertData.Data[fieldID].(*JSONFieldData) jsonFieldData := insertData.Data[fieldID].(*JSONFieldData)
jsonFieldData.Data = append(jsonFieldData.Data, jsonPayload...) jsonFieldData.Data = append(jsonFieldData.Data, jsonPayload...)
jsonFieldData.ValidData = append(jsonFieldData.ValidData, validData...)
totalLength += len(jsonPayload) totalLength += len(jsonPayload)
insertData.Data[fieldID] = jsonFieldData insertData.Data[fieldID] = jsonFieldData
@ -934,7 +959,7 @@ func (deleteCodec *DeleteCodec) Serialize(collectionID UniqueID, partitionID Uni
if err != nil { if err != nil {
return nil, err return nil, err
} }
err = eventWriter.AddOneStringToPayload(string(serializedPayload)) err = eventWriter.AddOneStringToPayload(string(serializedPayload), true)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -1084,7 +1109,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Serialize(ts []Timestamp, ddRequ
for _, singleTs := range ts { for _, singleTs := range ts {
int64Ts = append(int64Ts, int64(singleTs)) int64Ts = append(int64Ts, int64(singleTs))
} }
err = eventWriter.AddInt64ToPayload(int64Ts) err = eventWriter.AddInt64ToPayload(int64Ts, nil)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -1120,7 +1145,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Serialize(ts []Timestamp, ddRequ
if err != nil { if err != nil {
return nil, err return nil, err
} }
err = eventWriter.AddOneStringToPayload(req) err = eventWriter.AddOneStringToPayload(req, true)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -1130,7 +1155,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Serialize(ts []Timestamp, ddRequ
if err != nil { if err != nil {
return nil, err return nil, err
} }
err = eventWriter.AddOneStringToPayload(req) err = eventWriter.AddOneStringToPayload(req, true)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -1140,7 +1165,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Serialize(ts []Timestamp, ddRequ
if err != nil { if err != nil {
return nil, err return nil, err
} }
err = eventWriter.AddOneStringToPayload(req) err = eventWriter.AddOneStringToPayload(req, true)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -1150,7 +1175,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Serialize(ts []Timestamp, ddRequ
if err != nil { if err != nil {
return nil, err return nil, err
} }
err = eventWriter.AddOneStringToPayload(req) err = eventWriter.AddOneStringToPayload(req, true)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -1211,7 +1236,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Deserialize(blobs []*Blob) (ts [
} }
switch dataType { switch dataType {
case schemapb.DataType_Int64: case schemapb.DataType_Int64:
int64Ts, err := eventReader.GetInt64FromPayload() int64Ts, _, err := eventReader.GetInt64FromPayload()
if err != nil { if err != nil {
eventReader.Close() eventReader.Close()
binlogReader.Close() binlogReader.Close()
@ -1221,7 +1246,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Deserialize(blobs []*Blob) (ts [
resultTs = append(resultTs, Timestamp(singleTs)) resultTs = append(resultTs, Timestamp(singleTs))
} }
case schemapb.DataType_String: case schemapb.DataType_String:
stringPayload, err := eventReader.GetStringFromPayload() stringPayload, _, err := eventReader.GetStringFromPayload()
if err != nil { if err != nil {
eventReader.Close() eventReader.Close()
binlogReader.Close() binlogReader.Close()

View File

@ -201,6 +201,62 @@ func genTestCollectionMeta() *etcdpb.CollectionMeta {
} }
} }
func TestInsertCodecFailed(t *testing.T) {
t.Run("vector field not support null", func(t *testing.T) {
tests := []struct {
description string
dataType schemapb.DataType
}{
{"nullable FloatVector field", schemapb.DataType_FloatVector},
{"nullable Float16Vector field", schemapb.DataType_Float16Vector},
{"nullable BinaryVector field", schemapb.DataType_BinaryVector},
{"nullable BFloat16Vector field", schemapb.DataType_BFloat16Vector},
{"nullable SparseFloatVector field", schemapb.DataType_SparseFloatVector},
}
for _, test := range tests {
t.Run(test.description, func(t *testing.T) {
schema := &etcdpb.CollectionMeta{
ID: CollectionID,
CreateTime: 1,
SegmentIDs: []int64{SegmentID},
PartitionTags: []string{"partition_0", "partition_1"},
Schema: &schemapb.CollectionSchema{
Name: "schema",
Description: "schema",
Fields: []*schemapb.FieldSchema{
{
FieldID: RowIDField,
Name: "row_id",
Description: "row_id",
DataType: schemapb.DataType_Int64,
},
{
FieldID: TimestampField,
Name: "Timestamp",
Description: "Timestamp",
DataType: schemapb.DataType_Int64,
},
{
DataType: test.dataType,
},
},
},
}
insertCodec := NewInsertCodecWithSchema(schema)
insertDataEmpty := &InsertData{
Data: map[int64]FieldData{
RowIDField: &Int64FieldData{[]int64{}, nil},
TimestampField: &Int64FieldData{[]int64{}, nil},
},
}
_, err := insertCodec.Serialize(PartitionID, SegmentID, insertDataEmpty)
assert.Error(t, err)
})
}
})
}
func TestInsertCodec(t *testing.T) { func TestInsertCodec(t *testing.T) {
schema := genTestCollectionMeta() schema := genTestCollectionMeta()
insertCodec := NewInsertCodecWithSchema(schema) insertCodec := NewInsertCodecWithSchema(schema)
@ -374,16 +430,16 @@ func TestInsertCodec(t *testing.T) {
insertDataEmpty := &InsertData{ insertDataEmpty := &InsertData{
Data: map[int64]FieldData{ Data: map[int64]FieldData{
RowIDField: &Int64FieldData{[]int64{}}, RowIDField: &Int64FieldData{[]int64{}, nil},
TimestampField: &Int64FieldData{[]int64{}}, TimestampField: &Int64FieldData{[]int64{}, nil},
BoolField: &BoolFieldData{[]bool{}}, BoolField: &BoolFieldData{[]bool{}, nil},
Int8Field: &Int8FieldData{[]int8{}}, Int8Field: &Int8FieldData{[]int8{}, nil},
Int16Field: &Int16FieldData{[]int16{}}, Int16Field: &Int16FieldData{[]int16{}, nil},
Int32Field: &Int32FieldData{[]int32{}}, Int32Field: &Int32FieldData{[]int32{}, nil},
Int64Field: &Int64FieldData{[]int64{}}, Int64Field: &Int64FieldData{[]int64{}, nil},
FloatField: &FloatFieldData{[]float32{}}, FloatField: &FloatFieldData{[]float32{}, nil},
DoubleField: &DoubleFieldData{[]float64{}}, DoubleField: &DoubleFieldData{[]float64{}, nil},
StringField: &StringFieldData{[]string{}, schemapb.DataType_VarChar}, StringField: &StringFieldData{[]string{}, schemapb.DataType_VarChar, nil},
BinaryVectorField: &BinaryVectorFieldData{[]byte{}, 8}, BinaryVectorField: &BinaryVectorFieldData{[]byte{}, 8},
FloatVectorField: &FloatVectorFieldData{[]float32{}, 4}, FloatVectorField: &FloatVectorFieldData{[]float32{}, 4},
Float16VectorField: &Float16VectorFieldData{[]byte{}, 4}, Float16VectorField: &Float16VectorFieldData{[]byte{}, 4},
@ -394,8 +450,8 @@ func TestInsertCodec(t *testing.T) {
Contents: [][]byte{}, Contents: [][]byte{},
}, },
}, },
ArrayField: &ArrayFieldData{schemapb.DataType_Int32, []*schemapb.ScalarField{}}, ArrayField: &ArrayFieldData{schemapb.DataType_Int32, []*schemapb.ScalarField{}, nil},
JSONField: &JSONFieldData{[][]byte{}}, JSONField: &JSONFieldData{[][]byte{}, nil},
}, },
} }
b, err := insertCodec.Serialize(PartitionID, SegmentID, insertDataEmpty) b, err := insertCodec.Serialize(PartitionID, SegmentID, insertDataEmpty)
@ -557,7 +613,7 @@ func TestUpgradeDeleteLog(t *testing.T) {
for i := int64(0); i < dData.RowCount; i++ { for i := int64(0); i < dData.RowCount; i++ {
int64PkValue := dData.Pks[i].(*Int64PrimaryKey).Value int64PkValue := dData.Pks[i].(*Int64PrimaryKey).Value
ts := dData.Tss[i] ts := dData.Tss[i]
err = eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,%d", int64PkValue, ts)) err = eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,%d", int64PkValue, ts), true)
assert.NoError(t, err) assert.NoError(t, err)
sizeTotal += binary.Size(int64PkValue) sizeTotal += binary.Size(int64PkValue)
sizeTotal += binary.Size(ts) sizeTotal += binary.Size(ts)
@ -595,7 +651,7 @@ func TestUpgradeDeleteLog(t *testing.T) {
for i := int64(0); i < dData.RowCount; i++ { for i := int64(0); i < dData.RowCount; i++ {
int64PkValue := dData.Pks[i].(*Int64PrimaryKey).Value int64PkValue := dData.Pks[i].(*Int64PrimaryKey).Value
ts := dData.Tss[i] ts := dData.Tss[i]
err = eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,%d,?", int64PkValue, ts)) err = eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,%d,?", int64PkValue, ts), true)
assert.NoError(t, err) assert.NoError(t, err)
} }
eventWriter.SetEventTimestamp(100, 200) eventWriter.SetEventTimestamp(100, 200)
@ -626,7 +682,7 @@ func TestUpgradeDeleteLog(t *testing.T) {
for i := int64(0); i < dData.RowCount; i++ { for i := int64(0); i < dData.RowCount; i++ {
ts := dData.Tss[i] ts := dData.Tss[i]
err = eventWriter.AddOneStringToPayload(fmt.Sprintf("abc,%d", ts)) err = eventWriter.AddOneStringToPayload(fmt.Sprintf("abc,%d", ts), true)
assert.NoError(t, err) assert.NoError(t, err)
} }
eventWriter.SetEventTimestamp(100, 200) eventWriter.SetEventTimestamp(100, 200)
@ -657,7 +713,7 @@ func TestUpgradeDeleteLog(t *testing.T) {
for i := int64(0); i < dData.RowCount; i++ { for i := int64(0); i < dData.RowCount; i++ {
int64PkValue := dData.Pks[i].(*Int64PrimaryKey).Value int64PkValue := dData.Pks[i].(*Int64PrimaryKey).Value
err = eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,abc", int64PkValue)) err = eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,abc", int64PkValue), true)
assert.NoError(t, err) assert.NoError(t, err)
} }
eventWriter.SetEventTimestamp(100, 200) eventWriter.SetEventTimestamp(100, 200)
@ -845,16 +901,16 @@ func TestMemorySize(t *testing.T) {
insertDataEmpty := &InsertData{ insertDataEmpty := &InsertData{
Data: map[int64]FieldData{ Data: map[int64]FieldData{
RowIDField: &Int64FieldData{[]int64{}}, RowIDField: &Int64FieldData{[]int64{}, nil},
TimestampField: &Int64FieldData{[]int64{}}, TimestampField: &Int64FieldData{[]int64{}, nil},
BoolField: &BoolFieldData{[]bool{}}, BoolField: &BoolFieldData{[]bool{}, nil},
Int8Field: &Int8FieldData{[]int8{}}, Int8Field: &Int8FieldData{[]int8{}, nil},
Int16Field: &Int16FieldData{[]int16{}}, Int16Field: &Int16FieldData{[]int16{}, nil},
Int32Field: &Int32FieldData{[]int32{}}, Int32Field: &Int32FieldData{[]int32{}, nil},
Int64Field: &Int64FieldData{[]int64{}}, Int64Field: &Int64FieldData{[]int64{}, nil},
FloatField: &FloatFieldData{[]float32{}}, FloatField: &FloatFieldData{[]float32{}, nil},
DoubleField: &DoubleFieldData{[]float64{}}, DoubleField: &DoubleFieldData{[]float64{}, nil},
StringField: &StringFieldData{[]string{}, schemapb.DataType_VarChar}, StringField: &StringFieldData{[]string{}, schemapb.DataType_VarChar, nil},
BinaryVectorField: &BinaryVectorFieldData{[]byte{}, 8}, BinaryVectorField: &BinaryVectorFieldData{[]byte{}, 8},
FloatVectorField: &FloatVectorFieldData{[]float32{}, 4}, FloatVectorField: &FloatVectorFieldData{[]float32{}, 4},
}, },
@ -920,24 +976,24 @@ func TestDeleteData(t *testing.T) {
} }
func TestAddFieldDataToPayload(t *testing.T) { func TestAddFieldDataToPayload(t *testing.T) {
w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40) w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false)
e, _ := w.NextInsertEventWriter() e, _ := w.NextInsertEventWriter(false)
var err error var err error
err = AddFieldDataToPayload(e, schemapb.DataType_Bool, &BoolFieldData{[]bool{}}) err = AddFieldDataToPayload(e, schemapb.DataType_Bool, &BoolFieldData{[]bool{}, nil})
assert.Error(t, err) assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Int8, &Int8FieldData{[]int8{}}) err = AddFieldDataToPayload(e, schemapb.DataType_Int8, &Int8FieldData{[]int8{}, nil})
assert.Error(t, err) assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Int16, &Int16FieldData{[]int16{}}) err = AddFieldDataToPayload(e, schemapb.DataType_Int16, &Int16FieldData{[]int16{}, nil})
assert.Error(t, err) assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Int32, &Int32FieldData{[]int32{}}) err = AddFieldDataToPayload(e, schemapb.DataType_Int32, &Int32FieldData{[]int32{}, nil})
assert.Error(t, err) assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Int64, &Int64FieldData{[]int64{}}) err = AddFieldDataToPayload(e, schemapb.DataType_Int64, &Int64FieldData{[]int64{}, nil})
assert.Error(t, err) assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Float, &FloatFieldData{[]float32{}}) err = AddFieldDataToPayload(e, schemapb.DataType_Float, &FloatFieldData{[]float32{}, nil})
assert.Error(t, err) assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Double, &DoubleFieldData{[]float64{}}) err = AddFieldDataToPayload(e, schemapb.DataType_Double, &DoubleFieldData{[]float64{}, nil})
assert.Error(t, err) assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_String, &StringFieldData{[]string{"test"}, schemapb.DataType_VarChar}) err = AddFieldDataToPayload(e, schemapb.DataType_String, &StringFieldData{[]string{"test"}, schemapb.DataType_VarChar, nil})
assert.Error(t, err) assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Array, &ArrayFieldData{ err = AddFieldDataToPayload(e, schemapb.DataType_Array, &ArrayFieldData{
ElementType: schemapb.DataType_VarChar, ElementType: schemapb.DataType_VarChar,
@ -948,7 +1004,7 @@ func TestAddFieldDataToPayload(t *testing.T) {
}}, }},
}) })
assert.Error(t, err) assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_JSON, &JSONFieldData{[][]byte{[]byte(`"batch":2}`)}}) err = AddFieldDataToPayload(e, schemapb.DataType_JSON, &JSONFieldData{[][]byte{[]byte(`"batch":2}`)}, nil})
assert.Error(t, err) assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_BinaryVector, &BinaryVectorFieldData{[]byte{}, 8}) err = AddFieldDataToPayload(e, schemapb.DataType_BinaryVector, &BinaryVectorFieldData{[]byte{}, 8})
assert.Error(t, err) assert.Error(t, err)

View File

@ -46,6 +46,7 @@ type DescriptorEventDataFixPart struct {
PartitionID int64 PartitionID int64
SegmentID int64 SegmentID int64
FieldID int64 FieldID int64
Nullable bool
StartTimestamp typeutil.Timestamp StartTimestamp typeutil.Timestamp
EndTimestamp typeutil.Timestamp EndTimestamp typeutil.Timestamp
PayloadDataType schemapb.DataType PayloadDataType schemapb.DataType
@ -350,6 +351,7 @@ func newDescriptorEventData() *descriptorEventData {
StartTimestamp: 0, StartTimestamp: 0,
EndTimestamp: 0, EndTimestamp: 0,
PayloadDataType: -1, PayloadDataType: -1,
Nullable: false,
}, },
PostHeaderLengths: []uint8{}, PostHeaderLengths: []uint8{},
Extras: make(map[string]interface{}), Extras: make(map[string]interface{}),

View File

@ -85,7 +85,7 @@ func (reader *EventReader) Close() {
} }
} }
func newEventReader(datatype schemapb.DataType, buffer *bytes.Buffer) (*EventReader, error) { func newEventReader(datatype schemapb.DataType, buffer *bytes.Buffer, nullable bool) (*EventReader, error) {
reader := &EventReader{ reader := &EventReader{
eventHeader: eventHeader{ eventHeader: eventHeader{
baseEventHeader{}, baseEventHeader{},
@ -103,7 +103,7 @@ func newEventReader(datatype schemapb.DataType, buffer *bytes.Buffer) (*EventRea
next := int(reader.EventLength - reader.eventHeader.GetMemoryUsageInBytes() - reader.GetEventDataFixPartSize()) next := int(reader.EventLength - reader.eventHeader.GetMemoryUsageInBytes() - reader.GetEventDataFixPartSize())
payloadBuffer := buffer.Next(next) payloadBuffer := buffer.Next(next)
payloadReader, err := NewPayloadReader(datatype, payloadBuffer) payloadReader, err := NewPayloadReader(datatype, payloadBuffer, nullable)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -89,17 +89,25 @@ func TestDescriptorEvent(t *testing.T) {
int(unsafe.Sizeof(partID))+ int(unsafe.Sizeof(partID))+
int(unsafe.Sizeof(segID))) int(unsafe.Sizeof(segID)))
assert.Equal(t, fieldID, int64(-1)) assert.Equal(t, fieldID, int64(-1))
startTs := UnsafeReadInt64(buffer, binary.Size(eventHeader{})+ nullable := UnsafeReadBool(buffer, binary.Size(eventHeader{})+
int(unsafe.Sizeof(collID))+ int(unsafe.Sizeof(collID))+
int(unsafe.Sizeof(partID))+ int(unsafe.Sizeof(partID))+
int(unsafe.Sizeof(segID))+ int(unsafe.Sizeof(segID))+
int(unsafe.Sizeof(fieldID))) int(unsafe.Sizeof(fieldID)))
assert.Equal(t, nullable, false)
startTs := UnsafeReadInt64(buffer, binary.Size(eventHeader{})+
int(unsafe.Sizeof(collID))+
int(unsafe.Sizeof(partID))+
int(unsafe.Sizeof(segID))+
int(unsafe.Sizeof(fieldID))+
int(unsafe.Sizeof(nullable)))
assert.Equal(t, startTs, int64(0)) assert.Equal(t, startTs, int64(0))
endTs := UnsafeReadInt64(buffer, binary.Size(eventHeader{})+ endTs := UnsafeReadInt64(buffer, binary.Size(eventHeader{})+
int(unsafe.Sizeof(collID))+ int(unsafe.Sizeof(collID))+
int(unsafe.Sizeof(partID))+ int(unsafe.Sizeof(partID))+
int(unsafe.Sizeof(segID))+ int(unsafe.Sizeof(segID))+
int(unsafe.Sizeof(fieldID))+ int(unsafe.Sizeof(fieldID))+
int(unsafe.Sizeof(nullable))+
int(unsafe.Sizeof(startTs))) int(unsafe.Sizeof(startTs)))
assert.Equal(t, endTs, int64(0)) assert.Equal(t, endTs, int64(0))
colType := UnsafeReadInt32(buffer, binary.Size(eventHeader{})+ colType := UnsafeReadInt32(buffer, binary.Size(eventHeader{})+
@ -107,6 +115,7 @@ func TestDescriptorEvent(t *testing.T) {
int(unsafe.Sizeof(partID))+ int(unsafe.Sizeof(partID))+
int(unsafe.Sizeof(segID))+ int(unsafe.Sizeof(segID))+
int(unsafe.Sizeof(fieldID))+ int(unsafe.Sizeof(fieldID))+
int(unsafe.Sizeof(nullable))+
int(unsafe.Sizeof(startTs))+ int(unsafe.Sizeof(startTs))+
int(unsafe.Sizeof(endTs))) int(unsafe.Sizeof(endTs)))
assert.Equal(t, colType, int32(-1)) assert.Equal(t, colType, int32(-1))
@ -116,6 +125,7 @@ func TestDescriptorEvent(t *testing.T) {
int(unsafe.Sizeof(partID)) + int(unsafe.Sizeof(partID)) +
int(unsafe.Sizeof(segID)) + int(unsafe.Sizeof(segID)) +
int(unsafe.Sizeof(fieldID)) + int(unsafe.Sizeof(fieldID)) +
int(unsafe.Sizeof(nullable)) +
int(unsafe.Sizeof(startTs)) + int(unsafe.Sizeof(startTs)) +
int(unsafe.Sizeof(endTs)) + int(unsafe.Sizeof(endTs)) +
int(unsafe.Sizeof(colType)) int(unsafe.Sizeof(colType))
@ -161,177 +171,178 @@ func TestInsertEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{}) payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{})
pBuf := wBuf[payloadOffset:] pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(dt, pBuf) pR, err := NewPayloadReader(dt, pBuf, false)
assert.NoError(t, err) assert.NoError(t, err)
values, _, err := pR.GetDataFromPayload() values, _, _, err := pR.GetDataFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, values, ev) assert.Equal(t, values, ev)
pR.Close() pR.Close()
r, err := newEventReader(dt, bytes.NewBuffer(wBuf)) r, err := newEventReader(dt, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err) assert.NoError(t, err)
payload, _, err := r.GetDataFromPayload() payload, nulls, _, err := r.GetDataFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Nil(t, nulls)
assert.Equal(t, payload, ev) assert.Equal(t, payload, ev)
r.Close() r.Close()
} }
t.Run("insert_bool", func(t *testing.T) { t.Run("insert_bool", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Bool) w, err := newInsertEventWriter(schemapb.DataType_Bool, false)
assert.NoError(t, err) assert.NoError(t, err)
insertT(t, schemapb.DataType_Bool, w, insertT(t, schemapb.DataType_Bool, w,
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]bool{true, false, true}) return w.AddDataToPayload([]bool{true, false, true}, nil)
}, },
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]bool{false, true, false}) return w.AddDataToPayload([]bool{false, true, false}, nil)
}, },
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]int{1, 2, 3, 4, 5}) return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil)
}, },
[]bool{true, false, true, false, true, false}) []bool{true, false, true, false, true, false})
}) })
t.Run("insert_int8", func(t *testing.T) { t.Run("insert_int8", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Int8) w, err := newInsertEventWriter(schemapb.DataType_Int8, false)
assert.NoError(t, err) assert.NoError(t, err)
insertT(t, schemapb.DataType_Int8, w, insertT(t, schemapb.DataType_Int8, w,
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]int8{1, 2, 3}) return w.AddDataToPayload([]int8{1, 2, 3}, nil)
}, },
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]int8{4, 5, 6}) return w.AddDataToPayload([]int8{4, 5, 6}, nil)
}, },
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]int{1, 2, 3, 4, 5}) return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil)
}, },
[]int8{1, 2, 3, 4, 5, 6}) []int8{1, 2, 3, 4, 5, 6})
}) })
t.Run("insert_int16", func(t *testing.T) { t.Run("insert_int16", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Int16) w, err := newInsertEventWriter(schemapb.DataType_Int16, false)
assert.NoError(t, err) assert.NoError(t, err)
insertT(t, schemapb.DataType_Int16, w, insertT(t, schemapb.DataType_Int16, w,
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]int16{1, 2, 3}) return w.AddDataToPayload([]int16{1, 2, 3}, nil)
}, },
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]int16{4, 5, 6}) return w.AddDataToPayload([]int16{4, 5, 6}, nil)
}, },
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]int{1, 2, 3, 4, 5}) return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil)
}, },
[]int16{1, 2, 3, 4, 5, 6}) []int16{1, 2, 3, 4, 5, 6})
}) })
t.Run("insert_int32", func(t *testing.T) { t.Run("insert_int32", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Int32) w, err := newInsertEventWriter(schemapb.DataType_Int32, false)
assert.NoError(t, err) assert.NoError(t, err)
insertT(t, schemapb.DataType_Int32, w, insertT(t, schemapb.DataType_Int32, w,
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]int32{1, 2, 3}) return w.AddDataToPayload([]int32{1, 2, 3}, nil)
}, },
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]int32{4, 5, 6}) return w.AddDataToPayload([]int32{4, 5, 6}, nil)
}, },
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]int{1, 2, 3, 4, 5}) return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil)
}, },
[]int32{1, 2, 3, 4, 5, 6}) []int32{1, 2, 3, 4, 5, 6})
}) })
t.Run("insert_int64", func(t *testing.T) { t.Run("insert_int64", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Int64) w, err := newInsertEventWriter(schemapb.DataType_Int64, false)
assert.NoError(t, err) assert.NoError(t, err)
insertT(t, schemapb.DataType_Int64, w, insertT(t, schemapb.DataType_Int64, w,
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]int64{1, 2, 3}) return w.AddDataToPayload([]int64{1, 2, 3}, nil)
}, },
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]int64{4, 5, 6}) return w.AddDataToPayload([]int64{4, 5, 6}, nil)
}, },
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]int{1, 2, 3, 4, 5}) return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil)
}, },
[]int64{1, 2, 3, 4, 5, 6}) []int64{1, 2, 3, 4, 5, 6})
}) })
t.Run("insert_float32", func(t *testing.T) { t.Run("insert_float32", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Float) w, err := newInsertEventWriter(schemapb.DataType_Float, false)
assert.NoError(t, err) assert.NoError(t, err)
insertT(t, schemapb.DataType_Float, w, insertT(t, schemapb.DataType_Float, w,
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]float32{1, 2, 3}) return w.AddDataToPayload([]float32{1, 2, 3}, nil)
}, },
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]float32{4, 5, 6}) return w.AddDataToPayload([]float32{4, 5, 6}, nil)
}, },
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]int{1, 2, 3, 4, 5}) return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil)
}, },
[]float32{1, 2, 3, 4, 5, 6}) []float32{1, 2, 3, 4, 5, 6})
}) })
t.Run("insert_float64", func(t *testing.T) { t.Run("insert_float64", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Double) w, err := newInsertEventWriter(schemapb.DataType_Double, false)
assert.NoError(t, err) assert.NoError(t, err)
insertT(t, schemapb.DataType_Double, w, insertT(t, schemapb.DataType_Double, w,
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]float64{1, 2, 3}) return w.AddDataToPayload([]float64{1, 2, 3}, nil)
}, },
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]float64{4, 5, 6}) return w.AddDataToPayload([]float64{4, 5, 6}, nil)
}, },
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]int{1, 2, 3, 4, 5}) return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil)
}, },
[]float64{1, 2, 3, 4, 5, 6}) []float64{1, 2, 3, 4, 5, 6})
}) })
t.Run("insert_binary_vector", func(t *testing.T) { t.Run("insert_binary_vector", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_BinaryVector, 16) w, err := newInsertEventWriter(schemapb.DataType_BinaryVector, false, 16)
assert.NoError(t, err) assert.NoError(t, err)
insertT(t, schemapb.DataType_BinaryVector, w, insertT(t, schemapb.DataType_BinaryVector, w,
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]byte{1, 2, 3, 4}, 16) return w.AddDataToPayload([]byte{1, 2, 3, 4}, nil)
}, },
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]byte{5, 6, 7, 8}, 16) return w.AddDataToPayload([]byte{5, 6, 7, 8}, nil)
}, },
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]int{1, 2, 3, 4, 5, 6}, 16) return w.AddDataToPayload([]int{1, 2, 3, 4, 5, 6}, nil)
}, },
[]byte{1, 2, 3, 4, 5, 6, 7, 8}) []byte{1, 2, 3, 4, 5, 6, 7, 8})
}) })
t.Run("insert_float_vector", func(t *testing.T) { t.Run("insert_float_vector", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_FloatVector, 2) w, err := newInsertEventWriter(schemapb.DataType_FloatVector, false, 2)
assert.NoError(t, err) assert.NoError(t, err)
insertT(t, schemapb.DataType_FloatVector, w, insertT(t, schemapb.DataType_FloatVector, w,
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]float32{1, 2, 3, 4}, 2) return w.AddDataToPayload([]float32{1, 2, 3, 4}, nil)
}, },
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]float32{5, 6, 7, 8}, 2) return w.AddDataToPayload([]float32{5, 6, 7, 8}, nil)
}, },
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
return w.AddDataToPayload([]int{1, 2, 3, 4, 5, 6}, 2) return w.AddDataToPayload([]int{1, 2, 3, 4, 5, 6}, nil)
}, },
[]float32{1, 2, 3, 4, 5, 6, 7, 8}) []float32{1, 2, 3, 4, 5, 6, 7, 8})
}) })
t.Run("insert_string", func(t *testing.T) { t.Run("insert_string", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_String) w, err := newInsertEventWriter(schemapb.DataType_String, false)
assert.NoError(t, err) assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload("1234") err = w.AddDataToPayload("1234", nil)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddOneStringToPayload("567890") err = w.AddOneStringToPayload("567890", true)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddOneStringToPayload("abcdefg") err = w.AddOneStringToPayload("abcdefg", true)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddDataToPayload([]int{1, 2, 3}) err = w.AddDataToPayload([]int{1, 2, 3}, nil)
assert.Error(t, err) assert.Error(t, err)
err = w.Finish() err = w.Finish()
assert.NoError(t, err) assert.NoError(t, err)
@ -349,20 +360,20 @@ func TestInsertEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{}) payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{})
pBuf := wBuf[payloadOffset:] pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf) pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false)
assert.NoError(t, err) assert.NoError(t, err)
s, err := pR.GetStringFromPayload() s, _, err := pR.GetStringFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, s[0], "1234") assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890") assert.Equal(t, s[1], "567890")
assert.Equal(t, s[2], "abcdefg") assert.Equal(t, s[2], "abcdefg")
pR.Close() pR.Close()
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf)) r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err) assert.NoError(t, err)
s, err = pR.GetStringFromPayload() s, _, err = pR.GetStringFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, s[0], "1234") assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890") assert.Equal(t, s[1], "567890")
@ -379,13 +390,13 @@ func TestDeleteEvent(t *testing.T) {
w, err := newDeleteEventWriter(schemapb.DataType_String) w, err := newDeleteEventWriter(schemapb.DataType_String)
assert.NoError(t, err) assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload("1234") err = w.AddDataToPayload("1234", nil)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddOneStringToPayload("567890") err = w.AddOneStringToPayload("567890", true)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddOneStringToPayload("abcdefg") err = w.AddOneStringToPayload("abcdefg", true)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddDataToPayload([]int{1, 2, 3}) err = w.AddDataToPayload([]int{1, 2, 3}, nil)
assert.Error(t, err) assert.Error(t, err)
err = w.Finish() err = w.Finish()
assert.NoError(t, err) assert.NoError(t, err)
@ -403,10 +414,10 @@ func TestDeleteEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{}) payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{})
pBuf := wBuf[payloadOffset:] pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf) pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false)
assert.NoError(t, err) assert.NoError(t, err)
s, err := pR.GetStringFromPayload() s, _, err := pR.GetStringFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, s[0], "1234") assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890") assert.Equal(t, s[1], "567890")
@ -414,10 +425,10 @@ func TestDeleteEvent(t *testing.T) {
pR.Close() pR.Close()
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf)) r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err) assert.NoError(t, err)
s, err = pR.GetStringFromPayload() s, _, err = pR.GetStringFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, s[0], "1234") assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890") assert.Equal(t, s[1], "567890")
@ -439,11 +450,11 @@ func TestCreateCollectionEvent(t *testing.T) {
w, err := newCreateCollectionEventWriter(schemapb.DataType_Int64) w, err := newCreateCollectionEventWriter(schemapb.DataType_Int64)
assert.NoError(t, err) assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload([]int64{1, 2, 3}) err = w.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddDataToPayload([]int{4, 5, 6}) err = w.AddDataToPayload([]int{4, 5, 6}, nil)
assert.Error(t, err) assert.Error(t, err)
err = w.AddDataToPayload([]int64{4, 5, 6}) err = w.AddDataToPayload([]int64{4, 5, 6}, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = w.Finish() err = w.Finish()
assert.NoError(t, err) assert.NoError(t, err)
@ -461,16 +472,16 @@ func TestCreateCollectionEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(createCollectionEventData{}) payloadOffset := binary.Size(eventHeader{}) + binary.Size(createCollectionEventData{})
pBuf := wBuf[payloadOffset:] pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf) pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf, false)
assert.NoError(t, err) assert.NoError(t, err)
values, _, err := pR.GetDataFromPayload() values, _, _, err := pR.GetDataFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, values, []int64{1, 2, 3, 4, 5, 6}) assert.Equal(t, values, []int64{1, 2, 3, 4, 5, 6})
pR.Close() pR.Close()
r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf)) r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err) assert.NoError(t, err)
payload, _, err := r.GetDataFromPayload() payload, _, _, err := r.GetDataFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, payload, []int64{1, 2, 3, 4, 5, 6}) assert.Equal(t, payload, []int64{1, 2, 3, 4, 5, 6})
@ -481,13 +492,13 @@ func TestCreateCollectionEvent(t *testing.T) {
w, err := newCreateCollectionEventWriter(schemapb.DataType_String) w, err := newCreateCollectionEventWriter(schemapb.DataType_String)
assert.NoError(t, err) assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload("1234") err = w.AddDataToPayload("1234", nil)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddOneStringToPayload("567890") err = w.AddOneStringToPayload("567890", true)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddOneStringToPayload("abcdefg") err = w.AddOneStringToPayload("abcdefg", true)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddDataToPayload([]int{1, 2, 3}) err = w.AddDataToPayload([]int{1, 2, 3}, nil)
assert.Error(t, err) assert.Error(t, err)
err = w.Finish() err = w.Finish()
assert.NoError(t, err) assert.NoError(t, err)
@ -505,10 +516,10 @@ func TestCreateCollectionEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{}) payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{})
pBuf := wBuf[payloadOffset:] pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf) pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false)
assert.NoError(t, err) assert.NoError(t, err)
s, err := pR.GetStringFromPayload() s, _, err := pR.GetStringFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, s[0], "1234") assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890") assert.Equal(t, s[1], "567890")
@ -516,10 +527,10 @@ func TestCreateCollectionEvent(t *testing.T) {
pR.Close() pR.Close()
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf)) r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), true)
assert.NoError(t, err) assert.NoError(t, err)
s, err = pR.GetStringFromPayload() s, _, err = pR.GetStringFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, s[0], "1234") assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890") assert.Equal(t, s[1], "567890")
@ -541,11 +552,11 @@ func TestDropCollectionEvent(t *testing.T) {
w, err := newDropCollectionEventWriter(schemapb.DataType_Int64) w, err := newDropCollectionEventWriter(schemapb.DataType_Int64)
assert.NoError(t, err) assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload([]int64{1, 2, 3}) err = w.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddDataToPayload([]int{4, 5, 6}) err = w.AddDataToPayload([]int{4, 5, 6}, nil)
assert.Error(t, err) assert.Error(t, err)
err = w.AddDataToPayload([]int64{4, 5, 6}) err = w.AddDataToPayload([]int64{4, 5, 6}, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = w.Finish() err = w.Finish()
assert.NoError(t, err) assert.NoError(t, err)
@ -563,16 +574,16 @@ func TestDropCollectionEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(createCollectionEventData{}) payloadOffset := binary.Size(eventHeader{}) + binary.Size(createCollectionEventData{})
pBuf := wBuf[payloadOffset:] pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf) pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf, false)
assert.NoError(t, err) assert.NoError(t, err)
values, _, err := pR.GetDataFromPayload() values, _, _, err := pR.GetDataFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, values, []int64{1, 2, 3, 4, 5, 6}) assert.Equal(t, values, []int64{1, 2, 3, 4, 5, 6})
pR.Close() pR.Close()
r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf)) r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err) assert.NoError(t, err)
payload, _, err := r.GetDataFromPayload() payload, _, _, err := r.GetDataFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, payload, []int64{1, 2, 3, 4, 5, 6}) assert.Equal(t, payload, []int64{1, 2, 3, 4, 5, 6})
@ -583,13 +594,13 @@ func TestDropCollectionEvent(t *testing.T) {
w, err := newDropCollectionEventWriter(schemapb.DataType_String) w, err := newDropCollectionEventWriter(schemapb.DataType_String)
assert.NoError(t, err) assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload("1234") err = w.AddDataToPayload("1234", nil)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddOneStringToPayload("567890") err = w.AddOneStringToPayload("567890", true)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddOneStringToPayload("abcdefg") err = w.AddOneStringToPayload("abcdefg", true)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddDataToPayload([]int{1, 2, 3}) err = w.AddDataToPayload([]int{1, 2, 3}, nil)
assert.Error(t, err) assert.Error(t, err)
err = w.Finish() err = w.Finish()
assert.NoError(t, err) assert.NoError(t, err)
@ -607,10 +618,10 @@ func TestDropCollectionEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{}) payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{})
pBuf := wBuf[payloadOffset:] pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf) pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false)
assert.NoError(t, err) assert.NoError(t, err)
s, err := pR.GetStringFromPayload() s, _, err := pR.GetStringFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, s[0], "1234") assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890") assert.Equal(t, s[1], "567890")
@ -618,10 +629,10 @@ func TestDropCollectionEvent(t *testing.T) {
pR.Close() pR.Close()
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf)) r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err) assert.NoError(t, err)
s, err = r.GetStringFromPayload() s, _, err = r.GetStringFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, s[0], "1234") assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890") assert.Equal(t, s[1], "567890")
@ -643,11 +654,11 @@ func TestCreatePartitionEvent(t *testing.T) {
w, err := newCreatePartitionEventWriter(schemapb.DataType_Int64) w, err := newCreatePartitionEventWriter(schemapb.DataType_Int64)
assert.NoError(t, err) assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload([]int64{1, 2, 3}) err = w.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddDataToPayload([]int{4, 5, 6}) err = w.AddDataToPayload([]int{4, 5, 6}, nil)
assert.Error(t, err) assert.Error(t, err)
err = w.AddDataToPayload([]int64{4, 5, 6}) err = w.AddDataToPayload([]int64{4, 5, 6}, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = w.Finish() err = w.Finish()
assert.NoError(t, err) assert.NoError(t, err)
@ -665,16 +676,16 @@ func TestCreatePartitionEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(createCollectionEventData{}) payloadOffset := binary.Size(eventHeader{}) + binary.Size(createCollectionEventData{})
pBuf := wBuf[payloadOffset:] pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf) pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf, false)
assert.NoError(t, err) assert.NoError(t, err)
values, _, err := pR.GetDataFromPayload() values, _, _, err := pR.GetDataFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, values, []int64{1, 2, 3, 4, 5, 6}) assert.Equal(t, values, []int64{1, 2, 3, 4, 5, 6})
pR.Close() pR.Close()
r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf)) r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err) assert.NoError(t, err)
payload, _, err := r.GetDataFromPayload() payload, _, _, err := r.GetDataFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, payload, []int64{1, 2, 3, 4, 5, 6}) assert.Equal(t, payload, []int64{1, 2, 3, 4, 5, 6})
@ -685,13 +696,13 @@ func TestCreatePartitionEvent(t *testing.T) {
w, err := newCreatePartitionEventWriter(schemapb.DataType_String) w, err := newCreatePartitionEventWriter(schemapb.DataType_String)
assert.NoError(t, err) assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload("1234") err = w.AddDataToPayload("1234", nil)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddOneStringToPayload("567890") err = w.AddOneStringToPayload("567890", true)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddOneStringToPayload("abcdefg") err = w.AddOneStringToPayload("abcdefg", true)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddDataToPayload([]int{1, 2, 3}) err = w.AddDataToPayload([]int{1, 2, 3}, nil)
assert.Error(t, err) assert.Error(t, err)
err = w.Finish() err = w.Finish()
assert.NoError(t, err) assert.NoError(t, err)
@ -709,10 +720,10 @@ func TestCreatePartitionEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{}) payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{})
pBuf := wBuf[payloadOffset:] pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf) pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false)
assert.NoError(t, err) assert.NoError(t, err)
s, err := pR.GetStringFromPayload() s, _, err := pR.GetStringFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, s[0], "1234") assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890") assert.Equal(t, s[1], "567890")
@ -720,10 +731,10 @@ func TestCreatePartitionEvent(t *testing.T) {
pR.Close() pR.Close()
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf)) r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err) assert.NoError(t, err)
s, err = pR.GetStringFromPayload() s, _, err = pR.GetStringFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, s[0], "1234") assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890") assert.Equal(t, s[1], "567890")
@ -745,11 +756,11 @@ func TestDropPartitionEvent(t *testing.T) {
w, err := newDropPartitionEventWriter(schemapb.DataType_Int64) w, err := newDropPartitionEventWriter(schemapb.DataType_Int64)
assert.NoError(t, err) assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload([]int64{1, 2, 3}) err = w.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddDataToPayload([]int{4, 5, 6}) err = w.AddDataToPayload([]int{4, 5, 6}, nil)
assert.Error(t, err) assert.Error(t, err)
err = w.AddDataToPayload([]int64{4, 5, 6}) err = w.AddDataToPayload([]int64{4, 5, 6}, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = w.Finish() err = w.Finish()
assert.NoError(t, err) assert.NoError(t, err)
@ -767,16 +778,16 @@ func TestDropPartitionEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(createCollectionEventData{}) payloadOffset := binary.Size(eventHeader{}) + binary.Size(createCollectionEventData{})
pBuf := wBuf[payloadOffset:] pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf) pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf, false)
assert.NoError(t, err) assert.NoError(t, err)
values, _, err := pR.GetDataFromPayload() values, _, _, err := pR.GetDataFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, values, []int64{1, 2, 3, 4, 5, 6}) assert.Equal(t, values, []int64{1, 2, 3, 4, 5, 6})
pR.Close() pR.Close()
r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf)) r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err) assert.NoError(t, err)
payload, _, err := r.GetDataFromPayload() payload, _, _, err := r.GetDataFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, payload, []int64{1, 2, 3, 4, 5, 6}) assert.Equal(t, payload, []int64{1, 2, 3, 4, 5, 6})
@ -787,13 +798,13 @@ func TestDropPartitionEvent(t *testing.T) {
w, err := newDropPartitionEventWriter(schemapb.DataType_String) w, err := newDropPartitionEventWriter(schemapb.DataType_String)
assert.NoError(t, err) assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload("1234") err = w.AddDataToPayload("1234", nil)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddOneStringToPayload("567890") err = w.AddOneStringToPayload("567890", true)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddOneStringToPayload("abcdefg") err = w.AddOneStringToPayload("abcdefg", true)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddDataToPayload([]int{1, 2, 3}) err = w.AddDataToPayload([]int{1, 2, 3}, nil)
assert.Error(t, err) assert.Error(t, err)
err = w.Finish() err = w.Finish()
assert.NoError(t, err) assert.NoError(t, err)
@ -811,10 +822,10 @@ func TestDropPartitionEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{}) payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{})
pBuf := wBuf[payloadOffset:] pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf) pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false)
assert.NoError(t, err) assert.NoError(t, err)
s, err := pR.GetStringFromPayload() s, _, err := pR.GetStringFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, s[0], "1234") assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890") assert.Equal(t, s[1], "567890")
@ -822,10 +833,10 @@ func TestDropPartitionEvent(t *testing.T) {
pR.Close() pR.Close()
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf)) r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err) assert.NoError(t, err)
s, err = pR.GetStringFromPayload() s, _, err = pR.GetStringFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, s[0], "1234") assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890") assert.Equal(t, s[1], "567890")
@ -843,7 +854,7 @@ func TestIndexFileEvent(t *testing.T) {
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
payload := funcutil.GenRandomBytes() payload := funcutil.GenRandomBytes()
err = w.AddOneStringToPayload(typeutil.UnsafeBytes2str(payload)) err = w.AddOneStringToPayload(typeutil.UnsafeBytes2str(payload), true)
assert.NoError(t, err) assert.NoError(t, err)
err = w.Finish() err = w.Finish()
@ -862,10 +873,10 @@ func TestIndexFileEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(indexFileEventData{}) payloadOffset := binary.Size(eventHeader{}) + binary.Size(indexFileEventData{})
pBuf := wBuf[payloadOffset:] pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf) pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false)
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, pR.numRows, int64(1)) assert.Equal(t, pR.numRows, int64(1))
value, err := pR.GetStringFromPayload() value, _, err := pR.GetStringFromPayload()
assert.Equal(t, len(value), 1) assert.Equal(t, len(value), 1)
@ -880,7 +891,7 @@ func TestIndexFileEvent(t *testing.T) {
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
payload := funcutil.GenRandomBytes() payload := funcutil.GenRandomBytes()
err = w.AddByteToPayload(payload) err = w.AddByteToPayload(payload, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = w.Finish() err = w.Finish()
@ -899,10 +910,10 @@ func TestIndexFileEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(indexFileEventData{}) payloadOffset := binary.Size(eventHeader{}) + binary.Size(indexFileEventData{})
pBuf := wBuf[payloadOffset:] pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_Int8, pBuf) pR, err := NewPayloadReader(schemapb.DataType_Int8, pBuf, false)
assert.Equal(t, pR.numRows, int64(len(payload))) assert.Equal(t, pR.numRows, int64(len(payload)))
assert.NoError(t, err) assert.NoError(t, err)
value, err := pR.GetByteFromPayload() value, _, err := pR.GetByteFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, payload, value) assert.Equal(t, payload, value)
pR.Close() pR.Close()
@ -914,7 +925,7 @@ func TestIndexFileEvent(t *testing.T) {
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
payload := funcutil.GenRandomBytesWithLength(1000) payload := funcutil.GenRandomBytesWithLength(1000)
err = w.AddByteToPayload(payload) err = w.AddByteToPayload(payload, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = w.Finish() err = w.Finish()
@ -933,10 +944,10 @@ func TestIndexFileEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(indexFileEventData{}) payloadOffset := binary.Size(eventHeader{}) + binary.Size(indexFileEventData{})
pBuf := wBuf[payloadOffset:] pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_Int8, pBuf) pR, err := NewPayloadReader(schemapb.DataType_Int8, pBuf, false)
assert.Equal(t, pR.numRows, int64(len(payload))) assert.Equal(t, pR.numRows, int64(len(payload)))
assert.NoError(t, err) assert.NoError(t, err)
value, err := pR.GetByteFromPayload() value, _, err := pR.GetByteFromPayload()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, payload, value) assert.Equal(t, payload, value)
pR.Close() pR.Close()
@ -1044,7 +1055,7 @@ func TestReadFixPartError(t *testing.T) {
func TestEventReaderError(t *testing.T) { func TestEventReaderError(t *testing.T) {
buf := new(bytes.Buffer) buf := new(bytes.Buffer)
r, err := newEventReader(schemapb.DataType_Int64, buf) r, err := newEventReader(schemapb.DataType_Int64, buf, false)
assert.Nil(t, r) assert.Nil(t, r)
assert.Error(t, err) assert.Error(t, err)
@ -1052,7 +1063,7 @@ func TestEventReaderError(t *testing.T) {
err = header.Write(buf) err = header.Write(buf)
assert.NoError(t, err) assert.NoError(t, err)
r, err = newEventReader(schemapb.DataType_Int64, buf) r, err = newEventReader(schemapb.DataType_Int64, buf, false)
assert.Nil(t, r) assert.Nil(t, r)
assert.Error(t, err) assert.Error(t, err)
@ -1061,7 +1072,7 @@ func TestEventReaderError(t *testing.T) {
err = header.Write(buf) err = header.Write(buf)
assert.NoError(t, err) assert.NoError(t, err)
r, err = newEventReader(schemapb.DataType_Int64, buf) r, err = newEventReader(schemapb.DataType_Int64, buf, false)
assert.Nil(t, r) assert.Nil(t, r)
assert.Error(t, err) assert.Error(t, err)
@ -1078,16 +1089,16 @@ func TestEventReaderError(t *testing.T) {
err = binary.Write(buf, common.Endian, insertData) err = binary.Write(buf, common.Endian, insertData)
assert.NoError(t, err) assert.NoError(t, err)
r, err = newEventReader(schemapb.DataType_Int64, buf) r, err = newEventReader(schemapb.DataType_Int64, buf, false)
assert.Nil(t, r) assert.Nil(t, r)
assert.Error(t, err) assert.Error(t, err)
} }
func TestEventClose(t *testing.T) { func TestEventClose(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_String) w, err := newInsertEventWriter(schemapb.DataType_String, false)
assert.NoError(t, err) assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload("1234") err = w.AddDataToPayload("1234", nil)
assert.NoError(t, err) assert.NoError(t, err)
err = w.Finish() err = w.Finish()
assert.NoError(t, err) assert.NoError(t, err)
@ -1098,7 +1109,7 @@ func TestEventClose(t *testing.T) {
w.Close() w.Close()
wBuf := buf.Bytes() wBuf := buf.Bytes()
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf)) r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err) assert.NoError(t, err)
r.Close() r.Close()

View File

@ -212,16 +212,16 @@ func newDescriptorEvent() *descriptorEvent {
} }
} }
func newInsertEventWriter(dataType schemapb.DataType, dim ...int) (*insertEventWriter, error) { func newInsertEventWriter(dataType schemapb.DataType, nullable bool, dim ...int) (*insertEventWriter, error) {
var payloadWriter PayloadWriterInterface var payloadWriter PayloadWriterInterface
var err error var err error
if typeutil.IsVectorType(dataType) && !typeutil.IsSparseFloatVectorType(dataType) { if typeutil.IsVectorType(dataType) && !typeutil.IsSparseFloatVectorType(dataType) {
if len(dim) != 1 { if len(dim) != 1 {
return nil, fmt.Errorf("incorrect input numbers") return nil, fmt.Errorf("incorrect input numbers")
} }
payloadWriter, err = NewPayloadWriter(dataType, dim[0]) payloadWriter, err = NewPayloadWriter(dataType, nullable, dim[0])
} else { } else {
payloadWriter, err = NewPayloadWriter(dataType) payloadWriter, err = NewPayloadWriter(dataType, nullable)
} }
if err != nil { if err != nil {
return nil, err return nil, err
@ -244,7 +244,7 @@ func newInsertEventWriter(dataType schemapb.DataType, dim ...int) (*insertEventW
} }
func newDeleteEventWriter(dataType schemapb.DataType) (*deleteEventWriter, error) { func newDeleteEventWriter(dataType schemapb.DataType) (*deleteEventWriter, error) {
payloadWriter, err := NewPayloadWriter(dataType) payloadWriter, err := NewPayloadWriter(dataType, false)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -270,7 +270,7 @@ func newCreateCollectionEventWriter(dataType schemapb.DataType) (*createCollecti
return nil, errors.New("incorrect data type") return nil, errors.New("incorrect data type")
} }
payloadWriter, err := NewPayloadWriter(dataType) payloadWriter, err := NewPayloadWriter(dataType, false)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -296,7 +296,7 @@ func newDropCollectionEventWriter(dataType schemapb.DataType) (*dropCollectionEv
return nil, errors.New("incorrect data type") return nil, errors.New("incorrect data type")
} }
payloadWriter, err := NewPayloadWriter(dataType) payloadWriter, err := NewPayloadWriter(dataType, false)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -322,7 +322,7 @@ func newCreatePartitionEventWriter(dataType schemapb.DataType) (*createPartition
return nil, errors.New("incorrect data type") return nil, errors.New("incorrect data type")
} }
payloadWriter, err := NewPayloadWriter(dataType) payloadWriter, err := NewPayloadWriter(dataType, false)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -348,7 +348,7 @@ func newDropPartitionEventWriter(dataType schemapb.DataType) (*dropPartitionEven
return nil, errors.New("incorrect data type") return nil, errors.New("incorrect data type")
} }
payloadWriter, err := NewPayloadWriter(dataType) payloadWriter, err := NewPayloadWriter(dataType, false)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -370,7 +370,7 @@ func newDropPartitionEventWriter(dataType schemapb.DataType) (*dropPartitionEven
} }
func newIndexFileEventWriter(dataType schemapb.DataType) (*indexFileEventWriter, error) { func newIndexFileEventWriter(dataType schemapb.DataType) (*indexFileEventWriter, error) {
payloadWriter, err := NewPayloadWriter(dataType) payloadWriter, err := NewPayloadWriter(dataType, false)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -59,17 +59,17 @@ func TestSizeofStruct(t *testing.T) {
} }
func TestEventWriter(t *testing.T) { func TestEventWriter(t *testing.T) {
insertEvent, err := newInsertEventWriter(schemapb.DataType_Int32) insertEvent, err := newInsertEventWriter(schemapb.DataType_Int32, false)
assert.NoError(t, err) assert.NoError(t, err)
insertEvent.Close() insertEvent.Close()
insertEvent, err = newInsertEventWriter(schemapb.DataType_Int32) insertEvent, err = newInsertEventWriter(schemapb.DataType_Int32, false)
assert.NoError(t, err) assert.NoError(t, err)
defer insertEvent.Close() defer insertEvent.Close()
err = insertEvent.AddInt64ToPayload([]int64{1, 1}) err = insertEvent.AddInt64ToPayload([]int64{1, 1}, nil)
assert.Error(t, err) assert.Error(t, err)
err = insertEvent.AddInt32ToPayload([]int32{1, 2, 3}) err = insertEvent.AddInt32ToPayload([]int32{1, 2, 3}, nil)
assert.NoError(t, err) assert.NoError(t, err)
nums, err := insertEvent.GetPayloadLengthFromWriter() nums, err := insertEvent.GetPayloadLengthFromWriter()
assert.NoError(t, err) assert.NoError(t, err)
@ -79,7 +79,7 @@ func TestEventWriter(t *testing.T) {
length, err := insertEvent.GetMemoryUsageInBytes() length, err := insertEvent.GetMemoryUsageInBytes()
assert.NoError(t, err) assert.NoError(t, err)
assert.EqualValues(t, length, insertEvent.EventLength) assert.EqualValues(t, length, insertEvent.EventLength)
err = insertEvent.AddInt32ToPayload([]int32{1}) err = insertEvent.AddInt32ToPayload([]int32{1}, nil)
assert.Error(t, err) assert.Error(t, err)
buffer := new(bytes.Buffer) buffer := new(bytes.Buffer)
insertEvent.SetEventTimestamp(100, 200) insertEvent.SetEventTimestamp(100, 200)

View File

@ -59,7 +59,7 @@ func (codec *IndexFileBinlogCodec) serializeImpl(
} }
defer eventWriter.Close() defer eventWriter.Close()
err = eventWriter.AddOneStringToPayload(typeutil.UnsafeBytes2str(value)) err = eventWriter.AddOneStringToPayload(typeutil.UnsafeBytes2str(value), true)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -221,7 +221,8 @@ func (codec *IndexFileBinlogCodec) DeserializeImpl(blobs []*Blob) (
switch dataType { switch dataType {
// just for backward compatibility // just for backward compatibility
case schemapb.DataType_Int8: case schemapb.DataType_Int8:
content, err := eventReader.GetByteFromPayload() // todo: smellthemoon, valid_data may need to check when create index
content, _, err := eventReader.GetByteFromPayload()
if err != nil { if err != nil {
log.Warn("failed to get byte from payload", log.Warn("failed to get byte from payload",
zap.Error(err)) zap.Error(err))
@ -239,7 +240,7 @@ func (codec *IndexFileBinlogCodec) DeserializeImpl(blobs []*Blob) (
} }
case schemapb.DataType_String: case schemapb.DataType_String:
content, err := eventReader.GetStringFromPayload() content, _, err := eventReader.GetStringFromPayload()
if err != nil { if err != nil {
log.Warn("failed to get string from payload", zap.Error(err)) log.Warn("failed to get string from payload", zap.Error(err))
eventReader.Close() eventReader.Close()

View File

@ -149,6 +149,7 @@ type FieldData interface {
AppendRow(row interface{}) error AppendRow(row interface{}) error
AppendRows(rows interface{}) error AppendRows(rows interface{}) error
GetDataType() schemapb.DataType GetDataType() schemapb.DataType
GetNullable() bool
} }
func NewFieldData(dataType schemapb.DataType, fieldSchema *schemapb.FieldSchema, cap int) (FieldData, error) { func NewFieldData(dataType schemapb.DataType, fieldSchema *schemapb.FieldSchema, cap int) (FieldData, error) {
@ -193,88 +194,142 @@ func NewFieldData(dataType schemapb.DataType, fieldSchema *schemapb.FieldSchema,
case schemapb.DataType_SparseFloatVector: case schemapb.DataType_SparseFloatVector:
return &SparseFloatVectorFieldData{}, nil return &SparseFloatVectorFieldData{}, nil
case schemapb.DataType_Bool: case schemapb.DataType_Bool:
return &BoolFieldData{ data := &BoolFieldData{
Data: make([]bool, 0, cap), Data: make([]bool, 0, cap),
}, nil }
if fieldSchema.GetNullable() {
data.ValidData = make([]bool, 0, cap)
}
return data, nil
case schemapb.DataType_Int8: case schemapb.DataType_Int8:
return &Int8FieldData{ data := &Int8FieldData{
Data: make([]int8, 0, cap), Data: make([]int8, 0, cap),
}, nil }
if fieldSchema.GetNullable() {
data.ValidData = make([]bool, 0, cap)
}
return data, nil
case schemapb.DataType_Int16: case schemapb.DataType_Int16:
return &Int16FieldData{ data := &Int16FieldData{
Data: make([]int16, 0, cap), Data: make([]int16, 0, cap),
}, nil }
if fieldSchema.GetNullable() {
data.ValidData = make([]bool, 0, cap)
}
return data, nil
case schemapb.DataType_Int32: case schemapb.DataType_Int32:
return &Int32FieldData{ data := &Int32FieldData{
Data: make([]int32, 0, cap), Data: make([]int32, 0, cap),
}, nil }
if fieldSchema.GetNullable() {
data.ValidData = make([]bool, 0, cap)
}
return data, nil
case schemapb.DataType_Int64: case schemapb.DataType_Int64:
return &Int64FieldData{ data := &Int64FieldData{
Data: make([]int64, 0, cap), Data: make([]int64, 0, cap),
}, nil }
if fieldSchema.GetNullable() {
data.ValidData = make([]bool, 0, cap)
}
return data, nil
case schemapb.DataType_Float: case schemapb.DataType_Float:
return &FloatFieldData{ data := &FloatFieldData{
Data: make([]float32, 0, cap), Data: make([]float32, 0, cap),
}, nil }
if fieldSchema.GetNullable() {
data.ValidData = make([]bool, 0, cap)
}
return data, nil
case schemapb.DataType_Double: case schemapb.DataType_Double:
return &DoubleFieldData{ data := &DoubleFieldData{
Data: make([]float64, 0, cap), Data: make([]float64, 0, cap),
}, nil }
if fieldSchema.GetNullable() {
data.ValidData = make([]bool, 0, cap)
}
return data, nil
case schemapb.DataType_JSON: case schemapb.DataType_JSON:
return &JSONFieldData{ data := &JSONFieldData{
Data: make([][]byte, 0, cap), Data: make([][]byte, 0, cap),
}, nil }
if fieldSchema.GetNullable() {
data.ValidData = make([]bool, 0, cap)
}
return data, nil
case schemapb.DataType_Array: case schemapb.DataType_Array:
return &ArrayFieldData{ data := &ArrayFieldData{
Data: make([]*schemapb.ScalarField, 0, cap), Data: make([]*schemapb.ScalarField, 0, cap),
ElementType: fieldSchema.GetElementType(), ElementType: fieldSchema.GetElementType(),
}, nil }
if fieldSchema.GetNullable() {
data.ValidData = make([]bool, 0, cap)
}
return data, nil
case schemapb.DataType_String, schemapb.DataType_VarChar: case schemapb.DataType_String, schemapb.DataType_VarChar:
return &StringFieldData{ data := &StringFieldData{
Data: make([]string, 0, cap), Data: make([]string, 0, cap),
DataType: dataType, DataType: dataType,
}, nil }
if fieldSchema.GetNullable() {
data.ValidData = make([]bool, 0, cap)
}
return data, nil
default: default:
return nil, fmt.Errorf("Unexpected schema data type: %d", dataType) return nil, fmt.Errorf("Unexpected schema data type: %d", dataType)
} }
} }
type BoolFieldData struct { type BoolFieldData struct {
Data []bool Data []bool
ValidData []bool
} }
type Int8FieldData struct { type Int8FieldData struct {
Data []int8 Data []int8
ValidData []bool
} }
type Int16FieldData struct { type Int16FieldData struct {
Data []int16 Data []int16
ValidData []bool
} }
type Int32FieldData struct { type Int32FieldData struct {
Data []int32 Data []int32
ValidData []bool
} }
type Int64FieldData struct { type Int64FieldData struct {
Data []int64 Data []int64
ValidData []bool
} }
type FloatFieldData struct { type FloatFieldData struct {
Data []float32 Data []float32
ValidData []bool
} }
type DoubleFieldData struct { type DoubleFieldData struct {
Data []float64 Data []float64
ValidData []bool
} }
type StringFieldData struct { type StringFieldData struct {
Data []string Data []string
DataType schemapb.DataType DataType schemapb.DataType
ValidData []bool
} }
type ArrayFieldData struct { type ArrayFieldData struct {
ElementType schemapb.DataType ElementType schemapb.DataType
Data []*schemapb.ScalarField Data []*schemapb.ScalarField
ValidData []bool
} }
type JSONFieldData struct { type JSONFieldData struct {
Data [][]byte Data [][]byte
ValidData []bool
} }
type BinaryVectorFieldData struct { type BinaryVectorFieldData struct {
Data []byte Data []byte
@ -671,13 +726,33 @@ func (data *SparseFloatVectorFieldData) AppendRows(rows interface{}) error {
} }
// GetMemorySize implements FieldData.GetMemorySize // GetMemorySize implements FieldData.GetMemorySize
func (data *BoolFieldData) GetMemorySize() int { return binary.Size(data.Data) } func (data *BoolFieldData) GetMemorySize() int {
func (data *Int8FieldData) GetMemorySize() int { return binary.Size(data.Data) } return binary.Size(data.Data) + binary.Size(data.ValidData)
func (data *Int16FieldData) GetMemorySize() int { return binary.Size(data.Data) } }
func (data *Int32FieldData) GetMemorySize() int { return binary.Size(data.Data) }
func (data *Int64FieldData) GetMemorySize() int { return binary.Size(data.Data) } func (data *Int8FieldData) GetMemorySize() int {
func (data *FloatFieldData) GetMemorySize() int { return binary.Size(data.Data) } return binary.Size(data.Data) + binary.Size(data.ValidData)
func (data *DoubleFieldData) GetMemorySize() int { return binary.Size(data.Data) } }
func (data *Int16FieldData) GetMemorySize() int {
return binary.Size(data.Data) + binary.Size(data.ValidData)
}
func (data *Int32FieldData) GetMemorySize() int {
return binary.Size(data.Data) + binary.Size(data.ValidData)
}
func (data *Int64FieldData) GetMemorySize() int {
return binary.Size(data.Data) + binary.Size(data.ValidData)
}
func (data *FloatFieldData) GetMemorySize() int {
return binary.Size(data.Data) + binary.Size(data.ValidData)
}
func (data *DoubleFieldData) GetMemorySize() int {
return binary.Size(data.Data) + binary.Size(data.ValidData)
}
func (data *BinaryVectorFieldData) GetMemorySize() int { return binary.Size(data.Data) + 4 } func (data *BinaryVectorFieldData) GetMemorySize() int { return binary.Size(data.Data) + 4 }
func (data *FloatVectorFieldData) GetMemorySize() int { return binary.Size(data.Data) + 4 } func (data *FloatVectorFieldData) GetMemorySize() int { return binary.Size(data.Data) + 4 }
func (data *Float16VectorFieldData) GetMemorySize() int { return binary.Size(data.Data) + 4 } func (data *Float16VectorFieldData) GetMemorySize() int { return binary.Size(data.Data) + 4 }
@ -802,3 +877,63 @@ func (data *ArrayFieldData) GetRowSize(i int) int {
func (data *SparseFloatVectorFieldData) GetRowSize(i int) int { func (data *SparseFloatVectorFieldData) GetRowSize(i int) int {
return len(data.Contents[i]) return len(data.Contents[i])
} }
func (data *BoolFieldData) GetNullable() bool {
return len(data.ValidData) != 0
}
func (data *Int8FieldData) GetNullable() bool {
return len(data.ValidData) != 0
}
func (data *Int16FieldData) GetNullable() bool {
return len(data.ValidData) != 0
}
func (data *Int32FieldData) GetNullable() bool {
return len(data.ValidData) != 0
}
func (data *Int64FieldData) GetNullable() bool {
return len(data.ValidData) != 0
}
func (data *FloatFieldData) GetNullable() bool {
return len(data.ValidData) != 0
}
func (data *DoubleFieldData) GetNullable() bool {
return len(data.ValidData) != 0
}
func (data *BFloat16VectorFieldData) GetNullable() bool {
return false
}
func (data *BinaryVectorFieldData) GetNullable() bool {
return false
}
func (data *FloatVectorFieldData) GetNullable() bool {
return false
}
func (data *SparseFloatVectorFieldData) GetNullable() bool {
return false
}
func (data *Float16VectorFieldData) GetNullable() bool {
return false
}
func (data *StringFieldData) GetNullable() bool {
return len(data.ValidData) != 0
}
func (data *ArrayFieldData) GetNullable() bool {
return len(data.ValidData) != 0
}
func (data *JSONFieldData) GetNullable() bool {
return len(data.ValidData) != 0
}

View File

@ -41,6 +41,39 @@ func (s *InsertDataSuite) TestInsertData() {
s.Nil(idata) s.Nil(idata)
}) })
s.Run("nullable field schema", func() {
tests := []struct {
description string
dataType schemapb.DataType
}{
{"nullable bool field", schemapb.DataType_Bool},
{"nullable int8 field", schemapb.DataType_Int8},
{"nullable int16 field", schemapb.DataType_Int16},
{"nullable int32 field", schemapb.DataType_Int32},
{"nullable int64 field", schemapb.DataType_Int64},
{"nullable float field", schemapb.DataType_Float},
{"nullable double field", schemapb.DataType_Double},
{"nullable json field", schemapb.DataType_JSON},
{"nullable array field", schemapb.DataType_Array},
{"nullable string/varchar field", schemapb.DataType_String},
}
for _, test := range tests {
s.Run(test.description, func() {
schema := &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{
DataType: test.dataType,
Nullable: true,
},
},
}
_, err := NewInsertData(schema)
s.Nil(err)
})
}
})
s.Run("invalid schema", func() { s.Run("invalid schema", func() {
tests := []struct { tests := []struct {
description string description string
@ -183,6 +216,14 @@ func (s *InsertDataSuite) TestGetDataType() {
} }
} }
func (s *InsertDataSuite) TestGetNullable() {
for _, field := range s.schema.GetFields() {
fieldData, ok := s.iDataOneRow.Data[field.GetFieldID()]
s.True(ok)
s.Equal(field.GetNullable(), fieldData.GetNullable())
}
}
func (s *InsertDataSuite) SetupTest() { func (s *InsertDataSuite) SetupTest() {
var err error var err error
s.iDataEmpty, err = NewInsertData(s.schema) s.iDataEmpty, err = NewInsertData(s.schema)

View File

@ -26,18 +26,18 @@ import (
// PayloadWriterInterface abstracts PayloadWriter // PayloadWriterInterface abstracts PayloadWriter
type PayloadWriterInterface interface { type PayloadWriterInterface interface {
AddDataToPayload(msgs any, dim ...int) error AddDataToPayload(msgs any, valids []bool) error
AddBoolToPayload(msgs []bool) error AddBoolToPayload(msgs []bool, valids []bool) error
AddByteToPayload(msgs []byte) error AddByteToPayload(msgs []byte, valids []bool) error
AddInt8ToPayload(msgs []int8) error AddInt8ToPayload(msgs []int8, valids []bool) error
AddInt16ToPayload(msgs []int16) error AddInt16ToPayload(msgs []int16, valids []bool) error
AddInt32ToPayload(msgs []int32) error AddInt32ToPayload(msgs []int32, valids []bool) error
AddInt64ToPayload(msgs []int64) error AddInt64ToPayload(msgs []int64, valids []bool) error
AddFloatToPayload(msgs []float32) error AddFloatToPayload(msgs []float32, valids []bool) error
AddDoubleToPayload(msgs []float64) error AddDoubleToPayload(msgs []float64, valids []bool) error
AddOneStringToPayload(msgs string) error AddOneStringToPayload(msgs string, isValid bool) error
AddOneArrayToPayload(msg *schemapb.ScalarField) error AddOneArrayToPayload(msg *schemapb.ScalarField, isValid bool) error
AddOneJSONToPayload(msg []byte) error AddOneJSONToPayload(msg []byte, isValid bool) error
AddBinaryVectorToPayload(binVec []byte, dim int) error AddBinaryVectorToPayload(binVec []byte, dim int) error
AddFloatVectorToPayload(binVec []float32, dim int) error AddFloatVectorToPayload(binVec []float32, dim int) error
AddFloat16VectorToPayload(binVec []byte, dim int) error AddFloat16VectorToPayload(binVec []byte, dim int) error
@ -53,18 +53,18 @@ type PayloadWriterInterface interface {
// PayloadReaderInterface abstracts PayloadReader // PayloadReaderInterface abstracts PayloadReader
type PayloadReaderInterface interface { type PayloadReaderInterface interface {
GetDataFromPayload() (any, int, error) GetDataFromPayload() (any, []bool, int, error)
GetBoolFromPayload() ([]bool, error) GetBoolFromPayload() ([]bool, []bool, error)
GetByteFromPayload() ([]byte, error) GetByteFromPayload() ([]byte, []bool, error)
GetInt8FromPayload() ([]int8, error) GetInt8FromPayload() ([]int8, []bool, error)
GetInt16FromPayload() ([]int16, error) GetInt16FromPayload() ([]int16, []bool, error)
GetInt32FromPayload() ([]int32, error) GetInt32FromPayload() ([]int32, []bool, error)
GetInt64FromPayload() ([]int64, error) GetInt64FromPayload() ([]int64, []bool, error)
GetFloatFromPayload() ([]float32, error) GetFloatFromPayload() ([]float32, []bool, error)
GetDoubleFromPayload() ([]float64, error) GetDoubleFromPayload() ([]float64, []bool, error)
GetStringFromPayload() ([]string, error) GetStringFromPayload() ([]string, []bool, error)
GetArrayFromPayload() ([]*schemapb.ScalarField, error) GetArrayFromPayload() ([]*schemapb.ScalarField, []bool, error)
GetJSONFromPayload() ([][]byte, error) GetJSONFromPayload() ([][]byte, []bool, error)
GetBinaryVectorFromPayload() ([]byte, int, error) GetBinaryVectorFromPayload() ([]byte, int, error)
GetFloat16VectorFromPayload() ([]byte, int, error) GetFloat16VectorFromPayload() ([]byte, int, error)
GetBFloat16VectorFromPayload() ([]byte, int, error) GetBFloat16VectorFromPayload() ([]byte, int, error)

View File

@ -4,29 +4,35 @@ import (
"bytes" "bytes"
"context" "context"
"fmt" "fmt"
"time"
"github.com/apache/arrow/go/v12/arrow" "github.com/apache/arrow/go/v12/arrow"
"github.com/apache/arrow/go/v12/arrow/array"
"github.com/apache/arrow/go/v12/arrow/memory" "github.com/apache/arrow/go/v12/arrow/memory"
"github.com/apache/arrow/go/v12/parquet" "github.com/apache/arrow/go/v12/parquet"
"github.com/apache/arrow/go/v12/parquet/file" "github.com/apache/arrow/go/v12/parquet/file"
"github.com/apache/arrow/go/v12/parquet/pqarrow" "github.com/apache/arrow/go/v12/parquet/pqarrow"
"github.com/cockroachdb/errors" "github.com/cockroachdb/errors"
"github.com/golang/protobuf/proto" "github.com/golang/protobuf/proto"
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/typeutil" "github.com/milvus-io/milvus/pkg/util/typeutil"
) )
// PayloadReader reads data from payload // PayloadReader reads data from payload
type PayloadReader struct { type PayloadReader struct {
reader *file.Reader reader *file.Reader
colType schemapb.DataType colType schemapb.DataType
numRows int64 numRows int64
nullable bool
} }
var _ PayloadReaderInterface = (*PayloadReader)(nil) var _ PayloadReaderInterface = (*PayloadReader)(nil)
func NewPayloadReader(colType schemapb.DataType, buf []byte) (*PayloadReader, error) { func NewPayloadReader(colType schemapb.DataType, buf []byte, nullable bool) (*PayloadReader, error) {
if len(buf) == 0 { if len(buf) == 0 {
return nil, errors.New("create Payload reader failed, buffer is empty") return nil, errors.New("create Payload reader failed, buffer is empty")
} }
@ -34,59 +40,66 @@ func NewPayloadReader(colType schemapb.DataType, buf []byte) (*PayloadReader, er
if err != nil { if err != nil {
return nil, err return nil, err
} }
return &PayloadReader{reader: parquetReader, colType: colType, numRows: parquetReader.NumRows()}, nil return &PayloadReader{reader: parquetReader, colType: colType, numRows: parquetReader.NumRows(), nullable: nullable}, nil
} }
// GetDataFromPayload returns data,length from payload, returns err if failed // GetDataFromPayload returns data,length from payload, returns err if failed
// Return: // Return:
// //
// `interface{}`: all types.
// `int`: dim, only meaningful to FLOAT/BINARY VECTOR type. // `interface{}`: all types.
// `error`: error. // `[]bool`: validData, only meaningful to ScalarField.
func (r *PayloadReader) GetDataFromPayload() (interface{}, int, error) { // `int`: dim, only meaningful to FLOAT/BINARY VECTOR type.
// `error`: error.
func (r *PayloadReader) GetDataFromPayload() (interface{}, []bool, int, error) {
switch r.colType { switch r.colType {
case schemapb.DataType_Bool: case schemapb.DataType_Bool:
val, err := r.GetBoolFromPayload() val, validData, err := r.GetBoolFromPayload()
return val, 0, err return val, validData, 0, err
case schemapb.DataType_Int8: case schemapb.DataType_Int8:
val, err := r.GetInt8FromPayload() val, validData, err := r.GetInt8FromPayload()
return val, 0, err return val, validData, 0, err
case schemapb.DataType_Int16: case schemapb.DataType_Int16:
val, err := r.GetInt16FromPayload() val, validData, err := r.GetInt16FromPayload()
return val, 0, err return val, validData, 0, err
case schemapb.DataType_Int32: case schemapb.DataType_Int32:
val, err := r.GetInt32FromPayload() val, validData, err := r.GetInt32FromPayload()
return val, 0, err return val, validData, 0, err
case schemapb.DataType_Int64: case schemapb.DataType_Int64:
val, err := r.GetInt64FromPayload() val, validData, err := r.GetInt64FromPayload()
return val, 0, err return val, validData, 0, err
case schemapb.DataType_Float: case schemapb.DataType_Float:
val, err := r.GetFloatFromPayload() val, validData, err := r.GetFloatFromPayload()
return val, 0, err return val, validData, 0, err
case schemapb.DataType_Double: case schemapb.DataType_Double:
val, err := r.GetDoubleFromPayload() val, validData, err := r.GetDoubleFromPayload()
return val, 0, err return val, validData, 0, err
case schemapb.DataType_BinaryVector: case schemapb.DataType_BinaryVector:
return r.GetBinaryVectorFromPayload() val, dim, err := r.GetBinaryVectorFromPayload()
return val, nil, dim, err
case schemapb.DataType_FloatVector: case schemapb.DataType_FloatVector:
return r.GetFloatVectorFromPayload() val, dim, err := r.GetFloatVectorFromPayload()
return val, nil, dim, err
case schemapb.DataType_Float16Vector: case schemapb.DataType_Float16Vector:
return r.GetFloat16VectorFromPayload() val, dim, err := r.GetFloat16VectorFromPayload()
return val, nil, dim, err
case schemapb.DataType_BFloat16Vector: case schemapb.DataType_BFloat16Vector:
return r.GetBFloat16VectorFromPayload() val, dim, err := r.GetBFloat16VectorFromPayload()
return val, nil, dim, err
case schemapb.DataType_SparseFloatVector: case schemapb.DataType_SparseFloatVector:
return r.GetSparseFloatVectorFromPayload() val, dim, err := r.GetSparseFloatVectorFromPayload()
return val, nil, dim, err
case schemapb.DataType_String, schemapb.DataType_VarChar: case schemapb.DataType_String, schemapb.DataType_VarChar:
val, err := r.GetStringFromPayload() val, validData, err := r.GetStringFromPayload()
return val, 0, err return val, validData, 0, err
case schemapb.DataType_Array: case schemapb.DataType_Array:
val, err := r.GetArrayFromPayload() val, validData, err := r.GetArrayFromPayload()
return val, 0, err return val, validData, 0, err
case schemapb.DataType_JSON: case schemapb.DataType_JSON:
val, err := r.GetJSONFromPayload() val, validData, err := r.GetJSONFromPayload()
return val, 0, err return val, validData, 0, err
default: default:
return nil, 0, errors.New("unknown type") return nil, nil, 0, merr.WrapErrParameterInvalidMsg("unknown type")
} }
} }
@ -96,169 +109,327 @@ func (r *PayloadReader) ReleasePayloadReader() error {
} }
// GetBoolFromPayload returns bool slice from payload. // GetBoolFromPayload returns bool slice from payload.
func (r *PayloadReader) GetBoolFromPayload() ([]bool, error) { func (r *PayloadReader) GetBoolFromPayload() ([]bool, []bool, error) {
if r.colType != schemapb.DataType_Bool { if r.colType != schemapb.DataType_Bool {
return nil, fmt.Errorf("failed to get bool from datatype %v", r.colType.String()) return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get bool from datatype %v", r.colType.String()))
} }
values := make([]bool, r.numRows) values := make([]bool, r.numRows)
if r.nullable {
validData := make([]bool, r.numRows)
valuesRead, err := ReadData[bool, *array.Boolean](r.reader, values, validData, r.numRows)
if err != nil {
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, validData, nil
}
valuesRead, err := ReadDataFromAllRowGroups[bool, *file.BooleanColumnChunkReader](r.reader, values, 0, r.numRows) valuesRead, err := ReadDataFromAllRowGroups[bool, *file.BooleanColumnChunkReader](r.reader, values, 0, r.numRows)
if err != nil { if err != nil {
return nil, err return nil, nil, err
} }
if valuesRead != r.numRows { if valuesRead != r.numRows {
return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead) return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
} }
return values, nil return values, nil, nil
} }
// GetByteFromPayload returns byte slice from payload // GetByteFromPayload returns byte slice from payload
func (r *PayloadReader) GetByteFromPayload() ([]byte, error) { func (r *PayloadReader) GetByteFromPayload() ([]byte, []bool, error) {
if r.colType != schemapb.DataType_Int8 { if r.colType != schemapb.DataType_Int8 {
return nil, fmt.Errorf("failed to get byte from datatype %v", r.colType.String()) return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get byte from datatype %v", r.colType.String()))
} }
if r.nullable {
values := make([]int32, r.numRows)
validData := make([]bool, r.numRows)
valuesRead, err := ReadData[int32, *array.Int32](r.reader, values, validData, r.numRows)
if err != nil {
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
ret := make([]byte, r.numRows)
for i := int64(0); i < r.numRows; i++ {
ret[i] = byte(values[i])
}
return ret, validData, nil
}
values := make([]int32, r.numRows) values := make([]int32, r.numRows)
valuesRead, err := ReadDataFromAllRowGroups[int32, *file.Int32ColumnChunkReader](r.reader, values, 0, r.numRows) valuesRead, err := ReadDataFromAllRowGroups[int32, *file.Int32ColumnChunkReader](r.reader, values, 0, r.numRows)
if err != nil { if err != nil {
return nil, err return nil, nil, err
} }
if valuesRead != r.numRows { if valuesRead != r.numRows {
return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead) return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
} }
ret := make([]byte, r.numRows) ret := make([]byte, r.numRows)
for i := int64(0); i < r.numRows; i++ { for i := int64(0); i < r.numRows; i++ {
ret[i] = byte(values[i]) ret[i] = byte(values[i])
} }
return ret, nil return ret, nil, nil
} }
// GetInt8FromPayload returns int8 slice from payload func (r *PayloadReader) GetInt8FromPayload() ([]int8, []bool, error) {
func (r *PayloadReader) GetInt8FromPayload() ([]int8, error) {
if r.colType != schemapb.DataType_Int8 { if r.colType != schemapb.DataType_Int8 {
return nil, fmt.Errorf("failed to get int8 from datatype %v", r.colType.String()) return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get int8 from datatype %v", r.colType.String()))
} }
if r.nullable {
values := make([]int8, r.numRows)
validData := make([]bool, r.numRows)
valuesRead, err := ReadData[int8, *array.Int8](r.reader, values, validData, r.numRows)
if err != nil {
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, validData, nil
}
values := make([]int32, r.numRows) values := make([]int32, r.numRows)
valuesRead, err := ReadDataFromAllRowGroups[int32, *file.Int32ColumnChunkReader](r.reader, values, 0, r.numRows) valuesRead, err := ReadDataFromAllRowGroups[int32, *file.Int32ColumnChunkReader](r.reader, values, 0, r.numRows)
if err != nil { if err != nil {
return nil, err return nil, nil, err
} }
if valuesRead != r.numRows { if valuesRead != r.numRows {
return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead) return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
} }
ret := make([]int8, r.numRows) ret := make([]int8, r.numRows)
for i := int64(0); i < r.numRows; i++ { for i := int64(0); i < r.numRows; i++ {
ret[i] = int8(values[i]) ret[i] = int8(values[i])
} }
return ret, nil return ret, nil, nil
} }
func (r *PayloadReader) GetInt16FromPayload() ([]int16, error) { func (r *PayloadReader) GetInt16FromPayload() ([]int16, []bool, error) {
if r.colType != schemapb.DataType_Int16 { if r.colType != schemapb.DataType_Int16 {
return nil, fmt.Errorf("failed to get int16 from datatype %v", r.colType.String()) return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get int16 from datatype %v", r.colType.String()))
} }
if r.nullable {
values := make([]int16, r.numRows)
validData := make([]bool, r.numRows)
valuesRead, err := ReadData[int16, *array.Int16](r.reader, values, validData, r.numRows)
if err != nil {
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, validData, nil
}
values := make([]int32, r.numRows) values := make([]int32, r.numRows)
valuesRead, err := ReadDataFromAllRowGroups[int32, *file.Int32ColumnChunkReader](r.reader, values, 0, r.numRows) valuesRead, err := ReadDataFromAllRowGroups[int32, *file.Int32ColumnChunkReader](r.reader, values, 0, r.numRows)
if err != nil { if err != nil {
return nil, err return nil, nil, err
} }
if valuesRead != r.numRows { if valuesRead != r.numRows {
return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead) return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
} }
ret := make([]int16, r.numRows) ret := make([]int16, r.numRows)
for i := int64(0); i < r.numRows; i++ { for i := int64(0); i < r.numRows; i++ {
ret[i] = int16(values[i]) ret[i] = int16(values[i])
} }
return ret, nil return ret, nil, nil
} }
func (r *PayloadReader) GetInt32FromPayload() ([]int32, error) { func (r *PayloadReader) GetInt32FromPayload() ([]int32, []bool, error) {
if r.colType != schemapb.DataType_Int32 { if r.colType != schemapb.DataType_Int32 {
return nil, fmt.Errorf("failed to get int32 from datatype %v", r.colType.String()) return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get int32 from datatype %v", r.colType.String()))
} }
values := make([]int32, r.numRows) values := make([]int32, r.numRows)
if r.nullable {
validData := make([]bool, r.numRows)
valuesRead, err := ReadData[int32, *array.Int32](r.reader, values, validData, r.numRows)
if err != nil {
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, validData, nil
}
valuesRead, err := ReadDataFromAllRowGroups[int32, *file.Int32ColumnChunkReader](r.reader, values, 0, r.numRows) valuesRead, err := ReadDataFromAllRowGroups[int32, *file.Int32ColumnChunkReader](r.reader, values, 0, r.numRows)
if err != nil { if err != nil {
return nil, err return nil, nil, err
} }
if valuesRead != r.numRows { if valuesRead != r.numRows {
return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead) return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
} }
return values, nil return values, nil, nil
} }
func (r *PayloadReader) GetInt64FromPayload() ([]int64, error) { func (r *PayloadReader) GetInt64FromPayload() ([]int64, []bool, error) {
if r.colType != schemapb.DataType_Int64 { if r.colType != schemapb.DataType_Int64 {
return nil, fmt.Errorf("failed to get int64 from datatype %v", r.colType.String()) return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get int64 from datatype %v", r.colType.String()))
} }
values := make([]int64, r.numRows) values := make([]int64, r.numRows)
if r.nullable {
validData := make([]bool, r.numRows)
valuesRead, err := ReadData[int64, *array.Int64](r.reader, values, validData, r.numRows)
if err != nil {
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, validData, nil
}
valuesRead, err := ReadDataFromAllRowGroups[int64, *file.Int64ColumnChunkReader](r.reader, values, 0, r.numRows) valuesRead, err := ReadDataFromAllRowGroups[int64, *file.Int64ColumnChunkReader](r.reader, values, 0, r.numRows)
if err != nil { if err != nil {
return nil, err return nil, nil, err
} }
if valuesRead != r.numRows { if valuesRead != r.numRows {
return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead) return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
} }
return values, nil return values, nil, nil
} }
func (r *PayloadReader) GetFloatFromPayload() ([]float32, error) { func (r *PayloadReader) GetFloatFromPayload() ([]float32, []bool, error) {
if r.colType != schemapb.DataType_Float { if r.colType != schemapb.DataType_Float {
return nil, fmt.Errorf("failed to get float32 from datatype %v", r.colType.String()) return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get float32 from datatype %v", r.colType.String()))
} }
values := make([]float32, r.numRows) values := make([]float32, r.numRows)
if r.nullable {
validData := make([]bool, r.numRows)
valuesRead, err := ReadData[float32, *array.Float32](r.reader, values, validData, r.numRows)
if err != nil {
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, validData, nil
}
valuesRead, err := ReadDataFromAllRowGroups[float32, *file.Float32ColumnChunkReader](r.reader, values, 0, r.numRows) valuesRead, err := ReadDataFromAllRowGroups[float32, *file.Float32ColumnChunkReader](r.reader, values, 0, r.numRows)
if err != nil { if err != nil {
return nil, err return nil, nil, err
} }
if valuesRead != r.numRows { if valuesRead != r.numRows {
return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead) return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
} }
return values, nil, nil
return values, nil
} }
func (r *PayloadReader) GetDoubleFromPayload() ([]float64, error) { func (r *PayloadReader) GetDoubleFromPayload() ([]float64, []bool, error) {
if r.colType != schemapb.DataType_Double { if r.colType != schemapb.DataType_Double {
return nil, fmt.Errorf("failed to get float32 from datatype %v", r.colType.String()) return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get double from datatype %v", r.colType.String()))
} }
values := make([]float64, r.numRows) values := make([]float64, r.numRows)
if r.nullable {
validData := make([]bool, r.numRows)
valuesRead, err := ReadData[float64, *array.Float64](r.reader, values, validData, r.numRows)
if err != nil {
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, validData, nil
}
valuesRead, err := ReadDataFromAllRowGroups[float64, *file.Float64ColumnChunkReader](r.reader, values, 0, r.numRows) valuesRead, err := ReadDataFromAllRowGroups[float64, *file.Float64ColumnChunkReader](r.reader, values, 0, r.numRows)
if err != nil { if err != nil {
return nil, err return nil, nil, err
} }
if valuesRead != r.numRows { if valuesRead != r.numRows {
return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead) return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
} }
return values, nil return values, nil, nil
} }
func (r *PayloadReader) GetStringFromPayload() ([]string, error) { func (r *PayloadReader) GetStringFromPayload() ([]string, []bool, error) {
if r.colType != schemapb.DataType_String && r.colType != schemapb.DataType_VarChar { if r.colType != schemapb.DataType_String && r.colType != schemapb.DataType_VarChar {
return nil, fmt.Errorf("failed to get string from datatype %v", r.colType.String()) return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get string from datatype %v", r.colType.String()))
} }
return readByteAndConvert(r, func(bytes parquet.ByteArray) string { if r.nullable {
values := make([]string, r.numRows)
validData := make([]bool, r.numRows)
valuesRead, err := ReadData[string, *array.String](r.reader, values, validData, r.numRows)
if err != nil {
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, validData, nil
}
value, err := readByteAndConvert(r, func(bytes parquet.ByteArray) string {
return bytes.String() return bytes.String()
}) })
if err != nil {
return nil, nil, err
}
return value, nil, nil
}
func (r *PayloadReader) GetArrayFromPayload() ([]*schemapb.ScalarField, []bool, error) {
if r.colType != schemapb.DataType_Array {
return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get array from datatype %v", r.colType.String()))
}
if r.nullable {
return readNullableByteAndConvert(r, func(bytes []byte) *schemapb.ScalarField {
v := &schemapb.ScalarField{}
proto.Unmarshal(bytes, v)
return v
})
}
value, err := readByteAndConvert(r, func(bytes parquet.ByteArray) *schemapb.ScalarField {
v := &schemapb.ScalarField{}
proto.Unmarshal(bytes, v)
return v
})
if err != nil {
return nil, nil, err
}
return value, nil, nil
}
func (r *PayloadReader) GetJSONFromPayload() ([][]byte, []bool, error) {
if r.colType != schemapb.DataType_JSON {
return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get json from datatype %v", r.colType.String()))
}
if r.nullable {
return readNullableByteAndConvert(r, func(bytes []byte) []byte {
return bytes
})
}
value, err := readByteAndConvert(r, func(bytes parquet.ByteArray) []byte {
return bytes
})
if err != nil {
return nil, nil, err
}
return value, nil, nil
} }
func (r *PayloadReader) GetByteArrayDataSet() (*DataSet[parquet.ByteArray, *file.ByteArrayColumnChunkReader], error) { func (r *PayloadReader) GetByteArrayDataSet() (*DataSet[parquet.ByteArray, *file.ByteArrayColumnChunkReader], error) {
@ -282,25 +453,23 @@ func (r *PayloadReader) GetArrowRecordReader() (pqarrow.RecordReader, error) {
return rr, nil return rr, nil
} }
func (r *PayloadReader) GetArrayFromPayload() ([]*schemapb.ScalarField, error) { func readNullableByteAndConvert[T any](r *PayloadReader, convert func([]byte) T) ([]T, []bool, error) {
if r.colType != schemapb.DataType_Array { values := make([][]byte, r.numRows)
return nil, fmt.Errorf("failed to get string from datatype %v", r.colType.String()) validData := make([]bool, r.numRows)
} valuesRead, err := ReadData[[]byte, *array.Binary](r.reader, values, validData, r.numRows)
return readByteAndConvert(r, func(bytes parquet.ByteArray) *schemapb.ScalarField { if err != nil {
v := &schemapb.ScalarField{} return nil, nil, err
proto.Unmarshal(bytes, v)
return v
})
}
func (r *PayloadReader) GetJSONFromPayload() ([][]byte, error) {
if r.colType != schemapb.DataType_JSON {
return nil, fmt.Errorf("failed to get string from datatype %v", r.colType.String())
} }
return readByteAndConvert(r, func(bytes parquet.ByteArray) []byte { if valuesRead != r.numRows {
return bytes return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}) }
ret := make([]T, r.numRows)
for i := 0; i < int(r.numRows); i++ {
ret[i] = convert(values[i])
}
return ret, validData, nil
} }
func readByteAndConvert[T any](r *PayloadReader, convert func(parquet.ByteArray) T) ([]T, error) { func readByteAndConvert[T any](r *PayloadReader, convert func(parquet.ByteArray) T) ([]T, error) {
@ -568,3 +737,67 @@ func (s *DataSet[T, E]) NextBatch(batch int64) ([]T, error) {
s.cnt += batch s.cnt += batch
return result, nil return result, nil
} }
func ReadData[T any, E interface {
Value(int) T
NullBitmapBytes() []byte
}](reader *file.Reader, value []T, validData []bool, numRows int64) (int64, error) {
var offset int
fileReader, err := pqarrow.NewFileReader(reader, pqarrow.ArrowReadProperties{}, memory.DefaultAllocator)
// defer fileReader.ParquetReader().Close()
if err != nil {
log.Warn("create arrow parquet file reader failed", zap.Error(err))
return -1, err
}
schema, err := fileReader.Schema()
if err != nil {
log.Warn("can't schema from file", zap.Error(err))
return -1, err
}
for i, field := range schema.Fields() {
// Spawn a new context to ignore cancellation from parental context.
newCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
columnReader, err := fileReader.GetColumn(newCtx, i)
if err != nil {
log.Warn("get column reader failed", zap.String("fieldName", field.Name), zap.Error(err))
return -1, err
}
chunked, err := columnReader.NextBatch(numRows)
if err != nil {
return -1, err
}
for _, chunk := range chunked.Chunks() {
dataNums := chunk.Data().Len()
reader, ok := chunk.(E)
if !ok {
log.Warn("the column data in parquet is not equal to field", zap.String("fieldName", field.Name), zap.String("actual type", chunk.DataType().Name()))
return -1, merr.WrapErrImportFailed(fmt.Sprintf("the column data in parquet is not equal to field: %s, but: %s", field.Name, chunk.DataType().Name()))
}
nullBitset := bytesToBoolArray(dataNums, reader.NullBitmapBytes())
for i := 0; i < dataNums; i++ {
value[offset] = reader.Value(i)
validData[offset] = nullBitset[i]
offset++
}
}
}
return int64(offset), nil
}
// todo(smellthemoon): use byte to store valid_data
func bytesToBoolArray(length int, bytes []byte) []bool {
bools := make([]bool, 0, length)
for i := 0; i < length; i++ {
bit := (bytes[uint(i)/8] & BitMask[byte(i)%8]) != 0
bools = append(bools, bit)
}
return bools
}
var (
BitMask = [8]byte{1, 2, 4, 8, 16, 32, 64, 128}
FlippedBitMask = [8]byte{254, 253, 251, 247, 239, 223, 191, 127}
)

View File

@ -31,7 +31,7 @@ func (s *ReadDataFromAllRowGroupsSuite) SetupSuite() {
s.size = 1 << 10 s.size = 1 << 10
data := make([]int8, s.size) data := make([]int8, s.size)
err = ew.AddInt8ToPayload(data) err = ew.AddInt8ToPayload(data, nil)
s.Require().NoError(err) s.Require().NoError(err)
ew.SetEventTimestamp(1, 1) ew.SetEventTimestamp(1, 1)

File diff suppressed because it is too large Load Diff

View File

@ -33,6 +33,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/typeutil" "github.com/milvus-io/milvus/pkg/util/typeutil"
) )
@ -46,18 +47,29 @@ type NativePayloadWriter struct {
flushedRows int flushedRows int
output *bytes.Buffer output *bytes.Buffer
releaseOnce sync.Once releaseOnce sync.Once
dim int
nullable bool
} }
func NewPayloadWriter(colType schemapb.DataType, dim ...int) (PayloadWriterInterface, error) { func NewPayloadWriter(colType schemapb.DataType, nullable bool, dim ...int) (PayloadWriterInterface, error) {
var arrowType arrow.DataType var arrowType arrow.DataType
var dimension int
// writer for sparse float vector doesn't require dim // writer for sparse float vector doesn't require dim
if typeutil.IsVectorType(colType) && !typeutil.IsSparseFloatVectorType(colType) { if typeutil.IsVectorType(colType) && !typeutil.IsSparseFloatVectorType(colType) {
if len(dim) != 1 { if len(dim) != 1 {
return nil, fmt.Errorf("incorrect input numbers") return nil, merr.WrapErrParameterInvalidMsg("incorrect input numbers")
}
if nullable {
return nil, merr.WrapErrParameterInvalidMsg("vector type not supprot nullable")
} }
arrowType = milvusDataTypeToArrowType(colType, dim[0]) arrowType = milvusDataTypeToArrowType(colType, dim[0])
dimension = dim[0]
} else { } else {
if len(dim) != 0 {
return nil, merr.WrapErrParameterInvalidMsg("incorrect input numbers")
}
arrowType = milvusDataTypeToArrowType(colType, 1) arrowType = milvusDataTypeToArrowType(colType, 1)
dimension = 1
} }
builder := array.NewBuilder(memory.DefaultAllocator, arrowType) builder := array.NewBuilder(memory.DefaultAllocator, arrowType)
@ -69,117 +81,148 @@ func NewPayloadWriter(colType schemapb.DataType, dim ...int) (PayloadWriterInter
finished: false, finished: false,
flushedRows: 0, flushedRows: 0,
output: new(bytes.Buffer), output: new(bytes.Buffer),
dim: dimension,
nullable: nullable,
}, nil }, nil
} }
func (w *NativePayloadWriter) AddDataToPayload(data interface{}, dim ...int) error { func (w *NativePayloadWriter) AddDataToPayload(data interface{}, validData []bool) error {
switch len(dim) { switch w.dataType {
case 0: case schemapb.DataType_Bool:
switch w.dataType { val, ok := data.([]bool)
case schemapb.DataType_Bool: if !ok {
val, ok := data.([]bool) return merr.WrapErrParameterInvalidMsg("incorrect data type")
if !ok {
return errors.New("incorrect data type")
}
return w.AddBoolToPayload(val)
case schemapb.DataType_Int8:
val, ok := data.([]int8)
if !ok {
return errors.New("incorrect data type")
}
return w.AddInt8ToPayload(val)
case schemapb.DataType_Int16:
val, ok := data.([]int16)
if !ok {
return errors.New("incorrect data type")
}
return w.AddInt16ToPayload(val)
case schemapb.DataType_Int32:
val, ok := data.([]int32)
if !ok {
return errors.New("incorrect data type")
}
return w.AddInt32ToPayload(val)
case schemapb.DataType_Int64:
val, ok := data.([]int64)
if !ok {
return errors.New("incorrect data type")
}
return w.AddInt64ToPayload(val)
case schemapb.DataType_Float:
val, ok := data.([]float32)
if !ok {
return errors.New("incorrect data type")
}
return w.AddFloatToPayload(val)
case schemapb.DataType_Double:
val, ok := data.([]float64)
if !ok {
return errors.New("incorrect data type")
}
return w.AddDoubleToPayload(val)
case schemapb.DataType_String, schemapb.DataType_VarChar:
val, ok := data.(string)
if !ok {
return errors.New("incorrect data type")
}
return w.AddOneStringToPayload(val)
case schemapb.DataType_Array:
val, ok := data.(*schemapb.ScalarField)
if !ok {
return errors.New("incorrect data type")
}
return w.AddOneArrayToPayload(val)
case schemapb.DataType_JSON:
val, ok := data.([]byte)
if !ok {
return errors.New("incorrect data type")
}
return w.AddOneJSONToPayload(val)
default:
return errors.New("incorrect datatype")
} }
case 1: return w.AddBoolToPayload(val, validData)
switch w.dataType { case schemapb.DataType_Int8:
case schemapb.DataType_BinaryVector: val, ok := data.([]int8)
val, ok := data.([]byte) if !ok {
if !ok { return merr.WrapErrParameterInvalidMsg("incorrect data type")
return errors.New("incorrect data type")
}
return w.AddBinaryVectorToPayload(val, dim[0])
case schemapb.DataType_FloatVector:
val, ok := data.([]float32)
if !ok {
return errors.New("incorrect data type")
}
return w.AddFloatVectorToPayload(val, dim[0])
case schemapb.DataType_Float16Vector:
val, ok := data.([]byte)
if !ok {
return errors.New("incorrect data type")
}
return w.AddFloat16VectorToPayload(val, dim[0])
case schemapb.DataType_BFloat16Vector:
val, ok := data.([]byte)
if !ok {
return errors.New("incorrect data type")
}
return w.AddBFloat16VectorToPayload(val, dim[0])
case schemapb.DataType_SparseFloatVector:
val, ok := data.(*SparseFloatVectorFieldData)
if !ok {
return errors.New("incorrect data type")
}
return w.AddSparseFloatVectorToPayload(val)
default:
return errors.New("incorrect datatype")
} }
return w.AddInt8ToPayload(val, validData)
case schemapb.DataType_Int16:
val, ok := data.([]int16)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddInt16ToPayload(val, validData)
case schemapb.DataType_Int32:
val, ok := data.([]int32)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddInt32ToPayload(val, validData)
case schemapb.DataType_Int64:
val, ok := data.([]int64)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddInt64ToPayload(val, validData)
case schemapb.DataType_Float:
val, ok := data.([]float32)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddFloatToPayload(val, validData)
case schemapb.DataType_Double:
val, ok := data.([]float64)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddDoubleToPayload(val, validData)
case schemapb.DataType_String, schemapb.DataType_VarChar:
val, ok := data.(string)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
isValid := true
if len(validData) > 1 {
return merr.WrapErrParameterInvalidMsg("wrong input length when add data to payload")
}
if len(validData) == 0 && w.nullable {
return merr.WrapErrParameterInvalidMsg("need pass valid_data when nullable==true")
}
if len(validData) == 1 {
if !w.nullable {
return merr.WrapErrParameterInvalidMsg("no need pass valid_data when nullable==false")
}
isValid = validData[0]
}
return w.AddOneStringToPayload(val, isValid)
case schemapb.DataType_Array:
val, ok := data.(*schemapb.ScalarField)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
isValid := true
if len(validData) > 1 {
return merr.WrapErrParameterInvalidMsg("wrong input length when add data to payload")
}
if len(validData) == 0 && w.nullable {
return merr.WrapErrParameterInvalidMsg("need pass valid_data when nullable==true")
}
if len(validData) == 1 {
if !w.nullable {
return merr.WrapErrParameterInvalidMsg("no need pass valid_data when nullable==false")
}
isValid = validData[0]
}
return w.AddOneArrayToPayload(val, isValid)
case schemapb.DataType_JSON:
val, ok := data.([]byte)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
isValid := true
if len(validData) > 1 {
return merr.WrapErrParameterInvalidMsg("wrong input length when add data to payload")
}
if len(validData) == 0 && w.nullable {
return merr.WrapErrParameterInvalidMsg("need pass valid_data when nullable==true")
}
if len(validData) == 1 {
if !w.nullable {
return merr.WrapErrParameterInvalidMsg("no need pass valid_data when nullable==false")
}
isValid = validData[0]
}
return w.AddOneJSONToPayload(val, isValid)
case schemapb.DataType_BinaryVector:
val, ok := data.([]byte)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddBinaryVectorToPayload(val, w.dim)
case schemapb.DataType_FloatVector:
val, ok := data.([]float32)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddFloatVectorToPayload(val, w.dim)
case schemapb.DataType_Float16Vector:
val, ok := data.([]byte)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddFloat16VectorToPayload(val, w.dim)
case schemapb.DataType_BFloat16Vector:
val, ok := data.([]byte)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddBFloat16VectorToPayload(val, w.dim)
case schemapb.DataType_SparseFloatVector:
val, ok := data.(*SparseFloatVectorFieldData)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddSparseFloatVectorToPayload(val)
default: default:
return errors.New("incorrect input numbers") return errors.New("unsupported datatype")
} }
} }
func (w *NativePayloadWriter) AddBoolToPayload(data []bool) error { func (w *NativePayloadWriter) AddBoolToPayload(data []bool, validData []bool) error {
if w.finished { if w.finished {
return errors.New("can't append data to finished bool payload") return errors.New("can't append data to finished bool payload")
} }
@ -188,16 +231,26 @@ func (w *NativePayloadWriter) AddBoolToPayload(data []bool) error {
return errors.New("can't add empty msgs into bool payload") return errors.New("can't add empty msgs into bool payload")
} }
if !w.nullable && len(validData) != 0 {
msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData))
return merr.WrapErrParameterInvalidMsg(msg)
}
if w.nullable && len(data) != len(validData) {
msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data))
return merr.WrapErrParameterInvalidMsg(msg)
}
builder, ok := w.builder.(*array.BooleanBuilder) builder, ok := w.builder.(*array.BooleanBuilder)
if !ok { if !ok {
return errors.New("failed to cast ArrayBuilder") return errors.New("failed to cast ArrayBuilder")
} }
builder.AppendValues(data, nil) builder.AppendValues(data, validData)
return nil return nil
} }
func (w *NativePayloadWriter) AddByteToPayload(data []byte) error { func (w *NativePayloadWriter) AddByteToPayload(data []byte, validData []bool) error {
if w.finished { if w.finished {
return errors.New("can't append data to finished byte payload") return errors.New("can't append data to finished byte payload")
} }
@ -206,6 +259,16 @@ func (w *NativePayloadWriter) AddByteToPayload(data []byte) error {
return errors.New("can't add empty msgs into byte payload") return errors.New("can't add empty msgs into byte payload")
} }
if !w.nullable && len(validData) != 0 {
msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData))
return merr.WrapErrParameterInvalidMsg(msg)
}
if w.nullable && len(data) != len(validData) {
msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data))
return merr.WrapErrParameterInvalidMsg(msg)
}
builder, ok := w.builder.(*array.Int8Builder) builder, ok := w.builder.(*array.Int8Builder)
if !ok { if !ok {
return errors.New("failed to cast ByteBuilder") return errors.New("failed to cast ByteBuilder")
@ -214,12 +277,15 @@ func (w *NativePayloadWriter) AddByteToPayload(data []byte) error {
builder.Reserve(len(data)) builder.Reserve(len(data))
for i := range data { for i := range data {
builder.Append(int8(data[i])) builder.Append(int8(data[i]))
if w.nullable && !validData[i] {
builder.AppendNull()
}
} }
return nil return nil
} }
func (w *NativePayloadWriter) AddInt8ToPayload(data []int8) error { func (w *NativePayloadWriter) AddInt8ToPayload(data []int8, validData []bool) error {
if w.finished { if w.finished {
return errors.New("can't append data to finished int8 payload") return errors.New("can't append data to finished int8 payload")
} }
@ -228,16 +294,26 @@ func (w *NativePayloadWriter) AddInt8ToPayload(data []int8) error {
return errors.New("can't add empty msgs into int8 payload") return errors.New("can't add empty msgs into int8 payload")
} }
if !w.nullable && len(validData) != 0 {
msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData))
return merr.WrapErrParameterInvalidMsg(msg)
}
if w.nullable && len(data) != len(validData) {
msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data))
return merr.WrapErrParameterInvalidMsg(msg)
}
builder, ok := w.builder.(*array.Int8Builder) builder, ok := w.builder.(*array.Int8Builder)
if !ok { if !ok {
return errors.New("failed to cast Int8Builder") return errors.New("failed to cast Int8Builder")
} }
builder.AppendValues(data, nil) builder.AppendValues(data, validData)
return nil return nil
} }
func (w *NativePayloadWriter) AddInt16ToPayload(data []int16) error { func (w *NativePayloadWriter) AddInt16ToPayload(data []int16, validData []bool) error {
if w.finished { if w.finished {
return errors.New("can't append data to finished int16 payload") return errors.New("can't append data to finished int16 payload")
} }
@ -246,16 +322,26 @@ func (w *NativePayloadWriter) AddInt16ToPayload(data []int16) error {
return errors.New("can't add empty msgs into int16 payload") return errors.New("can't add empty msgs into int16 payload")
} }
if !w.nullable && len(validData) != 0 {
msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData))
return merr.WrapErrParameterInvalidMsg(msg)
}
if w.nullable && len(data) != len(validData) {
msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data))
return merr.WrapErrParameterInvalidMsg(msg)
}
builder, ok := w.builder.(*array.Int16Builder) builder, ok := w.builder.(*array.Int16Builder)
if !ok { if !ok {
return errors.New("failed to cast Int16Builder") return errors.New("failed to cast Int16Builder")
} }
builder.AppendValues(data, nil) builder.AppendValues(data, validData)
return nil return nil
} }
func (w *NativePayloadWriter) AddInt32ToPayload(data []int32) error { func (w *NativePayloadWriter) AddInt32ToPayload(data []int32, validData []bool) error {
if w.finished { if w.finished {
return errors.New("can't append data to finished int32 payload") return errors.New("can't append data to finished int32 payload")
} }
@ -264,16 +350,26 @@ func (w *NativePayloadWriter) AddInt32ToPayload(data []int32) error {
return errors.New("can't add empty msgs into int32 payload") return errors.New("can't add empty msgs into int32 payload")
} }
if !w.nullable && len(validData) != 0 {
msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData))
return merr.WrapErrParameterInvalidMsg(msg)
}
if w.nullable && len(data) != len(validData) {
msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data))
return merr.WrapErrParameterInvalidMsg(msg)
}
builder, ok := w.builder.(*array.Int32Builder) builder, ok := w.builder.(*array.Int32Builder)
if !ok { if !ok {
return errors.New("failed to cast Int32Builder") return errors.New("failed to cast Int32Builder")
} }
builder.AppendValues(data, nil) builder.AppendValues(data, validData)
return nil return nil
} }
func (w *NativePayloadWriter) AddInt64ToPayload(data []int64) error { func (w *NativePayloadWriter) AddInt64ToPayload(data []int64, validData []bool) error {
if w.finished { if w.finished {
return errors.New("can't append data to finished int64 payload") return errors.New("can't append data to finished int64 payload")
} }
@ -282,16 +378,26 @@ func (w *NativePayloadWriter) AddInt64ToPayload(data []int64) error {
return errors.New("can't add empty msgs into int64 payload") return errors.New("can't add empty msgs into int64 payload")
} }
if !w.nullable && len(validData) != 0 {
msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData))
return merr.WrapErrParameterInvalidMsg(msg)
}
if w.nullable && len(data) != len(validData) {
msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data))
return merr.WrapErrParameterInvalidMsg(msg)
}
builder, ok := w.builder.(*array.Int64Builder) builder, ok := w.builder.(*array.Int64Builder)
if !ok { if !ok {
return errors.New("failed to cast Int64Builder") return errors.New("failed to cast Int64Builder")
} }
builder.AppendValues(data, nil) builder.AppendValues(data, validData)
return nil return nil
} }
func (w *NativePayloadWriter) AddFloatToPayload(data []float32) error { func (w *NativePayloadWriter) AddFloatToPayload(data []float32, validData []bool) error {
if w.finished { if w.finished {
return errors.New("can't append data to finished float payload") return errors.New("can't append data to finished float payload")
} }
@ -300,16 +406,26 @@ func (w *NativePayloadWriter) AddFloatToPayload(data []float32) error {
return errors.New("can't add empty msgs into float payload") return errors.New("can't add empty msgs into float payload")
} }
if !w.nullable && len(validData) != 0 {
msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData))
return merr.WrapErrParameterInvalidMsg(msg)
}
if w.nullable && len(data) != len(validData) {
msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data))
return merr.WrapErrParameterInvalidMsg(msg)
}
builder, ok := w.builder.(*array.Float32Builder) builder, ok := w.builder.(*array.Float32Builder)
if !ok { if !ok {
return errors.New("failed to cast FloatBuilder") return errors.New("failed to cast FloatBuilder")
} }
builder.AppendValues(data, nil) builder.AppendValues(data, validData)
return nil return nil
} }
func (w *NativePayloadWriter) AddDoubleToPayload(data []float64) error { func (w *NativePayloadWriter) AddDoubleToPayload(data []float64, validData []bool) error {
if w.finished { if w.finished {
return errors.New("can't append data to finished double payload") return errors.New("can't append data to finished double payload")
} }
@ -318,35 +434,57 @@ func (w *NativePayloadWriter) AddDoubleToPayload(data []float64) error {
return errors.New("can't add empty msgs into double payload") return errors.New("can't add empty msgs into double payload")
} }
if !w.nullable && len(validData) != 0 {
msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData))
return merr.WrapErrParameterInvalidMsg(msg)
}
if w.nullable && len(data) != len(validData) {
msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data))
return merr.WrapErrParameterInvalidMsg(msg)
}
builder, ok := w.builder.(*array.Float64Builder) builder, ok := w.builder.(*array.Float64Builder)
if !ok { if !ok {
return errors.New("failed to cast DoubleBuilder") return errors.New("failed to cast DoubleBuilder")
} }
builder.AppendValues(data, nil) builder.AppendValues(data, validData)
return nil return nil
} }
func (w *NativePayloadWriter) AddOneStringToPayload(data string) error { func (w *NativePayloadWriter) AddOneStringToPayload(data string, isValid bool) error {
if w.finished { if w.finished {
return errors.New("can't append data to finished string payload") return errors.New("can't append data to finished string payload")
} }
if !w.nullable && !isValid {
return merr.WrapErrParameterInvalidMsg("not support null when nullable is false")
}
builder, ok := w.builder.(*array.StringBuilder) builder, ok := w.builder.(*array.StringBuilder)
if !ok { if !ok {
return errors.New("failed to cast StringBuilder") return errors.New("failed to cast StringBuilder")
} }
builder.Append(data) if !isValid {
builder.AppendNull()
} else {
builder.Append(data)
}
return nil return nil
} }
func (w *NativePayloadWriter) AddOneArrayToPayload(data *schemapb.ScalarField) error { func (w *NativePayloadWriter) AddOneArrayToPayload(data *schemapb.ScalarField, isValid bool) error {
if w.finished { if w.finished {
return errors.New("can't append data to finished array payload") return errors.New("can't append data to finished array payload")
} }
if !w.nullable && !isValid {
return merr.WrapErrParameterInvalidMsg("not support null when nullable is false")
}
bytes, err := proto.Marshal(data) bytes, err := proto.Marshal(data)
if err != nil { if err != nil {
return errors.New("Marshal ListValue failed") return errors.New("Marshal ListValue failed")
@ -357,22 +495,34 @@ func (w *NativePayloadWriter) AddOneArrayToPayload(data *schemapb.ScalarField) e
return errors.New("failed to cast BinaryBuilder") return errors.New("failed to cast BinaryBuilder")
} }
builder.Append(bytes) if !isValid {
builder.AppendNull()
} else {
builder.Append(bytes)
}
return nil return nil
} }
func (w *NativePayloadWriter) AddOneJSONToPayload(data []byte) error { func (w *NativePayloadWriter) AddOneJSONToPayload(data []byte, isValid bool) error {
if w.finished { if w.finished {
return errors.New("can't append data to finished json payload") return errors.New("can't append data to finished json payload")
} }
if !w.nullable && !isValid {
return merr.WrapErrParameterInvalidMsg("not support null when nullable is false")
}
builder, ok := w.builder.(*array.BinaryBuilder) builder, ok := w.builder.(*array.BinaryBuilder)
if !ok { if !ok {
return errors.New("failed to cast JsonBuilder") return errors.New("failed to cast JsonBuilder")
} }
builder.Append(data) if !isValid {
builder.AppendNull()
} else {
builder.Append(data)
}
return nil return nil
} }
@ -507,8 +657,9 @@ func (w *NativePayloadWriter) FinishPayloadWriter() error {
w.finished = true w.finished = true
field := arrow.Field{ field := arrow.Field{
Name: "val", Name: "val",
Type: w.arrowType, Type: w.arrowType,
Nullable: w.nullable,
} }
schema := arrow.NewSchema([]arrow.Field{ schema := arrow.NewSchema([]arrow.Field{
field, field,

View File

@ -9,241 +9,248 @@ import (
) )
func TestPayloadWriter_Failed(t *testing.T) { func TestPayloadWriter_Failed(t *testing.T) {
t.Run("wrong input", func(t *testing.T) {
_, err := NewPayloadWriter(schemapb.DataType_FloatVector, false)
require.Error(t, err)
_, err = NewPayloadWriter(schemapb.DataType_Bool, false, 1)
require.Error(t, err)
})
t.Run("Test Bool", func(t *testing.T) { t.Run("Test Bool", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Bool) w, err := NewPayloadWriter(schemapb.DataType_Bool, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddBoolToPayload([]bool{}) err = w.AddBoolToPayload([]bool{}, nil)
require.Error(t, err) require.Error(t, err)
err = w.FinishPayloadWriter() err = w.FinishPayloadWriter()
require.NoError(t, err) require.NoError(t, err)
err = w.AddBoolToPayload([]bool{false}) err = w.AddBoolToPayload([]bool{false}, nil)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Float) w, err = NewPayloadWriter(schemapb.DataType_Float, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddBoolToPayload([]bool{false}) err = w.AddBoolToPayload([]bool{false}, nil)
require.Error(t, err) require.Error(t, err)
}) })
t.Run("Test Byte", func(t *testing.T) { t.Run("Test Byte", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int8) w, err := NewPayloadWriter(schemapb.DataType_Int8, Params.CommonCfg.MaxBloomFalsePositive.PanicIfEmpty)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddByteToPayload([]byte{}) err = w.AddByteToPayload([]byte{}, nil)
require.Error(t, err) require.Error(t, err)
err = w.FinishPayloadWriter() err = w.FinishPayloadWriter()
require.NoError(t, err) require.NoError(t, err)
err = w.AddByteToPayload([]byte{0}) err = w.AddByteToPayload([]byte{0}, nil)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Float) w, err = NewPayloadWriter(schemapb.DataType_Float, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddByteToPayload([]byte{0}) err = w.AddByteToPayload([]byte{0}, nil)
require.Error(t, err) require.Error(t, err)
}) })
t.Run("Test Int8", func(t *testing.T) { t.Run("Test Int8", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int8) w, err := NewPayloadWriter(schemapb.DataType_Int8, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddInt8ToPayload([]int8{}) err = w.AddInt8ToPayload([]int8{}, nil)
require.Error(t, err) require.Error(t, err)
err = w.FinishPayloadWriter() err = w.FinishPayloadWriter()
require.NoError(t, err) require.NoError(t, err)
err = w.AddInt8ToPayload([]int8{0}) err = w.AddInt8ToPayload([]int8{0}, nil)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Float) w, err = NewPayloadWriter(schemapb.DataType_Float, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddInt8ToPayload([]int8{0}) err = w.AddInt8ToPayload([]int8{0}, nil)
require.Error(t, err) require.Error(t, err)
}) })
t.Run("Test Int16", func(t *testing.T) { t.Run("Test Int16", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int16) w, err := NewPayloadWriter(schemapb.DataType_Int16, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddInt16ToPayload([]int16{}) err = w.AddInt16ToPayload([]int16{}, nil)
require.Error(t, err) require.Error(t, err)
err = w.FinishPayloadWriter() err = w.FinishPayloadWriter()
require.NoError(t, err) require.NoError(t, err)
err = w.AddInt16ToPayload([]int16{0}) err = w.AddInt16ToPayload([]int16{0}, nil)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Float) w, err = NewPayloadWriter(schemapb.DataType_Float, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddInt16ToPayload([]int16{0}) err = w.AddInt16ToPayload([]int16{0}, nil)
require.Error(t, err) require.Error(t, err)
}) })
t.Run("Test Int32", func(t *testing.T) { t.Run("Test Int32", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int32) w, err := NewPayloadWriter(schemapb.DataType_Int32, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddInt32ToPayload([]int32{}) err = w.AddInt32ToPayload([]int32{}, nil)
require.Error(t, err) require.Error(t, err)
err = w.FinishPayloadWriter() err = w.FinishPayloadWriter()
require.NoError(t, err) require.NoError(t, err)
err = w.AddInt32ToPayload([]int32{0}) err = w.AddInt32ToPayload([]int32{0}, nil)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Float) w, err = NewPayloadWriter(schemapb.DataType_Float, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddInt32ToPayload([]int32{0}) err = w.AddInt32ToPayload([]int32{0}, nil)
require.Error(t, err) require.Error(t, err)
}) })
t.Run("Test Int64", func(t *testing.T) { t.Run("Test Int64", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int64) w, err := NewPayloadWriter(schemapb.DataType_Int64, Params.CommonCfg.MaxBloomFalsePositive.PanicIfEmpty)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddInt64ToPayload([]int64{}) err = w.AddInt64ToPayload([]int64{}, nil)
require.Error(t, err) require.Error(t, err)
err = w.FinishPayloadWriter() err = w.FinishPayloadWriter()
require.NoError(t, err) require.NoError(t, err)
err = w.AddInt64ToPayload([]int64{0}) err = w.AddInt64ToPayload([]int64{0}, nil)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Float) w, err = NewPayloadWriter(schemapb.DataType_Float, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddInt64ToPayload([]int64{0}) err = w.AddInt64ToPayload([]int64{0}, nil)
require.Error(t, err) require.Error(t, err)
}) })
t.Run("Test Float", func(t *testing.T) { t.Run("Test Float", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Float) w, err := NewPayloadWriter(schemapb.DataType_Float, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddFloatToPayload([]float32{}) err = w.AddFloatToPayload([]float32{}, nil)
require.Error(t, err) require.Error(t, err)
err = w.FinishPayloadWriter() err = w.FinishPayloadWriter()
require.NoError(t, err) require.NoError(t, err)
err = w.AddFloatToPayload([]float32{0}) err = w.AddFloatToPayload([]float32{0}, nil)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64) w, err = NewPayloadWriter(schemapb.DataType_Int64, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddFloatToPayload([]float32{0}) err = w.AddFloatToPayload([]float32{0}, nil)
require.Error(t, err) require.Error(t, err)
}) })
t.Run("Test Double", func(t *testing.T) { t.Run("Test Double", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Double) w, err := NewPayloadWriter(schemapb.DataType_Double, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddDoubleToPayload([]float64{}) err = w.AddDoubleToPayload([]float64{}, nil)
require.Error(t, err) require.Error(t, err)
err = w.FinishPayloadWriter() err = w.FinishPayloadWriter()
require.NoError(t, err) require.NoError(t, err)
err = w.AddDoubleToPayload([]float64{0}) err = w.AddDoubleToPayload([]float64{0}, nil)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64) w, err = NewPayloadWriter(schemapb.DataType_Int64, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddDoubleToPayload([]float64{0}) err = w.AddDoubleToPayload([]float64{0}, nil)
require.Error(t, err) require.Error(t, err)
}) })
t.Run("Test String", func(t *testing.T) { t.Run("Test String", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_String) w, err := NewPayloadWriter(schemapb.DataType_String, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.FinishPayloadWriter() err = w.FinishPayloadWriter()
require.NoError(t, err) require.NoError(t, err)
err = w.AddOneStringToPayload("test") err = w.AddOneStringToPayload("test", false)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64) w, err = NewPayloadWriter(schemapb.DataType_Int64, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddOneStringToPayload("test") err = w.AddOneStringToPayload("test", false)
require.Error(t, err) require.Error(t, err)
}) })
t.Run("Test Array", func(t *testing.T) { t.Run("Test Array", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Array) w, err := NewPayloadWriter(schemapb.DataType_Array, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.FinishPayloadWriter() err = w.FinishPayloadWriter()
require.NoError(t, err) require.NoError(t, err)
err = w.AddOneArrayToPayload(&schemapb.ScalarField{}) err = w.AddOneArrayToPayload(&schemapb.ScalarField{}, false)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64) w, err = NewPayloadWriter(schemapb.DataType_Int64, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddOneArrayToPayload(&schemapb.ScalarField{}) err = w.AddOneArrayToPayload(&schemapb.ScalarField{}, false)
require.Error(t, err) require.Error(t, err)
}) })
t.Run("Test Json", func(t *testing.T) { t.Run("Test Json", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_JSON) w, err := NewPayloadWriter(schemapb.DataType_JSON, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.FinishPayloadWriter() err = w.FinishPayloadWriter()
require.NoError(t, err) require.NoError(t, err)
err = w.AddOneJSONToPayload([]byte{0, 1}) err = w.AddOneJSONToPayload([]byte{0, 1}, false)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64) w, err = NewPayloadWriter(schemapb.DataType_Int64, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddOneJSONToPayload([]byte{0, 1}) err = w.AddOneJSONToPayload([]byte{0, 1}, false)
require.Error(t, err) require.Error(t, err)
}) })
t.Run("Test BinaryVector", func(t *testing.T) { t.Run("Test BinaryVector", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, 8) w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, false, 8)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -258,7 +265,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
err = w.AddBinaryVectorToPayload(data, 8) err = w.AddBinaryVectorToPayload(data, 8)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64) w, err = NewPayloadWriter(schemapb.DataType_Int64, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -267,7 +274,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
}) })
t.Run("Test FloatVector", func(t *testing.T) { t.Run("Test FloatVector", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_FloatVector, 8) w, err := NewPayloadWriter(schemapb.DataType_FloatVector, false, 8)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -276,20 +283,20 @@ func TestPayloadWriter_Failed(t *testing.T) {
data[i] = 1 data[i] = 1
} }
err = w.AddFloatToPayload([]float32{}) err = w.AddFloatToPayload([]float32{}, nil)
require.Error(t, err) require.Error(t, err)
err = w.FinishPayloadWriter() err = w.FinishPayloadWriter()
require.NoError(t, err) require.NoError(t, err)
err = w.AddFloatToPayload(data) err = w.AddFloatToPayload(data, nil)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64) w, err = NewPayloadWriter(schemapb.DataType_Int64, false)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddFloatToPayload(data) err = w.AddFloatToPayload(data, nil)
require.Error(t, err) require.Error(t, err)
}) })
} }

View File

@ -224,7 +224,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
fmt.Println("\tpayload values:") fmt.Println("\tpayload values:")
switch colType { switch colType {
case schemapb.DataType_Bool: case schemapb.DataType_Bool:
val, err := reader.GetBoolFromPayload() val, _, err := reader.GetBoolFromPayload()
if err != nil { if err != nil {
return err return err
} }
@ -232,7 +232,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
fmt.Printf("\t\t%d : %v\n", i, v) fmt.Printf("\t\t%d : %v\n", i, v)
} }
case schemapb.DataType_Int8: case schemapb.DataType_Int8:
val, err := reader.GetInt8FromPayload() val, _, err := reader.GetInt8FromPayload()
if err != nil { if err != nil {
return err return err
} }
@ -240,7 +240,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
fmt.Printf("\t\t%d : %d\n", i, v) fmt.Printf("\t\t%d : %d\n", i, v)
} }
case schemapb.DataType_Int16: case schemapb.DataType_Int16:
val, err := reader.GetInt16FromPayload() val, _, err := reader.GetInt16FromPayload()
if err != nil { if err != nil {
return err return err
} }
@ -248,7 +248,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
fmt.Printf("\t\t%d : %d\n", i, v) fmt.Printf("\t\t%d : %d\n", i, v)
} }
case schemapb.DataType_Int32: case schemapb.DataType_Int32:
val, err := reader.GetInt32FromPayload() val, _, err := reader.GetInt32FromPayload()
if err != nil { if err != nil {
return err return err
} }
@ -256,7 +256,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
fmt.Printf("\t\t%d : %d\n", i, v) fmt.Printf("\t\t%d : %d\n", i, v)
} }
case schemapb.DataType_Int64: case schemapb.DataType_Int64:
val, err := reader.GetInt64FromPayload() val, _, err := reader.GetInt64FromPayload()
if err != nil { if err != nil {
return err return err
} }
@ -264,7 +264,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
fmt.Printf("\t\t%d : %d\n", i, v) fmt.Printf("\t\t%d : %d\n", i, v)
} }
case schemapb.DataType_Float: case schemapb.DataType_Float:
val, err := reader.GetFloatFromPayload() val, _, err := reader.GetFloatFromPayload()
if err != nil { if err != nil {
return err return err
} }
@ -272,7 +272,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
fmt.Printf("\t\t%d : %f\n", i, v) fmt.Printf("\t\t%d : %f\n", i, v)
} }
case schemapb.DataType_Double: case schemapb.DataType_Double:
val, err := reader.GetDoubleFromPayload() val, _, err := reader.GetDoubleFromPayload()
if err != nil { if err != nil {
return err return err
} }
@ -285,7 +285,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
return err return err
} }
val, err := reader.GetStringFromPayload() val, _, err := reader.GetStringFromPayload()
if err != nil { if err != nil {
return err return err
} }
@ -358,13 +358,16 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
if err != nil { if err != nil {
return err return err
} }
val, err := reader.GetJSONFromPayload() val, valids, err := reader.GetJSONFromPayload()
if err != nil { if err != nil {
return err return err
} }
for i := 0; i < rows; i++ { for i := 0; i < rows; i++ {
fmt.Printf("\t\t%d : %s\n", i, val[i]) fmt.Printf("\t\t%d : %s\n", i, val[i])
} }
for i, v := range valids {
fmt.Printf("\t\t%d : %v\n", i, v)
}
case schemapb.DataType_SparseFloatVector: case schemapb.DataType_SparseFloatVector:
sparseData, _, err := reader.GetSparseFloatVectorFromPayload() sparseData, _, err := reader.GetSparseFloatVectorFromPayload()
if err != nil { if err != nil {
@ -388,7 +391,7 @@ func printDDLPayloadValues(eventType EventTypeCode, colType schemapb.DataType, r
fmt.Println("\tpayload values:") fmt.Println("\tpayload values:")
switch colType { switch colType {
case schemapb.DataType_Int64: case schemapb.DataType_Int64:
val, err := reader.GetInt64FromPayload() val, _, err := reader.GetInt64FromPayload()
if err != nil { if err != nil {
return err return err
} }
@ -402,7 +405,7 @@ func printDDLPayloadValues(eventType EventTypeCode, colType schemapb.DataType, r
return err return err
} }
val, err := reader.GetStringFromPayload() val, _, err := reader.GetStringFromPayload()
if err != nil { if err != nil {
return err return err
} }
@ -448,7 +451,7 @@ func printDDLPayloadValues(eventType EventTypeCode, colType schemapb.DataType, r
func printIndexFilePayloadValues(reader PayloadReaderInterface, key string, dataType schemapb.DataType) error { func printIndexFilePayloadValues(reader PayloadReaderInterface, key string, dataType schemapb.DataType) error {
if dataType == schemapb.DataType_Int8 { if dataType == schemapb.DataType_Int8 {
if key == IndexParamsKey { if key == IndexParamsKey {
content, err := reader.GetByteFromPayload() content, _, err := reader.GetByteFromPayload()
if err != nil { if err != nil {
return err return err
} }
@ -459,7 +462,7 @@ func printIndexFilePayloadValues(reader PayloadReaderInterface, key string, data
} }
if key == "SLICE_META" { if key == "SLICE_META" {
content, err := reader.GetByteFromPayload() content, _, err := reader.GetByteFromPayload()
if err != nil { if err != nil {
return err return err
} }
@ -473,7 +476,7 @@ func printIndexFilePayloadValues(reader PayloadReaderInterface, key string, data
} }
} else { } else {
if key == IndexParamsKey { if key == IndexParamsKey {
content, err := reader.GetStringFromPayload() content, _, err := reader.GetStringFromPayload()
if err != nil { if err != nil {
return err return err
} }
@ -484,7 +487,7 @@ func printIndexFilePayloadValues(reader PayloadReaderInterface, key string, data
} }
if key == "SLICE_META" { if key == "SLICE_META" {
content, err := reader.GetStringFromPayload() content, _, err := reader.GetStringFromPayload()
if err != nil { if err != nil {
return err return err
} }

View File

@ -36,27 +36,27 @@ import (
) )
func TestPrintBinlogFilesInt64(t *testing.T) { func TestPrintBinlogFilesInt64(t *testing.T) {
w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40) w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false)
curTS := time.Now().UnixNano() / int64(time.Millisecond) curTS := time.Now().UnixNano() / int64(time.Millisecond)
e1, err := w.NextInsertEventWriter() e1, err := w.NextInsertEventWriter(false)
assert.NoError(t, err) assert.NoError(t, err)
err = e1.AddDataToPayload([]int64{1, 2, 3}) err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = e1.AddDataToPayload([]int32{4, 5, 6}) err = e1.AddDataToPayload([]int32{4, 5, 6}, nil)
assert.Error(t, err) assert.Error(t, err)
err = e1.AddDataToPayload([]int64{4, 5, 6}) err = e1.AddDataToPayload([]int64{4, 5, 6}, nil)
assert.NoError(t, err) assert.NoError(t, err)
e1.SetEventTimestamp(tsoutil.ComposeTS(curTS+10*60*1000, 0), tsoutil.ComposeTS(curTS+20*60*1000, 0)) e1.SetEventTimestamp(tsoutil.ComposeTS(curTS+10*60*1000, 0), tsoutil.ComposeTS(curTS+20*60*1000, 0))
e2, err := w.NextInsertEventWriter() e2, err := w.NextInsertEventWriter(false)
assert.NoError(t, err) assert.NoError(t, err)
err = e2.AddDataToPayload([]int64{7, 8, 9}) err = e2.AddDataToPayload([]int64{7, 8, 9}, nil)
assert.NoError(t, err) assert.NoError(t, err)
err = e2.AddDataToPayload([]bool{true, false, true}) err = e2.AddDataToPayload([]bool{true, false, true}, nil)
assert.Error(t, err) assert.Error(t, err)
err = e2.AddDataToPayload([]int64{10, 11, 12}) err = e2.AddDataToPayload([]int64{10, 11, 12}, nil)
assert.NoError(t, err) assert.NoError(t, err)
e2.SetEventTimestamp(tsoutil.ComposeTS(curTS+30*60*1000, 0), tsoutil.ComposeTS(curTS+40*60*1000, 0)) e2.SetEventTimestamp(tsoutil.ComposeTS(curTS+30*60*1000, 0), tsoutil.ComposeTS(curTS+40*60*1000, 0))

View File

@ -59,3 +59,9 @@ func UnsafeReadFloat64(buf []byte, idx int) float64 {
ptr := unsafe.Pointer(&(buf[idx])) ptr := unsafe.Pointer(&(buf[idx]))
return *((*float64)(ptr)) return *((*float64)(ptr))
} }
/* #nosec G103 */
func UnsafeReadBool(buf []byte, idx int) bool {
ptr := unsafe.Pointer(&(buf[idx]))
return *((*bool)(ptr))
}

View File

@ -567,30 +567,38 @@ func ColumnBasedInsertMsgToInsertData(msg *msgstream.InsertMsg, collSchema *sche
case schemapb.DataType_Bool: case schemapb.DataType_Bool:
srcData := srcField.GetScalars().GetBoolData().GetData() srcData := srcField.GetScalars().GetBoolData().GetData()
validData := srcField.GetValidData()
fieldData = &BoolFieldData{ fieldData = &BoolFieldData{
Data: lo.Map(srcData, func(v bool, _ int) bool { return v }), Data: lo.Map(srcData, func(v bool, _ int) bool { return v }),
ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }),
} }
case schemapb.DataType_Int8: case schemapb.DataType_Int8:
srcData := srcField.GetScalars().GetIntData().GetData() srcData := srcField.GetScalars().GetIntData().GetData()
validData := srcField.GetValidData()
fieldData = &Int8FieldData{ fieldData = &Int8FieldData{
Data: lo.Map(srcData, func(v int32, _ int) int8 { return int8(v) }), Data: lo.Map(srcData, func(v int32, _ int) int8 { return int8(v) }),
ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }),
} }
case schemapb.DataType_Int16: case schemapb.DataType_Int16:
srcData := srcField.GetScalars().GetIntData().GetData() srcData := srcField.GetScalars().GetIntData().GetData()
validData := srcField.GetValidData()
fieldData = &Int16FieldData{ fieldData = &Int16FieldData{
Data: lo.Map(srcData, func(v int32, _ int) int16 { return int16(v) }), Data: lo.Map(srcData, func(v int32, _ int) int16 { return int16(v) }),
ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }),
} }
case schemapb.DataType_Int32: case schemapb.DataType_Int32:
srcData := srcField.GetScalars().GetIntData().GetData() srcData := srcField.GetScalars().GetIntData().GetData()
validData := srcField.GetValidData()
fieldData = &Int32FieldData{ fieldData = &Int32FieldData{
Data: lo.Map(srcData, func(v int32, _ int) int32 { return v }), Data: lo.Map(srcData, func(v int32, _ int) int32 { return v }),
ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }),
} }
case schemapb.DataType_Int64: case schemapb.DataType_Int64:
@ -605,45 +613,57 @@ func ColumnBasedInsertMsgToInsertData(msg *msgstream.InsertMsg, collSchema *sche
} }
default: default:
srcData := srcField.GetScalars().GetLongData().GetData() srcData := srcField.GetScalars().GetLongData().GetData()
validData := srcField.GetValidData()
fieldData = &Int64FieldData{ fieldData = &Int64FieldData{
Data: lo.Map(srcData, func(v int64, _ int) int64 { return v }), Data: lo.Map(srcData, func(v int64, _ int) int64 { return v }),
ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }),
} }
} }
case schemapb.DataType_Float: case schemapb.DataType_Float:
srcData := srcField.GetScalars().GetFloatData().GetData() srcData := srcField.GetScalars().GetFloatData().GetData()
validData := srcField.GetValidData()
fieldData = &FloatFieldData{ fieldData = &FloatFieldData{
Data: lo.Map(srcData, func(v float32, _ int) float32 { return v }), Data: lo.Map(srcData, func(v float32, _ int) float32 { return v }),
ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }),
} }
case schemapb.DataType_Double: case schemapb.DataType_Double:
srcData := srcField.GetScalars().GetDoubleData().GetData() srcData := srcField.GetScalars().GetDoubleData().GetData()
validData := srcField.GetValidData()
fieldData = &DoubleFieldData{ fieldData = &DoubleFieldData{
Data: lo.Map(srcData, func(v float64, _ int) float64 { return v }), Data: lo.Map(srcData, func(v float64, _ int) float64 { return v }),
ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }),
} }
case schemapb.DataType_String, schemapb.DataType_VarChar: case schemapb.DataType_String, schemapb.DataType_VarChar:
srcData := srcField.GetScalars().GetStringData().GetData() srcData := srcField.GetScalars().GetStringData().GetData()
validData := srcField.GetValidData()
fieldData = &StringFieldData{ fieldData = &StringFieldData{
Data: lo.Map(srcData, func(v string, _ int) string { return v }), Data: lo.Map(srcData, func(v string, _ int) string { return v }),
ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }),
} }
case schemapb.DataType_Array: case schemapb.DataType_Array:
srcData := srcField.GetScalars().GetArrayData().GetData() srcData := srcField.GetScalars().GetArrayData().GetData()
validData := srcField.GetValidData()
fieldData = &ArrayFieldData{ fieldData = &ArrayFieldData{
ElementType: field.GetElementType(), ElementType: field.GetElementType(),
Data: lo.Map(srcData, func(v *schemapb.ScalarField, _ int) *schemapb.ScalarField { return v }), Data: lo.Map(srcData, func(v *schemapb.ScalarField, _ int) *schemapb.ScalarField { return v }),
ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }),
} }
case schemapb.DataType_JSON: case schemapb.DataType_JSON:
srcData := srcField.GetScalars().GetJsonData().GetData() srcData := srcField.GetScalars().GetJsonData().GetData()
validData := srcField.GetValidData()
fieldData = &JSONFieldData{ fieldData = &JSONFieldData{
Data: lo.Map(srcData, func(v []byte, _ int) []byte { return v }), Data: lo.Map(srcData, func(v []byte, _ int) []byte { return v }),
ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }),
} }
default: default:
@ -676,89 +696,105 @@ func InsertMsgToInsertData(msg *msgstream.InsertMsg, schema *schemapb.Collection
func mergeBoolField(data *InsertData, fid FieldID, field *BoolFieldData) { func mergeBoolField(data *InsertData, fid FieldID, field *BoolFieldData) {
if _, ok := data.Data[fid]; !ok { if _, ok := data.Data[fid]; !ok {
fieldData := &BoolFieldData{ fieldData := &BoolFieldData{
Data: nil, Data: nil,
ValidData: nil,
} }
data.Data[fid] = fieldData data.Data[fid] = fieldData
} }
fieldData := data.Data[fid].(*BoolFieldData) fieldData := data.Data[fid].(*BoolFieldData)
fieldData.Data = append(fieldData.Data, field.Data...) fieldData.Data = append(fieldData.Data, field.Data...)
fieldData.ValidData = append(fieldData.ValidData, field.ValidData...)
} }
func mergeInt8Field(data *InsertData, fid FieldID, field *Int8FieldData) { func mergeInt8Field(data *InsertData, fid FieldID, field *Int8FieldData) {
if _, ok := data.Data[fid]; !ok { if _, ok := data.Data[fid]; !ok {
fieldData := &Int8FieldData{ fieldData := &Int8FieldData{
Data: nil, Data: nil,
ValidData: nil,
} }
data.Data[fid] = fieldData data.Data[fid] = fieldData
} }
fieldData := data.Data[fid].(*Int8FieldData) fieldData := data.Data[fid].(*Int8FieldData)
fieldData.Data = append(fieldData.Data, field.Data...) fieldData.Data = append(fieldData.Data, field.Data...)
fieldData.ValidData = append(fieldData.ValidData, field.ValidData...)
} }
func mergeInt16Field(data *InsertData, fid FieldID, field *Int16FieldData) { func mergeInt16Field(data *InsertData, fid FieldID, field *Int16FieldData) {
if _, ok := data.Data[fid]; !ok { if _, ok := data.Data[fid]; !ok {
fieldData := &Int16FieldData{ fieldData := &Int16FieldData{
Data: nil, Data: nil,
ValidData: nil,
} }
data.Data[fid] = fieldData data.Data[fid] = fieldData
} }
fieldData := data.Data[fid].(*Int16FieldData) fieldData := data.Data[fid].(*Int16FieldData)
fieldData.Data = append(fieldData.Data, field.Data...) fieldData.Data = append(fieldData.Data, field.Data...)
fieldData.ValidData = append(fieldData.ValidData, field.ValidData...)
} }
func mergeInt32Field(data *InsertData, fid FieldID, field *Int32FieldData) { func mergeInt32Field(data *InsertData, fid FieldID, field *Int32FieldData) {
if _, ok := data.Data[fid]; !ok { if _, ok := data.Data[fid]; !ok {
fieldData := &Int32FieldData{ fieldData := &Int32FieldData{
Data: nil, Data: nil,
ValidData: nil,
} }
data.Data[fid] = fieldData data.Data[fid] = fieldData
} }
fieldData := data.Data[fid].(*Int32FieldData) fieldData := data.Data[fid].(*Int32FieldData)
fieldData.Data = append(fieldData.Data, field.Data...) fieldData.Data = append(fieldData.Data, field.Data...)
fieldData.ValidData = append(fieldData.ValidData, field.ValidData...)
} }
func mergeInt64Field(data *InsertData, fid FieldID, field *Int64FieldData) { func mergeInt64Field(data *InsertData, fid FieldID, field *Int64FieldData) {
if _, ok := data.Data[fid]; !ok { if _, ok := data.Data[fid]; !ok {
fieldData := &Int64FieldData{ fieldData := &Int64FieldData{
Data: nil, Data: nil,
ValidData: nil,
} }
data.Data[fid] = fieldData data.Data[fid] = fieldData
} }
fieldData := data.Data[fid].(*Int64FieldData) fieldData := data.Data[fid].(*Int64FieldData)
fieldData.Data = append(fieldData.Data, field.Data...) fieldData.Data = append(fieldData.Data, field.Data...)
fieldData.ValidData = append(fieldData.ValidData, field.ValidData...)
} }
func mergeFloatField(data *InsertData, fid FieldID, field *FloatFieldData) { func mergeFloatField(data *InsertData, fid FieldID, field *FloatFieldData) {
if _, ok := data.Data[fid]; !ok { if _, ok := data.Data[fid]; !ok {
fieldData := &FloatFieldData{ fieldData := &FloatFieldData{
Data: nil, Data: nil,
ValidData: nil,
} }
data.Data[fid] = fieldData data.Data[fid] = fieldData
} }
fieldData := data.Data[fid].(*FloatFieldData) fieldData := data.Data[fid].(*FloatFieldData)
fieldData.Data = append(fieldData.Data, field.Data...) fieldData.Data = append(fieldData.Data, field.Data...)
fieldData.ValidData = append(fieldData.ValidData, field.ValidData...)
} }
func mergeDoubleField(data *InsertData, fid FieldID, field *DoubleFieldData) { func mergeDoubleField(data *InsertData, fid FieldID, field *DoubleFieldData) {
if _, ok := data.Data[fid]; !ok { if _, ok := data.Data[fid]; !ok {
fieldData := &DoubleFieldData{ fieldData := &DoubleFieldData{
Data: nil, Data: nil,
ValidData: nil,
} }
data.Data[fid] = fieldData data.Data[fid] = fieldData
} }
fieldData := data.Data[fid].(*DoubleFieldData) fieldData := data.Data[fid].(*DoubleFieldData)
fieldData.Data = append(fieldData.Data, field.Data...) fieldData.Data = append(fieldData.Data, field.Data...)
fieldData.ValidData = append(fieldData.ValidData, field.ValidData...)
} }
func mergeStringField(data *InsertData, fid FieldID, field *StringFieldData) { func mergeStringField(data *InsertData, fid FieldID, field *StringFieldData) {
if _, ok := data.Data[fid]; !ok { if _, ok := data.Data[fid]; !ok {
fieldData := &StringFieldData{ fieldData := &StringFieldData{
Data: nil, Data: nil,
ValidData: nil,
} }
data.Data[fid] = fieldData data.Data[fid] = fieldData
} }
fieldData := data.Data[fid].(*StringFieldData) fieldData := data.Data[fid].(*StringFieldData)
fieldData.Data = append(fieldData.Data, field.Data...) fieldData.Data = append(fieldData.Data, field.Data...)
fieldData.ValidData = append(fieldData.ValidData, field.ValidData...)
} }
func mergeArrayField(data *InsertData, fid FieldID, field *ArrayFieldData) { func mergeArrayField(data *InsertData, fid FieldID, field *ArrayFieldData) {
@ -766,22 +802,26 @@ func mergeArrayField(data *InsertData, fid FieldID, field *ArrayFieldData) {
fieldData := &ArrayFieldData{ fieldData := &ArrayFieldData{
ElementType: field.ElementType, ElementType: field.ElementType,
Data: nil, Data: nil,
ValidData: nil,
} }
data.Data[fid] = fieldData data.Data[fid] = fieldData
} }
fieldData := data.Data[fid].(*ArrayFieldData) fieldData := data.Data[fid].(*ArrayFieldData)
fieldData.Data = append(fieldData.Data, field.Data...) fieldData.Data = append(fieldData.Data, field.Data...)
fieldData.ValidData = append(fieldData.ValidData, field.ValidData...)
} }
func mergeJSONField(data *InsertData, fid FieldID, field *JSONFieldData) { func mergeJSONField(data *InsertData, fid FieldID, field *JSONFieldData) {
if _, ok := data.Data[fid]; !ok { if _, ok := data.Data[fid]; !ok {
fieldData := &JSONFieldData{ fieldData := &JSONFieldData{
Data: nil, Data: nil,
ValidData: nil,
} }
data.Data[fid] = fieldData data.Data[fid] = fieldData
} }
fieldData := data.Data[fid].(*JSONFieldData) fieldData := data.Data[fid].(*JSONFieldData)
fieldData.Data = append(fieldData.Data, field.Data...) fieldData.Data = append(fieldData.Data, field.Data...)
fieldData.ValidData = append(fieldData.ValidData, field.ValidData...)
} }
func mergeBinaryVectorField(data *InsertData, fid FieldID, field *BinaryVectorFieldData) { func mergeBinaryVectorField(data *InsertData, fid FieldID, field *BinaryVectorFieldData) {

View File

@ -434,6 +434,121 @@ func genAllFieldsSchema(fVecDim, bVecDim, f16VecDim, bf16VecDim int, withSparse
return schema, pkFieldID, fieldIDs return schema, pkFieldID, fieldIDs
} }
func genAllFieldsSchemaNullable(fVecDim, bVecDim, f16VecDim, bf16VecDim int, withSparse bool) (schema *schemapb.CollectionSchema, pkFieldID UniqueID, fieldIDs []UniqueID) {
schema = &schemapb.CollectionSchema{
Name: "all_fields_schema_nullable",
Description: "all_fields_schema_nullable",
AutoID: false,
Fields: []*schemapb.FieldSchema{
{
DataType: schemapb.DataType_Int64,
IsPrimaryKey: true,
},
{
DataType: schemapb.DataType_Bool,
Nullable: true,
},
{
DataType: schemapb.DataType_Int8,
Nullable: true,
},
{
DataType: schemapb.DataType_Int16,
Nullable: true,
},
{
DataType: schemapb.DataType_Int32,
Nullable: true,
},
{
DataType: schemapb.DataType_Int64,
Nullable: true,
},
{
DataType: schemapb.DataType_Float,
Nullable: true,
},
{
DataType: schemapb.DataType_Double,
Nullable: true,
},
{
DataType: schemapb.DataType_FloatVector,
TypeParams: []*commonpb.KeyValuePair{
{
Key: common.DimKey,
Value: strconv.Itoa(fVecDim),
},
},
},
{
DataType: schemapb.DataType_BinaryVector,
TypeParams: []*commonpb.KeyValuePair{
{
Key: common.DimKey,
Value: strconv.Itoa(bVecDim),
},
},
},
{
DataType: schemapb.DataType_Float16Vector,
TypeParams: []*commonpb.KeyValuePair{
{
Key: common.DimKey,
Value: strconv.Itoa(f16VecDim),
},
},
},
{
DataType: schemapb.DataType_BFloat16Vector,
TypeParams: []*commonpb.KeyValuePair{
{
Key: common.DimKey,
Value: strconv.Itoa(bf16VecDim),
},
},
},
{
DataType: schemapb.DataType_Array,
Nullable: true,
},
{
DataType: schemapb.DataType_JSON,
Nullable: true,
},
},
}
if withSparse {
schema.Fields = append(schema.Fields, &schemapb.FieldSchema{
DataType: schemapb.DataType_SparseFloatVector,
})
}
fieldIDs = make([]UniqueID, 0)
for idx := range schema.Fields {
fID := int64(common.StartOfUserFieldID + idx)
schema.Fields[idx].FieldID = fID
if schema.Fields[idx].IsPrimaryKey {
pkFieldID = fID
}
fieldIDs = append(fieldIDs, fID)
}
schema.Fields = append(schema.Fields, &schemapb.FieldSchema{
FieldID: common.RowIDField,
Name: common.RowIDFieldName,
IsPrimaryKey: false,
Description: "",
DataType: schemapb.DataType_Int64,
})
schema.Fields = append(schema.Fields, &schemapb.FieldSchema{
FieldID: common.TimeStampField,
Name: common.TimeStampFieldName,
IsPrimaryKey: false,
Description: "",
DataType: schemapb.DataType_Int64,
})
return schema, pkFieldID, fieldIDs
}
func generateInt32ArrayList(numRows int) []*schemapb.ScalarField { func generateInt32ArrayList(numRows int) []*schemapb.ScalarField {
ret := make([]*schemapb.ScalarField, 0, numRows) ret := make([]*schemapb.ScalarField, 0, numRows)
for i := 0; i < numRows; i++ { for i := 0; i < numRows; i++ {
@ -616,6 +731,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
}, },
FieldId: field.FieldID, FieldId: field.FieldID,
} }
if field.GetNullable() {
f.ValidData = testutils.GenerateBoolArray(numRows)
}
msg.FieldsData = append(msg.FieldsData, f) msg.FieldsData = append(msg.FieldsData, f)
for _, d := range data { for _, d := range data {
columns[idx] = append(columns[idx], d) columns[idx] = append(columns[idx], d)
@ -636,6 +754,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
}, },
FieldId: field.FieldID, FieldId: field.FieldID,
} }
if field.GetNullable() {
f.ValidData = testutils.GenerateBoolArray(numRows)
}
msg.FieldsData = append(msg.FieldsData, f) msg.FieldsData = append(msg.FieldsData, f)
for _, d := range data { for _, d := range data {
columns[idx] = append(columns[idx], int8(d)) columns[idx] = append(columns[idx], int8(d))
@ -656,6 +777,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
}, },
FieldId: field.FieldID, FieldId: field.FieldID,
} }
if field.GetNullable() {
f.ValidData = testutils.GenerateBoolArray(numRows)
}
msg.FieldsData = append(msg.FieldsData, f) msg.FieldsData = append(msg.FieldsData, f)
for _, d := range data { for _, d := range data {
columns[idx] = append(columns[idx], int16(d)) columns[idx] = append(columns[idx], int16(d))
@ -676,6 +800,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
}, },
FieldId: field.FieldID, FieldId: field.FieldID,
} }
if field.GetNullable() {
f.ValidData = testutils.GenerateBoolArray(numRows)
}
msg.FieldsData = append(msg.FieldsData, f) msg.FieldsData = append(msg.FieldsData, f)
for _, d := range data { for _, d := range data {
columns[idx] = append(columns[idx], d) columns[idx] = append(columns[idx], d)
@ -696,6 +823,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
}, },
FieldId: field.FieldID, FieldId: field.FieldID,
} }
if field.GetNullable() {
f.ValidData = testutils.GenerateBoolArray(numRows)
}
msg.FieldsData = append(msg.FieldsData, f) msg.FieldsData = append(msg.FieldsData, f)
for _, d := range data { for _, d := range data {
columns[idx] = append(columns[idx], d) columns[idx] = append(columns[idx], d)
@ -717,6 +847,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
}, },
FieldId: field.FieldID, FieldId: field.FieldID,
} }
if field.GetNullable() {
f.ValidData = testutils.GenerateBoolArray(numRows)
}
msg.FieldsData = append(msg.FieldsData, f) msg.FieldsData = append(msg.FieldsData, f)
for _, d := range data { for _, d := range data {
columns[idx] = append(columns[idx], d) columns[idx] = append(columns[idx], d)
@ -737,6 +870,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
}, },
FieldId: field.FieldID, FieldId: field.FieldID,
} }
if field.GetNullable() {
f.ValidData = testutils.GenerateBoolArray(numRows)
}
msg.FieldsData = append(msg.FieldsData, f) msg.FieldsData = append(msg.FieldsData, f)
for _, d := range data { for _, d := range data {
columns[idx] = append(columns[idx], d) columns[idx] = append(columns[idx], d)
@ -856,6 +992,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
}, },
FieldId: field.FieldID, FieldId: field.FieldID,
} }
if field.GetNullable() {
f.ValidData = testutils.GenerateBoolArray(numRows)
}
msg.FieldsData = append(msg.FieldsData, f) msg.FieldsData = append(msg.FieldsData, f)
for _, d := range data { for _, d := range data {
columns[idx] = append(columns[idx], d) columns[idx] = append(columns[idx], d)
@ -877,6 +1016,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
}, },
FieldId: field.FieldID, FieldId: field.FieldID,
} }
if field.GetNullable() {
f.ValidData = testutils.GenerateBoolArray(numRows)
}
msg.FieldsData = append(msg.FieldsData, f) msg.FieldsData = append(msg.FieldsData, f)
for _, d := range data { for _, d := range data {
columns[idx] = append(columns[idx], d) columns[idx] = append(columns[idx], d)
@ -1019,6 +1161,24 @@ func TestColumnBasedInsertMsgToInsertData(t *testing.T) {
} }
} }
func TestColumnBasedInsertMsgToInsertDataNullable(t *testing.T) {
numRows, fVecDim, bVecDim, f16VecDim, bf16VecDim := 2, 2, 8, 2, 2
schema, _, fieldIDs := genAllFieldsSchemaNullable(fVecDim, bVecDim, f16VecDim, bf16VecDim, true)
msg, _, columns := genColumnBasedInsertMsg(schema, numRows, fVecDim, bVecDim, f16VecDim, bf16VecDim)
idata, err := ColumnBasedInsertMsgToInsertData(msg, schema)
assert.NoError(t, err)
for idx, fID := range fieldIDs {
column := columns[idx]
fData, ok := idata.Data[fID]
assert.True(t, ok)
assert.Equal(t, len(column), fData.RowNum())
for j := range column {
assert.Equal(t, fData.GetRow(j), column[j])
}
}
}
func TestColumnBasedInsertMsgToInsertFloat16VectorDataError(t *testing.T) { func TestColumnBasedInsertMsgToInsertFloat16VectorDataError(t *testing.T) {
msg := &msgstream.InsertMsg{ msg := &msgstream.InsertMsg{
BaseMsg: msgstream.BaseMsg{ BaseMsg: msgstream.BaseMsg{
@ -1145,233 +1305,391 @@ func TestInsertMsgToInsertData2(t *testing.T) {
} }
func TestMergeInsertData(t *testing.T) { func TestMergeInsertData(t *testing.T) {
d1 := &InsertData{ t.Run("empty data in buffer", func(t *testing.T) {
Data: map[int64]FieldData{ d1 := &InsertData{
common.RowIDField: &Int64FieldData{ Data: make(map[FieldID]FieldData),
Data: []int64{1}, Infos: []BlobInfo{},
}, }
common.TimeStampField: &Int64FieldData{ d2 := &InsertData{
Data: []int64{1}, Data: map[int64]FieldData{
}, common.RowIDField: &Int64FieldData{
BoolField: &BoolFieldData{ Data: []int64{2},
Data: []bool{true}, },
}, common.TimeStampField: &Int64FieldData{
Int8Field: &Int8FieldData{ Data: []int64{2},
Data: []int8{1}, },
}, BoolField: &BoolFieldData{
Int16Field: &Int16FieldData{ Data: []bool{false},
Data: []int16{1}, },
}, Int8Field: &Int8FieldData{
Int32Field: &Int32FieldData{ Data: []int8{2},
Data: []int32{1}, },
}, Int16Field: &Int16FieldData{
Int64Field: &Int64FieldData{ Data: []int16{2},
Data: []int64{1}, },
}, Int32Field: &Int32FieldData{
FloatField: &FloatFieldData{ Data: []int32{2},
Data: []float32{0}, },
}, Int64Field: &Int64FieldData{
DoubleField: &DoubleFieldData{ Data: []int64{2},
Data: []float64{0}, },
}, FloatField: &FloatFieldData{
StringField: &StringFieldData{ Data: []float32{0},
Data: []string{"1"}, },
}, DoubleField: &DoubleFieldData{
BinaryVectorField: &BinaryVectorFieldData{ Data: []float64{0},
Data: []byte{0}, },
Dim: 8, StringField: &StringFieldData{
}, Data: []string{"2"},
FloatVectorField: &FloatVectorFieldData{ },
Data: []float32{0}, BinaryVectorField: &BinaryVectorFieldData{
Dim: 1, Data: []byte{0},
}, Dim: 8,
Float16VectorField: &Float16VectorFieldData{ },
Data: []byte{0, 1}, FloatVectorField: &FloatVectorFieldData{
Dim: 1, Data: []float32{0},
}, Dim: 1,
BFloat16VectorField: &BFloat16VectorFieldData{ },
Data: []byte{0, 1}, Float16VectorField: &Float16VectorFieldData{
Dim: 1, Data: []byte{2, 3},
}, Dim: 1,
SparseFloatVectorField: &SparseFloatVectorFieldData{ },
SparseFloatArray: schemapb.SparseFloatArray{ BFloat16VectorField: &BFloat16VectorFieldData{
Dim: 600, Data: []byte{2, 3},
Contents: [][]byte{ Dim: 1,
typeutil.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}), },
typeutil.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}), SparseFloatVectorField: &SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
},
}, },
}, },
}, ArrayField: &ArrayFieldData{
ArrayField: &ArrayFieldData{ Data: []*schemapb.ScalarField{
Data: []*schemapb.ScalarField{ {
{ Data: &schemapb.ScalarField_IntData{
Data: &schemapb.ScalarField_IntData{ IntData: &schemapb.IntArray{
IntData: &schemapb.IntArray{ Data: []int32{4, 5, 6},
Data: []int32{1, 2, 3}, },
}, },
}, },
}, },
}, },
}, JSONField: &JSONFieldData{
JSONField: &JSONFieldData{ Data: [][]byte{[]byte(`{"hello":"world"}`)},
Data: [][]byte{[]byte(`{"key":"value"}`)},
},
},
Infos: nil,
}
d2 := &InsertData{
Data: map[int64]FieldData{
common.RowIDField: &Int64FieldData{
Data: []int64{2},
},
common.TimeStampField: &Int64FieldData{
Data: []int64{2},
},
BoolField: &BoolFieldData{
Data: []bool{false},
},
Int8Field: &Int8FieldData{
Data: []int8{2},
},
Int16Field: &Int16FieldData{
Data: []int16{2},
},
Int32Field: &Int32FieldData{
Data: []int32{2},
},
Int64Field: &Int64FieldData{
Data: []int64{2},
},
FloatField: &FloatFieldData{
Data: []float32{0},
},
DoubleField: &DoubleFieldData{
Data: []float64{0},
},
StringField: &StringFieldData{
Data: []string{"2"},
},
BinaryVectorField: &BinaryVectorFieldData{
Data: []byte{0},
Dim: 8,
},
FloatVectorField: &FloatVectorFieldData{
Data: []float32{0},
Dim: 1,
},
Float16VectorField: &Float16VectorFieldData{
Data: []byte{2, 3},
Dim: 1,
},
BFloat16VectorField: &BFloat16VectorFieldData{
Data: []byte{2, 3},
Dim: 1,
},
SparseFloatVectorField: &SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
},
}, },
}, },
ArrayField: &ArrayFieldData{ Infos: nil,
Data: []*schemapb.ScalarField{ }
{
Data: &schemapb.ScalarField_IntData{ MergeInsertData(d1, d2)
IntData: &schemapb.IntArray{
Data: []int32{4, 5, 6}, f, ok := d1.Data[common.RowIDField]
assert.True(t, ok)
assert.Equal(t, []int64{2}, f.(*Int64FieldData).Data)
f, ok = d1.Data[common.TimeStampField]
assert.True(t, ok)
assert.Equal(t, []int64{2}, f.(*Int64FieldData).Data)
f, ok = d1.Data[BoolField]
assert.True(t, ok)
assert.Equal(t, []bool{false}, f.(*BoolFieldData).Data)
f, ok = d1.Data[Int8Field]
assert.True(t, ok)
assert.Equal(t, []int8{2}, f.(*Int8FieldData).Data)
f, ok = d1.Data[Int16Field]
assert.True(t, ok)
assert.Equal(t, []int16{2}, f.(*Int16FieldData).Data)
f, ok = d1.Data[Int32Field]
assert.True(t, ok)
assert.Equal(t, []int32{2}, f.(*Int32FieldData).Data)
f, ok = d1.Data[Int64Field]
assert.True(t, ok)
assert.Equal(t, []int64{2}, f.(*Int64FieldData).Data)
f, ok = d1.Data[FloatField]
assert.True(t, ok)
assert.Equal(t, []float32{0}, f.(*FloatFieldData).Data)
f, ok = d1.Data[DoubleField]
assert.True(t, ok)
assert.Equal(t, []float64{0}, f.(*DoubleFieldData).Data)
f, ok = d1.Data[StringField]
assert.True(t, ok)
assert.Equal(t, []string{"2"}, f.(*StringFieldData).Data)
f, ok = d1.Data[BinaryVectorField]
assert.True(t, ok)
assert.Equal(t, []byte{0}, f.(*BinaryVectorFieldData).Data)
f, ok = d1.Data[FloatVectorField]
assert.True(t, ok)
assert.Equal(t, []float32{0}, f.(*FloatVectorFieldData).Data)
f, ok = d1.Data[Float16VectorField]
assert.True(t, ok)
assert.Equal(t, []byte{2, 3}, f.(*Float16VectorFieldData).Data)
f, ok = d1.Data[BFloat16VectorField]
assert.True(t, ok)
assert.Equal(t, []byte{2, 3}, f.(*BFloat16VectorFieldData).Data)
f, ok = d1.Data[SparseFloatVectorField]
assert.True(t, ok)
assert.Equal(t, &SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
},
},
}, f.(*SparseFloatVectorFieldData))
f, ok = d1.Data[ArrayField]
assert.True(t, ok)
assert.Equal(t, []int32{4, 5, 6}, f.(*ArrayFieldData).Data[0].GetIntData().GetData())
f, ok = d1.Data[JSONField]
assert.True(t, ok)
assert.EqualValues(t, [][]byte{[]byte(`{"hello":"world"}`)}, f.(*JSONFieldData).Data)
})
t.Run("normal case", func(t *testing.T) {
d1 := &InsertData{
Data: map[int64]FieldData{
common.RowIDField: &Int64FieldData{
Data: []int64{1},
},
common.TimeStampField: &Int64FieldData{
Data: []int64{1},
},
BoolField: &BoolFieldData{
Data: []bool{true},
},
Int8Field: &Int8FieldData{
Data: []int8{1},
},
Int16Field: &Int16FieldData{
Data: []int16{1},
},
Int32Field: &Int32FieldData{
Data: []int32{1},
},
Int64Field: &Int64FieldData{
Data: []int64{1},
},
FloatField: &FloatFieldData{
Data: []float32{0},
},
DoubleField: &DoubleFieldData{
Data: []float64{0},
},
StringField: &StringFieldData{
Data: []string{"1"},
},
BinaryVectorField: &BinaryVectorFieldData{
Data: []byte{0},
Dim: 8,
},
FloatVectorField: &FloatVectorFieldData{
Data: []float32{0},
Dim: 1,
},
Float16VectorField: &Float16VectorFieldData{
Data: []byte{0, 1},
Dim: 1,
},
BFloat16VectorField: &BFloat16VectorFieldData{
Data: []byte{0, 1},
Dim: 1,
},
SparseFloatVectorField: &SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
typeutil.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
typeutil.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
},
},
},
ArrayField: &ArrayFieldData{
Data: []*schemapb.ScalarField{
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: []int32{1, 2, 3},
},
}, },
}, },
}, },
}, },
JSONField: &JSONFieldData{
Data: [][]byte{[]byte(`{"key":"value"}`)},
},
}, },
JSONField: &JSONFieldData{ Infos: nil,
Data: [][]byte{[]byte(`{"hello":"world"}`)}, }
d2 := &InsertData{
Data: map[int64]FieldData{
common.RowIDField: &Int64FieldData{
Data: []int64{2},
},
common.TimeStampField: &Int64FieldData{
Data: []int64{2},
},
BoolField: &BoolFieldData{
Data: []bool{false},
},
Int8Field: &Int8FieldData{
Data: []int8{2},
},
Int16Field: &Int16FieldData{
Data: []int16{2},
},
Int32Field: &Int32FieldData{
Data: []int32{2},
},
Int64Field: &Int64FieldData{
Data: []int64{2},
},
FloatField: &FloatFieldData{
Data: []float32{0},
},
DoubleField: &DoubleFieldData{
Data: []float64{0},
},
StringField: &StringFieldData{
Data: []string{"2"},
},
BinaryVectorField: &BinaryVectorFieldData{
Data: []byte{0},
Dim: 8,
},
FloatVectorField: &FloatVectorFieldData{
Data: []float32{0},
Dim: 1,
},
Float16VectorField: &Float16VectorFieldData{
Data: []byte{2, 3},
Dim: 1,
},
BFloat16VectorField: &BFloat16VectorFieldData{
Data: []byte{2, 3},
Dim: 1,
},
SparseFloatVectorField: &SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
},
},
},
ArrayField: &ArrayFieldData{
Data: []*schemapb.ScalarField{
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: []int32{4, 5, 6},
},
},
},
},
},
JSONField: &JSONFieldData{
Data: [][]byte{[]byte(`{"hello":"world"}`)},
},
}, },
}, Infos: nil,
Infos: nil, }
}
MergeInsertData(d1, d2) MergeInsertData(d1, d2)
f, ok := d1.Data[common.RowIDField] f, ok := d1.Data[common.RowIDField]
assert.True(t, ok) assert.True(t, ok)
assert.Equal(t, []int64{1, 2}, f.(*Int64FieldData).Data) assert.Equal(t, []int64{1, 2}, f.(*Int64FieldData).Data)
f, ok = d1.Data[common.TimeStampField] f, ok = d1.Data[common.TimeStampField]
assert.True(t, ok) assert.True(t, ok)
assert.Equal(t, []int64{1, 2}, f.(*Int64FieldData).Data) assert.Equal(t, []int64{1, 2}, f.(*Int64FieldData).Data)
f, ok = d1.Data[BoolField] f, ok = d1.Data[BoolField]
assert.True(t, ok) assert.True(t, ok)
assert.Equal(t, []bool{true, false}, f.(*BoolFieldData).Data) assert.Equal(t, []bool{true, false}, f.(*BoolFieldData).Data)
f, ok = d1.Data[Int8Field] f, ok = d1.Data[Int8Field]
assert.True(t, ok) assert.True(t, ok)
assert.Equal(t, []int8{1, 2}, f.(*Int8FieldData).Data) assert.Equal(t, []int8{1, 2}, f.(*Int8FieldData).Data)
f, ok = d1.Data[Int16Field] f, ok = d1.Data[Int16Field]
assert.True(t, ok) assert.True(t, ok)
assert.Equal(t, []int16{1, 2}, f.(*Int16FieldData).Data) assert.Equal(t, []int16{1, 2}, f.(*Int16FieldData).Data)
f, ok = d1.Data[Int32Field] f, ok = d1.Data[Int32Field]
assert.True(t, ok) assert.True(t, ok)
assert.Equal(t, []int32{1, 2}, f.(*Int32FieldData).Data) assert.Equal(t, []int32{1, 2}, f.(*Int32FieldData).Data)
f, ok = d1.Data[Int64Field] f, ok = d1.Data[Int64Field]
assert.True(t, ok) assert.True(t, ok)
assert.Equal(t, []int64{1, 2}, f.(*Int64FieldData).Data) assert.Equal(t, []int64{1, 2}, f.(*Int64FieldData).Data)
f, ok = d1.Data[FloatField] f, ok = d1.Data[FloatField]
assert.True(t, ok) assert.True(t, ok)
assert.Equal(t, []float32{0, 0}, f.(*FloatFieldData).Data) assert.Equal(t, []float32{0, 0}, f.(*FloatFieldData).Data)
f, ok = d1.Data[DoubleField] f, ok = d1.Data[DoubleField]
assert.True(t, ok) assert.True(t, ok)
assert.Equal(t, []float64{0, 0}, f.(*DoubleFieldData).Data) assert.Equal(t, []float64{0, 0}, f.(*DoubleFieldData).Data)
f, ok = d1.Data[StringField] f, ok = d1.Data[StringField]
assert.True(t, ok) assert.True(t, ok)
assert.Equal(t, []string{"1", "2"}, f.(*StringFieldData).Data) assert.Equal(t, []string{"1", "2"}, f.(*StringFieldData).Data)
f, ok = d1.Data[BinaryVectorField] f, ok = d1.Data[BinaryVectorField]
assert.True(t, ok) assert.True(t, ok)
assert.Equal(t, []byte{0, 0}, f.(*BinaryVectorFieldData).Data) assert.Equal(t, []byte{0, 0}, f.(*BinaryVectorFieldData).Data)
f, ok = d1.Data[FloatVectorField] f, ok = d1.Data[FloatVectorField]
assert.True(t, ok) assert.True(t, ok)
assert.Equal(t, []float32{0, 0}, f.(*FloatVectorFieldData).Data) assert.Equal(t, []float32{0, 0}, f.(*FloatVectorFieldData).Data)
f, ok = d1.Data[Float16VectorField] f, ok = d1.Data[Float16VectorField]
assert.True(t, ok) assert.True(t, ok)
assert.Equal(t, []byte{0, 1, 2, 3}, f.(*Float16VectorFieldData).Data) assert.Equal(t, []byte{0, 1, 2, 3}, f.(*Float16VectorFieldData).Data)
f, ok = d1.Data[BFloat16VectorField] f, ok = d1.Data[BFloat16VectorField]
assert.True(t, ok) assert.True(t, ok)
assert.Equal(t, []byte{0, 1, 2, 3}, f.(*BFloat16VectorFieldData).Data) assert.Equal(t, []byte{0, 1, 2, 3}, f.(*BFloat16VectorFieldData).Data)
f, ok = d1.Data[SparseFloatVectorField] f, ok = d1.Data[SparseFloatVectorField]
assert.True(t, ok) assert.True(t, ok)
assert.Equal(t, &SparseFloatVectorFieldData{ assert.Equal(t, &SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{ SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600, Dim: 600,
Contents: [][]byte{ Contents: [][]byte{
typeutil.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}), typeutil.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
typeutil.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}), typeutil.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}), typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
},
}, },
}, }, f.(*SparseFloatVectorFieldData))
}, f.(*SparseFloatVectorFieldData))
f, ok = d1.Data[ArrayField] f, ok = d1.Data[ArrayField]
assert.True(t, ok) assert.True(t, ok)
assert.Equal(t, []int32{1, 2, 3}, f.(*ArrayFieldData).Data[0].GetIntData().GetData()) assert.Equal(t, []int32{1, 2, 3}, f.(*ArrayFieldData).Data[0].GetIntData().GetData())
assert.Equal(t, []int32{4, 5, 6}, f.(*ArrayFieldData).Data[1].GetIntData().GetData()) assert.Equal(t, []int32{4, 5, 6}, f.(*ArrayFieldData).Data[1].GetIntData().GetData())
f, ok = d1.Data[JSONField] f, ok = d1.Data[JSONField]
assert.True(t, ok) assert.True(t, ok)
assert.EqualValues(t, [][]byte{[]byte(`{"key":"value"}`), []byte(`{"hello":"world"}`)}, f.(*JSONFieldData).Data) assert.EqualValues(t, [][]byte{[]byte(`{"key":"value"}`), []byte(`{"hello":"world"}`)}, f.(*JSONFieldData).Data)
})
} }
func TestMergeFloat16VectorField(t *testing.T) { func TestMergeFloat16VectorField(t *testing.T) {

View File

@ -70,7 +70,7 @@ func (suite *ReaderSuite) SetupTest() {
func createBinlogBuf(t *testing.T, field *schemapb.FieldSchema, data storage.FieldData) []byte { func createBinlogBuf(t *testing.T, field *schemapb.FieldSchema, data storage.FieldData) []byte {
dataType := field.GetDataType() dataType := field.GetDataType()
w := storage.NewInsertBinlogWriter(dataType, 1, 1, 1, field.GetFieldID()) w := storage.NewInsertBinlogWriter(dataType, 1, 1, 1, field.GetFieldID(), false)
assert.NotNil(t, w) assert.NotNil(t, w)
defer w.Close() defer w.Close()
@ -81,7 +81,7 @@ func createBinlogBuf(t *testing.T, field *schemapb.FieldSchema, data storage.Fie
dim = 1 dim = 1
} }
evt, err := w.NextInsertEventWriter(int(dim)) evt, err := w.NextInsertEventWriter(false, int(dim))
assert.NoError(t, err) assert.NoError(t, err)
evt.SetEventTimestamp(1, math.MaxInt64) evt.SetEventTimestamp(1, math.MaxInt64)
@ -94,42 +94,42 @@ func createBinlogBuf(t *testing.T, field *schemapb.FieldSchema, data storage.Fie
switch dataType { switch dataType {
case schemapb.DataType_Bool: case schemapb.DataType_Bool:
err = evt.AddBoolToPayload(data.(*storage.BoolFieldData).Data) err = evt.AddBoolToPayload(data.(*storage.BoolFieldData).Data, nil)
assert.NoError(t, err) assert.NoError(t, err)
case schemapb.DataType_Int8: case schemapb.DataType_Int8:
err = evt.AddInt8ToPayload(data.(*storage.Int8FieldData).Data) err = evt.AddInt8ToPayload(data.(*storage.Int8FieldData).Data, nil)
assert.NoError(t, err) assert.NoError(t, err)
case schemapb.DataType_Int16: case schemapb.DataType_Int16:
err = evt.AddInt16ToPayload(data.(*storage.Int16FieldData).Data) err = evt.AddInt16ToPayload(data.(*storage.Int16FieldData).Data, nil)
assert.NoError(t, err) assert.NoError(t, err)
case schemapb.DataType_Int32: case schemapb.DataType_Int32:
err = evt.AddInt32ToPayload(data.(*storage.Int32FieldData).Data) err = evt.AddInt32ToPayload(data.(*storage.Int32FieldData).Data, nil)
assert.NoError(t, err) assert.NoError(t, err)
case schemapb.DataType_Int64: case schemapb.DataType_Int64:
err = evt.AddInt64ToPayload(data.(*storage.Int64FieldData).Data) err = evt.AddInt64ToPayload(data.(*storage.Int64FieldData).Data, nil)
assert.NoError(t, err) assert.NoError(t, err)
case schemapb.DataType_Float: case schemapb.DataType_Float:
err = evt.AddFloatToPayload(data.(*storage.FloatFieldData).Data) err = evt.AddFloatToPayload(data.(*storage.FloatFieldData).Data, nil)
assert.NoError(t, err) assert.NoError(t, err)
case schemapb.DataType_Double: case schemapb.DataType_Double:
err = evt.AddDoubleToPayload(data.(*storage.DoubleFieldData).Data) err = evt.AddDoubleToPayload(data.(*storage.DoubleFieldData).Data, nil)
assert.NoError(t, err) assert.NoError(t, err)
case schemapb.DataType_VarChar: case schemapb.DataType_VarChar:
values := data.(*storage.StringFieldData).Data values := data.(*storage.StringFieldData).Data
for _, val := range values { for _, val := range values {
err = evt.AddOneStringToPayload(val) err = evt.AddOneStringToPayload(val, true)
assert.NoError(t, err) assert.NoError(t, err)
} }
case schemapb.DataType_JSON: case schemapb.DataType_JSON:
rows := data.(*storage.JSONFieldData).Data rows := data.(*storage.JSONFieldData).Data
for i := 0; i < len(rows); i++ { for i := 0; i < len(rows); i++ {
err = evt.AddOneJSONToPayload(rows[i]) err = evt.AddOneJSONToPayload(rows[i], true)
assert.NoError(t, err) assert.NoError(t, err)
} }
case schemapb.DataType_Array: case schemapb.DataType_Array:
rows := data.(*storage.ArrayFieldData).Data rows := data.(*storage.ArrayFieldData).Data
for i := 0; i < len(rows); i++ { for i := 0; i < len(rows); i++ {
err = evt.AddOneArrayToPayload(rows[i]) err = evt.AddOneArrayToPayload(rows[i], true)
assert.NoError(t, err) assert.NoError(t, err)
} }
case schemapb.DataType_BinaryVector: case schemapb.DataType_BinaryVector:

View File

@ -43,7 +43,7 @@ func readData(reader *storage.BinlogReader, et storage.EventTypeCode) ([]any, er
return nil, merr.WrapErrImportFailed(fmt.Sprintf("wrong binlog type, expect:%s, actual:%s", return nil, merr.WrapErrImportFailed(fmt.Sprintf("wrong binlog type, expect:%s, actual:%s",
et.String(), event.TypeCode.String())) et.String(), event.TypeCode.String()))
} }
rows, _, err := event.PayloadReaderInterface.GetDataFromPayload() rows, _, _, err := event.PayloadReaderInterface.GetDataFromPayload()
if err != nil { if err != nil {
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read data, error: %v", err)) return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read data, error: %v", err))
} }

View File

@ -590,6 +590,7 @@ func AppendFieldData(dst, src []*schemapb.FieldData, idx int64) (appendSize int6
Field: &schemapb.FieldData_Scalars{ Field: &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{}, Scalars: &schemapb.ScalarField{},
}, },
ValidData: fieldData.GetValidData(),
} }
} }
dstScalar := dst[i].GetScalars() dstScalar := dst[i].GetScalars()