diff --git a/internal/core/src/storage/Event.cpp b/internal/core/src/storage/Event.cpp index a3d6e5ef6b..4673e50245 100644 --- a/internal/core/src/storage/Event.cpp +++ b/internal/core/src/storage/Event.cpp @@ -34,7 +34,7 @@ GetFixPartSize(DescriptorEventData& data) { sizeof(data.fix_part.segment_id) + sizeof(data.fix_part.field_id) + sizeof(data.fix_part.start_timestamp) + sizeof(data.fix_part.end_timestamp) + - sizeof(data.fix_part.data_type); + sizeof(data.fix_part.data_type) + sizeof(data.fix_part.nullable); } int GetFixPartSize(BaseEventData& data) { @@ -107,6 +107,8 @@ DescriptorEventDataFixPart::DescriptorEventDataFixPart(BinlogReaderPtr reader) { assert(ast.ok()); ast = reader->Read(sizeof(field_id), &field_id); assert(ast.ok()); + ast = reader->Read(sizeof(nullable), &nullable); + assert(ast.ok()); ast = reader->Read(sizeof(start_timestamp), &start_timestamp); assert(ast.ok()); ast = reader->Read(sizeof(end_timestamp), &end_timestamp); @@ -120,7 +122,7 @@ DescriptorEventDataFixPart::Serialize() { auto fix_part_size = sizeof(collection_id) + sizeof(partition_id) + sizeof(segment_id) + sizeof(field_id) + sizeof(start_timestamp) + sizeof(end_timestamp) + - sizeof(data_type); + sizeof(data_type) + sizeof(nullable); std::vector res(fix_part_size); int offset = 0; memcpy(res.data() + offset, &collection_id, sizeof(collection_id)); @@ -131,6 +133,8 @@ DescriptorEventDataFixPart::Serialize() { offset += sizeof(segment_id); memcpy(res.data() + offset, &field_id, sizeof(field_id)); offset += sizeof(field_id); + memcpy(res.data() + offset, &nullable, sizeof(nullable)); + offset += sizeof(nullable); memcpy(res.data() + offset, &start_timestamp, sizeof(start_timestamp)); offset += sizeof(start_timestamp); memcpy(res.data() + offset, &end_timestamp, sizeof(end_timestamp)); diff --git a/internal/core/src/storage/Event.h b/internal/core/src/storage/Event.h index 87a5d0eb4d..2e5152be45 100644 --- a/internal/core/src/storage/Event.h +++ b/internal/core/src/storage/Event.h 
@@ -46,6 +46,8 @@ struct DescriptorEventDataFixPart { int64_t partition_id; int64_t segment_id; int64_t field_id; + //(todo:smellthemoon) set nullable false temporarily, will change it + bool nullable = false; Timestamp start_timestamp; Timestamp end_timestamp; milvus::proto::schema::DataType data_type; diff --git a/internal/core/src/storage/InsertData.cpp b/internal/core/src/storage/InsertData.cpp index 514d98d56a..8a74ee1220 100644 --- a/internal/core/src/storage/InsertData.cpp +++ b/internal/core/src/storage/InsertData.cpp @@ -61,6 +61,8 @@ InsertData::serialize_to_remote_file() { des_fix_part.start_timestamp = time_range_.first; des_fix_part.end_timestamp = time_range_.second; des_fix_part.data_type = milvus::proto::schema::DataType(data_type); + //(todo:smellthemoon) set nullable false temporarily, will change it + des_fix_part.nullable = false; for (auto i = int8_t(EventType::DescriptorEvent); i < int8_t(EventType::EventTypeEnd); i++) { diff --git a/internal/core/src/storage/PayloadReader.cpp b/internal/core/src/storage/PayloadReader.cpp index 81b0cae4e0..4d35aa493f 100644 --- a/internal/core/src/storage/PayloadReader.cpp +++ b/internal/core/src/storage/PayloadReader.cpp @@ -77,6 +77,7 @@ PayloadReader::init(std::shared_ptr input) { *rb_reader) { AssertInfo(maybe_batch.ok(), "get batch record success"); auto array = maybe_batch.ValueOrDie()->column(column_index); + // to read field_data_->FillFieldData(array); } AssertInfo(field_data_->IsFull(), "field data hasn't been filled done"); diff --git a/internal/datanode/syncmgr/taskv2_test.go b/internal/datanode/syncmgr/taskv2_test.go index c9d2fc302c..dc78eaba6a 100644 --- a/internal/datanode/syncmgr/taskv2_test.go +++ b/internal/datanode/syncmgr/taskv2_test.go @@ -321,6 +321,81 @@ func (s *SyncTaskSuiteV2) TestBuildRecord() { s.EqualValues(2, b.NewRecord().NumRows()) } +func (s *SyncTaskSuiteV2) TestBuildRecordNullable() { + fieldSchemas := []*schemapb.FieldSchema{ + {FieldID: 1, Name: "field0", DataType: 
schemapb.DataType_Bool}, + {FieldID: 2, Name: "field1", DataType: schemapb.DataType_Int8}, + {FieldID: 3, Name: "field2", DataType: schemapb.DataType_Int16}, + {FieldID: 4, Name: "field3", DataType: schemapb.DataType_Int32}, + {FieldID: 5, Name: "field4", DataType: schemapb.DataType_Int64}, + {FieldID: 6, Name: "field5", DataType: schemapb.DataType_Float}, + {FieldID: 7, Name: "field6", DataType: schemapb.DataType_Double}, + {FieldID: 8, Name: "field7", DataType: schemapb.DataType_String}, + {FieldID: 9, Name: "field8", DataType: schemapb.DataType_VarChar}, + {FieldID: 10, Name: "field9", DataType: schemapb.DataType_BinaryVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "8"}}}, + {FieldID: 11, Name: "field10", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}}, + {FieldID: 12, Name: "field11", DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int32}, + {FieldID: 13, Name: "field12", DataType: schemapb.DataType_JSON}, + {FieldID: 14, Name: "field12", DataType: schemapb.DataType_Float16Vector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}}, + } + + schema, err := typeutil.ConvertToArrowSchema(fieldSchemas) + s.NoError(err) + + b := array.NewRecordBuilder(memory.NewGoAllocator(), schema) + defer b.Release() + + data := &storage.InsertData{ + Data: map[int64]storage.FieldData{ + 1: &storage.BoolFieldData{Data: []bool{true, false}, ValidData: []bool{true, true}}, + 2: &storage.Int8FieldData{Data: []int8{3, 4}, ValidData: []bool{true, true}}, + 3: &storage.Int16FieldData{Data: []int16{3, 4}, ValidData: []bool{true, true}}, + 4: &storage.Int32FieldData{Data: []int32{3, 4}, ValidData: []bool{true, true}}, + 5: &storage.Int64FieldData{Data: []int64{3, 4}, ValidData: []bool{true, true}}, + 6: &storage.FloatFieldData{Data: []float32{3, 4}, ValidData: []bool{true, true}}, + 7: &storage.DoubleFieldData{Data: []float64{3, 4}, ValidData: []bool{true, true}}, + 8: 
&storage.StringFieldData{Data: []string{"3", "4"}, ValidData: []bool{true, true}}, + 9: &storage.StringFieldData{Data: []string{"3", "4"}, ValidData: []bool{true, true}}, + 10: &storage.BinaryVectorFieldData{Data: []byte{0, 255}, Dim: 8}, + 11: &storage.FloatVectorFieldData{ + Data: []float32{4, 5, 6, 7, 4, 5, 6, 7}, + Dim: 4, + }, + 12: &storage.ArrayFieldData{ + ElementType: schemapb.DataType_Int32, + Data: []*schemapb.ScalarField{ + { + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{Data: []int32{3, 2, 1}}, + }, + }, + { + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{Data: []int32{6, 5, 4}}, + }, + }, + }, + ValidData: []bool{true, true}, + }, + 13: &storage.JSONFieldData{ + Data: [][]byte{ + []byte(`{"batch":2}`), + []byte(`{"key":"world"}`), + }, + ValidData: []bool{true, true}, + }, + 14: &storage.Float16VectorFieldData{ + Data: []byte{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255}, + Dim: 4, + }, + }, + } + + err = typeutil.BuildRecord(b, data, fieldSchemas) + s.NoError(err) + s.EqualValues(2, b.NewRecord().NumRows()) +} + func TestSyncTaskV2(t *testing.T) { suite.Run(t, new(SyncTaskSuiteV2)) } diff --git a/internal/querynodev2/segments/mock_data.go b/internal/querynodev2/segments/mock_data.go index 6d15212d04..03080b870c 100644 --- a/internal/querynodev2/segments/mock_data.go +++ b/internal/querynodev2/segments/mock_data.go @@ -632,7 +632,7 @@ func SaveDeltaLog(collectionID int64, for i := int64(0); i < dData.RowCount; i++ { int64PkValue := dData.Pks[i].(*storage.Int64PrimaryKey).Value ts := dData.Tss[i] - eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,%d", int64PkValue, ts)) + eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,%d", int64PkValue, ts), true) sizeTotal += binary.Size(int64PkValue) sizeTotal += binary.Size(ts) } diff --git a/internal/querynodev2/segments/segment_loader.go b/internal/querynodev2/segments/segment_loader.go index 76e12da2ff..2b569bada8 100644 --- 
a/internal/querynodev2/segments/segment_loader.go +++ b/internal/querynodev2/segments/segment_loader.go @@ -1451,7 +1451,7 @@ func (loader *segmentLoader) patchEntryNumber(ctx context.Context, segment *Loca return err } - rowIDs, err := er.GetInt64FromPayload() + rowIDs, _, err := er.GetInt64FromPayload() if err != nil { return err } diff --git a/internal/storage/binlog_reader.go b/internal/storage/binlog_reader.go index fd02eafe74..98438c59ff 100644 --- a/internal/storage/binlog_reader.go +++ b/internal/storage/binlog_reader.go @@ -50,7 +50,7 @@ func (reader *BinlogReader) NextEventReader() (*EventReader, error) { reader.eventReader.Close() } var err error - reader.eventReader, err = newEventReader(reader.descriptorEvent.PayloadDataType, reader.buffer) + reader.eventReader, err = newEventReader(reader.descriptorEvent.PayloadDataType, reader.buffer, reader.descriptorEvent.Nullable) if err != nil { return nil, err } diff --git a/internal/storage/binlog_test.go b/internal/storage/binlog_test.go index 15454bfb71..6f93e60ede 100644 --- a/internal/storage/binlog_test.go +++ b/internal/storage/binlog_test.go @@ -37,25 +37,25 @@ import ( /* #nosec G103 */ func TestInsertBinlog(t *testing.T) { - w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40) + w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false) - e1, err := w.NextInsertEventWriter() + e1, err := w.NextInsertEventWriter(false) assert.NoError(t, err) - err = e1.AddDataToPayload([]int64{1, 2, 3}) + err = e1.AddDataToPayload([]int64{1, 2, 3}, nil) assert.NoError(t, err) - err = e1.AddDataToPayload([]int32{4, 5, 6}) + err = e1.AddDataToPayload([]int32{4, 5, 6}, nil) assert.Error(t, err) - err = e1.AddDataToPayload([]int64{4, 5, 6}) + err = e1.AddDataToPayload([]int64{4, 5, 6}, nil) assert.NoError(t, err) e1.SetEventTimestamp(100, 200) - e2, err := w.NextInsertEventWriter() + e2, err := w.NextInsertEventWriter(false) assert.NoError(t, err) - err = e2.AddDataToPayload([]int64{7, 8, 9}) 
+ err = e2.AddDataToPayload([]int64{7, 8, 9}, nil) assert.NoError(t, err) - err = e2.AddDataToPayload([]bool{true, false, true}) + err = e2.AddDataToPayload([]bool{true, false, true}, nil) assert.Error(t, err) - err = e2.AddDataToPayload([]int64{10, 11, 12}) + err = e2.AddDataToPayload([]int64{10, 11, 12}, nil) assert.NoError(t, err) e2.SetEventTimestamp(300, 400) @@ -123,6 +123,11 @@ func TestInsertBinlog(t *testing.T) { assert.Equal(t, fieldID, int64(40)) pos += int(unsafe.Sizeof(fieldID)) + // descriptor data fix, nullable + nullable := UnsafeReadBool(buf, pos) + assert.Equal(t, nullable, false) + pos += int(unsafe.Sizeof(nullable)) + // descriptor data fix, start time stamp startts := UnsafeReadInt64(buf, pos) assert.Equal(t, startts, int64(1000)) @@ -201,11 +206,12 @@ func TestInsertBinlog(t *testing.T) { // insert e1, payload e1Payload := buf[pos:e1NxtPos] - e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload) + e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload, false) assert.NoError(t, err) - e1a, err := e1r.GetInt64FromPayload() + e1a, valids, err := e1r.GetInt64FromPayload() assert.NoError(t, err) assert.Equal(t, e1a, []int64{1, 2, 3, 4, 5, 6}) + assert.Nil(t, valids) e1r.Close() // start of e2 @@ -243,11 +249,12 @@ func TestInsertBinlog(t *testing.T) { // insert e2, payload e2Payload := buf[pos:] - e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload) + e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload, false) assert.NoError(t, err) - e2a, err := e2r.GetInt64FromPayload() + e2a, valids, err := e2r.GetInt64FromPayload() assert.NoError(t, err) assert.Equal(t, e2a, []int64{7, 8, 9, 10, 11, 12}) + assert.Nil(t, valids) e2r.Close() assert.Equal(t, int(e2NxtPos), len(buf)) @@ -258,8 +265,9 @@ func TestInsertBinlog(t *testing.T) { event1, err := r.NextEventReader() assert.NoError(t, err) assert.NotNil(t, event1) - p1, err := event1.GetInt64FromPayload() + p1, valids, err := event1.GetInt64FromPayload() 
assert.Equal(t, p1, []int64{1, 2, 3, 4, 5, 6}) + assert.Nil(t, valids) assert.NoError(t, err) assert.Equal(t, event1.TypeCode, InsertEventType) ed1, ok := (event1.eventData).(*insertEventData) @@ -270,9 +278,10 @@ func TestInsertBinlog(t *testing.T) { event2, err := r.NextEventReader() assert.NoError(t, err) assert.NotNil(t, event2) - p2, err := event2.GetInt64FromPayload() + p2, valids, err := event2.GetInt64FromPayload() assert.NoError(t, err) assert.Equal(t, p2, []int64{7, 8, 9, 10, 11, 12}) + assert.Nil(t, valids) assert.Equal(t, event2.TypeCode, InsertEventType) ed2, ok := (event2.eventData).(*insertEventData) assert.True(t, ok) @@ -288,21 +297,21 @@ func TestDeleteBinlog(t *testing.T) { e1, err := w.NextDeleteEventWriter() assert.NoError(t, err) - err = e1.AddDataToPayload([]int64{1, 2, 3}) + err = e1.AddDataToPayload([]int64{1, 2, 3}, nil) assert.NoError(t, err) - err = e1.AddDataToPayload([]int32{4, 5, 6}) + err = e1.AddDataToPayload([]int32{4, 5, 6}, nil) assert.Error(t, err) - err = e1.AddDataToPayload([]int64{4, 5, 6}) + err = e1.AddDataToPayload([]int64{4, 5, 6}, nil) assert.NoError(t, err) e1.SetEventTimestamp(100, 200) e2, err := w.NextDeleteEventWriter() assert.NoError(t, err) - err = e2.AddDataToPayload([]int64{7, 8, 9}) + err = e2.AddDataToPayload([]int64{7, 8, 9}, nil) assert.NoError(t, err) - err = e2.AddDataToPayload([]bool{true, false, true}) + err = e2.AddDataToPayload([]bool{true, false, true}, nil) assert.Error(t, err) - err = e2.AddDataToPayload([]int64{10, 11, 12}) + err = e2.AddDataToPayload([]int64{10, 11, 12}, nil) assert.NoError(t, err) e2.SetEventTimestamp(300, 400) @@ -370,6 +379,11 @@ func TestDeleteBinlog(t *testing.T) { assert.Equal(t, fieldID, int64(-1)) pos += int(unsafe.Sizeof(fieldID)) + // descriptor data fix, nullable + nullable := UnsafeReadBool(buf, pos) + assert.Equal(t, nullable, false) + pos += int(unsafe.Sizeof(nullable)) + // descriptor data fix, start time stamp startts := UnsafeReadInt64(buf, pos) assert.Equal(t, 
startts, int64(1000)) @@ -448,11 +462,12 @@ func TestDeleteBinlog(t *testing.T) { // insert e1, payload e1Payload := buf[pos:e1NxtPos] - e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload) + e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload, false) assert.NoError(t, err) - e1a, err := e1r.GetInt64FromPayload() + e1a, valids, err := e1r.GetInt64FromPayload() assert.NoError(t, err) assert.Equal(t, e1a, []int64{1, 2, 3, 4, 5, 6}) + assert.Nil(t, valids) e1r.Close() // start of e2 @@ -490,10 +505,11 @@ func TestDeleteBinlog(t *testing.T) { // insert e2, payload e2Payload := buf[pos:] - e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload) + e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload, false) assert.NoError(t, err) - e2a, err := e2r.GetInt64FromPayload() + e2a, valids, err := e2r.GetInt64FromPayload() assert.NoError(t, err) + assert.Nil(t, valids) assert.Equal(t, e2a, []int64{7, 8, 9, 10, 11, 12}) e2r.Close() @@ -505,7 +521,8 @@ func TestDeleteBinlog(t *testing.T) { event1, err := r.NextEventReader() assert.NoError(t, err) assert.NotNil(t, event1) - p1, err := event1.GetInt64FromPayload() + p1, valids, err := event1.GetInt64FromPayload() + assert.Nil(t, valids) assert.Equal(t, p1, []int64{1, 2, 3, 4, 5, 6}) assert.NoError(t, err) assert.Equal(t, event1.TypeCode, DeleteEventType) @@ -517,7 +534,8 @@ func TestDeleteBinlog(t *testing.T) { event2, err := r.NextEventReader() assert.NoError(t, err) assert.NotNil(t, event2) - p2, err := event2.GetInt64FromPayload() + p2, valids, err := event2.GetInt64FromPayload() + assert.Nil(t, valids) assert.NoError(t, err) assert.Equal(t, p2, []int64{7, 8, 9, 10, 11, 12}) assert.Equal(t, event2.TypeCode, DeleteEventType) @@ -535,21 +553,21 @@ func TestDDLBinlog1(t *testing.T) { e1, err := w.NextCreateCollectionEventWriter() assert.NoError(t, err) - err = e1.AddDataToPayload([]int64{1, 2, 3}) + err = e1.AddDataToPayload([]int64{1, 2, 3}, nil) assert.NoError(t, err) - err = 
e1.AddDataToPayload([]int32{4, 5, 6}) + err = e1.AddDataToPayload([]int32{4, 5, 6}, nil) assert.Error(t, err) - err = e1.AddDataToPayload([]int64{4, 5, 6}) + err = e1.AddDataToPayload([]int64{4, 5, 6}, nil) assert.NoError(t, err) e1.SetEventTimestamp(100, 200) e2, err := w.NextDropCollectionEventWriter() assert.NoError(t, err) - err = e2.AddDataToPayload([]int64{7, 8, 9}) + err = e2.AddDataToPayload([]int64{7, 8, 9}, nil) assert.NoError(t, err) - err = e2.AddDataToPayload([]bool{true, false, true}) + err = e2.AddDataToPayload([]bool{true, false, true}, nil) assert.Error(t, err) - err = e2.AddDataToPayload([]int64{10, 11, 12}) + err = e2.AddDataToPayload([]int64{10, 11, 12}, nil) assert.NoError(t, err) e2.SetEventTimestamp(300, 400) @@ -617,6 +635,11 @@ func TestDDLBinlog1(t *testing.T) { assert.Equal(t, fieldID, int64(-1)) pos += int(unsafe.Sizeof(fieldID)) + // descriptor data fix, nullable + nullable := UnsafeReadBool(buf, pos) + assert.Equal(t, nullable, false) + pos += int(unsafe.Sizeof(nullable)) + // descriptor data fix, start time stamp startts := UnsafeReadInt64(buf, pos) assert.Equal(t, startts, int64(1000)) @@ -695,9 +718,10 @@ func TestDDLBinlog1(t *testing.T) { // insert e1, payload e1Payload := buf[pos:e1NxtPos] - e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload) + e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload, false) assert.NoError(t, err) - e1a, err := e1r.GetInt64FromPayload() + e1a, valids, err := e1r.GetInt64FromPayload() + assert.Nil(t, valids) assert.NoError(t, err) assert.Equal(t, e1a, []int64{1, 2, 3, 4, 5, 6}) e1r.Close() @@ -737,9 +761,10 @@ func TestDDLBinlog1(t *testing.T) { // insert e2, payload e2Payload := buf[pos:] - e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload) + e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload, false) assert.NoError(t, err) - e2a, err := e2r.GetInt64FromPayload() + e2a, valids, err := e2r.GetInt64FromPayload() + assert.Nil(t, valids) assert.NoError(t, 
err) assert.Equal(t, e2a, []int64{7, 8, 9, 10, 11, 12}) e2r.Close() @@ -752,7 +777,8 @@ func TestDDLBinlog1(t *testing.T) { event1, err := r.NextEventReader() assert.NoError(t, err) assert.NotNil(t, event1) - p1, err := event1.GetInt64FromPayload() + p1, valids, err := event1.GetInt64FromPayload() + assert.Nil(t, valids) assert.Equal(t, p1, []int64{1, 2, 3, 4, 5, 6}) assert.NoError(t, err) assert.Equal(t, event1.TypeCode, CreateCollectionEventType) @@ -764,7 +790,8 @@ func TestDDLBinlog1(t *testing.T) { event2, err := r.NextEventReader() assert.NoError(t, err) assert.NotNil(t, event2) - p2, err := event2.GetInt64FromPayload() + p2, valids, err := event2.GetInt64FromPayload() + assert.Nil(t, valids) assert.NoError(t, err) assert.Equal(t, p2, []int64{7, 8, 9, 10, 11, 12}) assert.Equal(t, event2.TypeCode, DropCollectionEventType) @@ -782,21 +809,21 @@ func TestDDLBinlog2(t *testing.T) { e1, err := w.NextCreatePartitionEventWriter() assert.NoError(t, err) - err = e1.AddDataToPayload([]int64{1, 2, 3}) + err = e1.AddDataToPayload([]int64{1, 2, 3}, nil) assert.NoError(t, err) - err = e1.AddDataToPayload([]int32{4, 5, 6}) + err = e1.AddDataToPayload([]int32{4, 5, 6}, nil) assert.Error(t, err) - err = e1.AddDataToPayload([]int64{4, 5, 6}) + err = e1.AddDataToPayload([]int64{4, 5, 6}, nil) assert.NoError(t, err) e1.SetEventTimestamp(100, 200) e2, err := w.NextDropPartitionEventWriter() assert.NoError(t, err) - err = e2.AddDataToPayload([]int64{7, 8, 9}) + err = e2.AddDataToPayload([]int64{7, 8, 9}, nil) assert.NoError(t, err) - err = e2.AddDataToPayload([]bool{true, false, true}) + err = e2.AddDataToPayload([]bool{true, false, true}, nil) assert.Error(t, err) - err = e2.AddDataToPayload([]int64{10, 11, 12}) + err = e2.AddDataToPayload([]int64{10, 11, 12}, nil) assert.NoError(t, err) e2.SetEventTimestamp(300, 400) @@ -863,6 +890,11 @@ func TestDDLBinlog2(t *testing.T) { assert.Equal(t, fieldID, int64(-1)) pos += int(unsafe.Sizeof(fieldID)) + // descriptor data fix, nullable + 
nullable := UnsafeReadBool(buf, pos) + assert.Equal(t, nullable, false) + pos += int(unsafe.Sizeof(nullable)) + // descriptor data fix, start time stamp startts := UnsafeReadInt64(buf, pos) assert.Equal(t, startts, int64(1000)) @@ -941,9 +973,10 @@ func TestDDLBinlog2(t *testing.T) { // insert e1, payload e1Payload := buf[pos:e1NxtPos] - e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload) + e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload, false) assert.NoError(t, err) - e1a, err := e1r.GetInt64FromPayload() + e1a, valids, err := e1r.GetInt64FromPayload() + assert.Nil(t, valids) assert.NoError(t, err) assert.Equal(t, e1a, []int64{1, 2, 3, 4, 5, 6}) e1r.Close() @@ -983,9 +1016,10 @@ func TestDDLBinlog2(t *testing.T) { // insert e2, payload e2Payload := buf[pos:] - e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload) + e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload, false) assert.NoError(t, err) - e2a, err := e2r.GetInt64FromPayload() + e2a, valids, err := e2r.GetInt64FromPayload() + assert.Nil(t, valids) assert.NoError(t, err) assert.Equal(t, e2a, []int64{7, 8, 9, 10, 11, 12}) e2r.Close() @@ -998,7 +1032,8 @@ func TestDDLBinlog2(t *testing.T) { event1, err := r.NextEventReader() assert.NoError(t, err) assert.NotNil(t, event1) - p1, err := event1.GetInt64FromPayload() + p1, valids, err := event1.GetInt64FromPayload() + assert.Nil(t, valids) assert.Equal(t, p1, []int64{1, 2, 3, 4, 5, 6}) assert.NoError(t, err) assert.Equal(t, event1.TypeCode, CreatePartitionEventType) @@ -1010,7 +1045,8 @@ func TestDDLBinlog2(t *testing.T) { event2, err := r.NextEventReader() assert.NoError(t, err) assert.NotNil(t, event2) - p2, err := event2.GetInt64FromPayload() + p2, valids, err := event2.GetInt64FromPayload() + assert.Nil(t, valids) assert.NoError(t, err) assert.Equal(t, p2, []int64{7, 8, 9, 10, 11, 12}) assert.Equal(t, event2.TypeCode, DropPartitionEventType) @@ -1042,7 +1078,7 @@ func TestIndexFileBinlog(t *testing.T) { e, 
err := w.NextIndexFileEventWriter() assert.NoError(t, err) - err = e.AddByteToPayload(payload) + err = e.AddByteToPayload(payload, nil) assert.NoError(t, err) e.SetEventTimestamp(timestamp, timestamp) @@ -1104,6 +1140,11 @@ func TestIndexFileBinlog(t *testing.T) { assert.Equal(t, fieldID, fID) pos += int(unsafe.Sizeof(fID)) + // descriptor data fix, nullable + nullable := UnsafeReadBool(buf, pos) + assert.Equal(t, nullable, false) + pos += int(unsafe.Sizeof(nullable)) + // descriptor data fix, start time stamp startts := UnsafeReadInt64(buf, pos) assert.Equal(t, startts, int64(timestamp)) @@ -1171,7 +1212,7 @@ func TestIndexFileBinlogV2(t *testing.T) { e, err := w.NextIndexFileEventWriter() assert.NoError(t, err) - err = e.AddOneStringToPayload(typeutil.UnsafeBytes2str(payload)) + err = e.AddOneStringToPayload(typeutil.UnsafeBytes2str(payload), true) assert.NoError(t, err) e.SetEventTimestamp(timestamp, timestamp) @@ -1233,6 +1274,11 @@ func TestIndexFileBinlogV2(t *testing.T) { assert.Equal(t, fieldID, fID) pos += int(unsafe.Sizeof(fID)) + // descriptor data fix, nullable + nullable := UnsafeReadBool(buf, pos) + assert.Equal(t, nullable, false) + pos += int(unsafe.Sizeof(nullable)) + // descriptor data fix, start time stamp startts := UnsafeReadInt64(buf, pos) assert.Equal(t, startts, int64(timestamp)) @@ -1309,17 +1355,17 @@ func TestNewBinlogReaderError(t *testing.T) { assert.Nil(t, reader) assert.Error(t, err) - w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40) + w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false) w.SetEventTimeStamp(1000, 2000) - e1, err := w.NextInsertEventWriter() + e1, err := w.NextInsertEventWriter(false) assert.NoError(t, err) - err = e1.AddDataToPayload([]int64{1, 2, 3}) + err = e1.AddDataToPayload([]int64{1, 2, 3}, nil) assert.NoError(t, err) - err = e1.AddDataToPayload([]int32{4, 5, 6}) + err = e1.AddDataToPayload([]int32{4, 5, 6}, nil) assert.Error(t, err) - err = e1.AddDataToPayload([]int64{4, 
5, 6}) + err = e1.AddDataToPayload([]int64{4, 5, 6}, nil) assert.NoError(t, err) e1.SetEventTimestamp(100, 200) @@ -1348,7 +1394,7 @@ func TestNewBinlogReaderError(t *testing.T) { } func TestNewBinlogWriterTsError(t *testing.T) { - w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40) + w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false) _, err := w.GetBuffer() assert.Error(t, err) @@ -1376,21 +1422,21 @@ func TestNewBinlogWriterTsError(t *testing.T) { } func TestInsertBinlogWriterCloseError(t *testing.T) { - insertWriter := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40) - e1, err := insertWriter.NextInsertEventWriter() + insertWriter := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false) + e1, err := insertWriter.NextInsertEventWriter(false) assert.NoError(t, err) sizeTotal := 2000000 insertWriter.baseBinlogWriter.descriptorEventData.AddExtra(originalSizeKey, fmt.Sprintf("%v", sizeTotal)) - err = e1.AddDataToPayload([]int64{1, 2, 3}) + err = e1.AddDataToPayload([]int64{1, 2, 3}, nil) assert.NoError(t, err) e1.SetEventTimestamp(100, 200) insertWriter.SetEventTimeStamp(1000, 2000) err = insertWriter.Finish() assert.NoError(t, err) assert.NotNil(t, insertWriter.buffer) - insertEventWriter, err := insertWriter.NextInsertEventWriter() + insertEventWriter, err := insertWriter.NextInsertEventWriter(false) assert.Nil(t, insertEventWriter) assert.Error(t, err) insertWriter.Close() @@ -1402,7 +1448,7 @@ func TestDeleteBinlogWriteCloseError(t *testing.T) { assert.NoError(t, err) sizeTotal := 2000000 deleteWriter.baseBinlogWriter.descriptorEventData.AddExtra(originalSizeKey, fmt.Sprintf("%v", sizeTotal)) - err = e1.AddDataToPayload([]int64{1, 2, 3}) + err = e1.AddDataToPayload([]int64{1, 2, 3}, nil) assert.NoError(t, err) e1.SetEventTimestamp(100, 200) deleteWriter.SetEventTimeStamp(1000, 2000) @@ -1423,7 +1469,7 @@ func TestDDBinlogWriteCloseError(t *testing.T) { sizeTotal := 2000000 
ddBinlogWriter.baseBinlogWriter.descriptorEventData.AddExtra(originalSizeKey, fmt.Sprintf("%v", sizeTotal)) - err = e1.AddDataToPayload([]int64{1, 2, 3}) + err = e1.AddDataToPayload([]int64{1, 2, 3}, nil) assert.NoError(t, err) e1.SetEventTimestamp(100, 200) @@ -1499,7 +1545,7 @@ func (e *testEvent) SetOffset(offset int32) { var _ EventWriter = (*testEvent)(nil) func TestWriterListError(t *testing.T) { - insertWriter := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40) + insertWriter := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false) sizeTotal := 2000000 insertWriter.baseBinlogWriter.descriptorEventData.AddExtra(originalSizeKey, fmt.Sprintf("%v", sizeTotal)) errorEvent := &testEvent{} diff --git a/internal/storage/binlog_writer.go b/internal/storage/binlog_writer.go index 926c0536f6..583798f112 100644 --- a/internal/storage/binlog_writer.go +++ b/internal/storage/binlog_writer.go @@ -150,7 +150,7 @@ type InsertBinlogWriter struct { } // NextInsertEventWriter returns an event writer to write insert data to an event. 
-func (writer *InsertBinlogWriter) NextInsertEventWriter(dim ...int) (*insertEventWriter, error) { +func (writer *InsertBinlogWriter) NextInsertEventWriter(nullable bool, dim ...int) (*insertEventWriter, error) { if writer.isClosed() { return nil, fmt.Errorf("binlog has closed") } @@ -161,9 +161,9 @@ func (writer *InsertBinlogWriter) NextInsertEventWriter(dim ...int) (*insertEven if len(dim) != 1 { return nil, fmt.Errorf("incorrect input numbers") } - event, err = newInsertEventWriter(writer.PayloadDataType, dim[0]) + event, err = newInsertEventWriter(writer.PayloadDataType, nullable, dim[0]) } else { - event, err = newInsertEventWriter(writer.PayloadDataType) + event, err = newInsertEventWriter(writer.PayloadDataType, nullable) } if err != nil { return nil, err @@ -271,13 +271,14 @@ func (writer *IndexFileBinlogWriter) NextIndexFileEventWriter() (*indexFileEvent } // NewInsertBinlogWriter creates InsertBinlogWriter to write binlog file. -func NewInsertBinlogWriter(dataType schemapb.DataType, collectionID, partitionID, segmentID, FieldID int64) *InsertBinlogWriter { +func NewInsertBinlogWriter(dataType schemapb.DataType, collectionID, partitionID, segmentID, FieldID int64, nullable bool) *InsertBinlogWriter { descriptorEvent := newDescriptorEvent() descriptorEvent.PayloadDataType = dataType descriptorEvent.CollectionID = collectionID descriptorEvent.PartitionID = partitionID descriptorEvent.SegmentID = segmentID descriptorEvent.FieldID = FieldID + descriptorEvent.Nullable = nullable w := &InsertBinlogWriter{ baseBinlogWriter: baseBinlogWriter{ diff --git a/internal/storage/binlog_writer_test.go b/internal/storage/binlog_writer_test.go index 8bc80f6658..02e25d32f3 100644 --- a/internal/storage/binlog_writer_test.go +++ b/internal/storage/binlog_writer_test.go @@ -26,15 +26,15 @@ import ( ) func TestBinlogWriterReader(t *testing.T) { - binlogWriter := NewInsertBinlogWriter(schemapb.DataType_Int32, 10, 20, 30, 40) + binlogWriter := 
NewInsertBinlogWriter(schemapb.DataType_Int32, 10, 20, 30, 40, false) tp := binlogWriter.GetBinlogType() assert.Equal(t, tp, InsertBinlog) binlogWriter.SetEventTimeStamp(1000, 2000) defer binlogWriter.Close() - eventWriter, err := binlogWriter.NextInsertEventWriter() + eventWriter, err := binlogWriter.NextInsertEventWriter(false) assert.NoError(t, err) - err = eventWriter.AddInt32ToPayload([]int32{1, 2, 3}) + err = eventWriter.AddInt32ToPayload([]int32{1, 2, 3}, nil) assert.NoError(t, err) _, err = binlogWriter.GetBuffer() assert.Error(t, err) @@ -50,7 +50,7 @@ func TestBinlogWriterReader(t *testing.T) { nums, err = binlogWriter.GetRowNums() assert.NoError(t, err) assert.EqualValues(t, 3, nums) - err = eventWriter.AddInt32ToPayload([]int32{1, 2, 3}) + err = eventWriter.AddInt32ToPayload([]int32{1, 2, 3}, nil) assert.Error(t, err) nums, err = binlogWriter.GetRowNums() assert.NoError(t, err) @@ -64,9 +64,9 @@ func TestBinlogWriterReader(t *testing.T) { assert.NoError(t, err) eventReader, err := binlogReader.NextEventReader() assert.NoError(t, err) - _, err = eventReader.GetInt8FromPayload() + _, _, err = eventReader.GetInt8FromPayload() assert.Error(t, err) - payload, err := eventReader.GetInt32FromPayload() + payload, _, err := eventReader.GetInt32FromPayload() assert.NoError(t, err) assert.EqualValues(t, 3, len(payload)) assert.EqualValues(t, 1, payload[0]) diff --git a/internal/storage/data_codec.go b/internal/storage/data_codec.go index b809161429..d8c44fc9f9 100644 --- a/internal/storage/data_codec.go +++ b/internal/storage/data_codec.go @@ -247,11 +247,14 @@ func (insertCodec *InsertCodec) Serialize(partitionID UniqueID, segmentID Unique for _, field := range insertCodec.Schema.Schema.Fields { // encode fields - writer = NewInsertBinlogWriter(field.DataType, insertCodec.Schema.ID, partitionID, segmentID, field.FieldID) + writer = NewInsertBinlogWriter(field.DataType, insertCodec.Schema.ID, partitionID, segmentID, field.FieldID, field.GetNullable()) var 
eventWriter *insertEventWriter var err error var dim int64 if typeutil.IsVectorType(field.DataType) { + if field.GetNullable() { + return nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("vectorType not support null, fieldName: %s", field.GetName())) + } switch field.DataType { case schemapb.DataType_FloatVector, schemapb.DataType_BinaryVector, @@ -261,14 +264,14 @@ func (insertCodec *InsertCodec) Serialize(partitionID UniqueID, segmentID Unique if err != nil { return nil, err } - eventWriter, err = writer.NextInsertEventWriter(int(dim)) + eventWriter, err = writer.NextInsertEventWriter(field.GetNullable(), int(dim)) case schemapb.DataType_SparseFloatVector: - eventWriter, err = writer.NextInsertEventWriter() + eventWriter, err = writer.NextInsertEventWriter(field.GetNullable()) default: return nil, fmt.Errorf("undefined data type %d", field.DataType) } } else { - eventWriter, err = writer.NextInsertEventWriter() + eventWriter, err = writer.NextInsertEventWriter(field.GetNullable()) } if err != nil { writer.Close() @@ -323,48 +326,60 @@ func AddFieldDataToPayload(eventWriter *insertEventWriter, dataType schemapb.Dat var err error switch dataType { case schemapb.DataType_Bool: - if err = eventWriter.AddBoolToPayload(singleData.(*BoolFieldData).Data); err != nil { + if err = eventWriter.AddBoolToPayload(singleData.(*BoolFieldData).Data, singleData.(*BoolFieldData).ValidData); err != nil { return err } case schemapb.DataType_Int8: - if err = eventWriter.AddInt8ToPayload(singleData.(*Int8FieldData).Data); err != nil { + if err = eventWriter.AddInt8ToPayload(singleData.(*Int8FieldData).Data, singleData.(*Int8FieldData).ValidData); err != nil { return err } case schemapb.DataType_Int16: - if err = eventWriter.AddInt16ToPayload(singleData.(*Int16FieldData).Data); err != nil { + if err = eventWriter.AddInt16ToPayload(singleData.(*Int16FieldData).Data, singleData.(*Int16FieldData).ValidData); err != nil { return err } case schemapb.DataType_Int32: - if err = 
eventWriter.AddInt32ToPayload(singleData.(*Int32FieldData).Data); err != nil { + if err = eventWriter.AddInt32ToPayload(singleData.(*Int32FieldData).Data, singleData.(*Int32FieldData).ValidData); err != nil { return err } case schemapb.DataType_Int64: - if err = eventWriter.AddInt64ToPayload(singleData.(*Int64FieldData).Data); err != nil { + if err = eventWriter.AddInt64ToPayload(singleData.(*Int64FieldData).Data, singleData.(*Int64FieldData).ValidData); err != nil { return err } case schemapb.DataType_Float: - if err = eventWriter.AddFloatToPayload(singleData.(*FloatFieldData).Data); err != nil { + if err = eventWriter.AddFloatToPayload(singleData.(*FloatFieldData).Data, singleData.(*FloatFieldData).ValidData); err != nil { return err } case schemapb.DataType_Double: - if err = eventWriter.AddDoubleToPayload(singleData.(*DoubleFieldData).Data); err != nil { + if err = eventWriter.AddDoubleToPayload(singleData.(*DoubleFieldData).Data, singleData.(*DoubleFieldData).ValidData); err != nil { return err } case schemapb.DataType_String, schemapb.DataType_VarChar: - for _, singleString := range singleData.(*StringFieldData).Data { - if err = eventWriter.AddOneStringToPayload(singleString); err != nil { + for i, singleString := range singleData.(*StringFieldData).Data { + isValid := true + if len(singleData.(*StringFieldData).ValidData) != 0 { + isValid = singleData.(*StringFieldData).ValidData[i] + } + if err = eventWriter.AddOneStringToPayload(singleString, isValid); err != nil { return err } } case schemapb.DataType_Array: - for _, singleArray := range singleData.(*ArrayFieldData).Data { - if err = eventWriter.AddOneArrayToPayload(singleArray); err != nil { + for i, singleArray := range singleData.(*ArrayFieldData).Data { + isValid := true + if len(singleData.(*ArrayFieldData).ValidData) != 0 { + isValid = singleData.(*ArrayFieldData).ValidData[i] + } + if err = eventWriter.AddOneArrayToPayload(singleArray, isValid); err != nil { return err } } case 
schemapb.DataType_JSON: - for _, singleJSON := range singleData.(*JSONFieldData).Data { - if err = eventWriter.AddOneJSONToPayload(singleJSON); err != nil { + for i, singleJSON := range singleData.(*JSONFieldData).Data { + isValid := true + if len(singleData.(*JSONFieldData).ValidData) != 0 { + isValid = singleData.(*JSONFieldData).ValidData[i] + } + if err = eventWriter.AddOneJSONToPayload(singleJSON, isValid); err != nil { return err } } @@ -448,7 +463,7 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int } switch dataType { case schemapb.DataType_Bool: - singleData, err := eventReader.GetBoolFromPayload() + singleData, validData, err := eventReader.GetBoolFromPayload() if err != nil { eventReader.Close() binlogReader.Close() @@ -463,11 +478,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int boolFieldData := insertData.Data[fieldID].(*BoolFieldData) boolFieldData.Data = append(boolFieldData.Data, singleData...) + boolFieldData.ValidData = append(boolFieldData.ValidData, validData...) totalLength += len(singleData) insertData.Data[fieldID] = boolFieldData case schemapb.DataType_Int8: - singleData, err := eventReader.GetInt8FromPayload() + singleData, validData, err := eventReader.GetInt8FromPayload() if err != nil { eventReader.Close() binlogReader.Close() @@ -482,11 +498,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int int8FieldData := insertData.Data[fieldID].(*Int8FieldData) int8FieldData.Data = append(int8FieldData.Data, singleData...) + int8FieldData.ValidData = append(int8FieldData.ValidData, validData...) 
totalLength += len(singleData) insertData.Data[fieldID] = int8FieldData case schemapb.DataType_Int16: - singleData, err := eventReader.GetInt16FromPayload() + singleData, validData, err := eventReader.GetInt16FromPayload() if err != nil { eventReader.Close() binlogReader.Close() @@ -501,11 +518,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int int16FieldData := insertData.Data[fieldID].(*Int16FieldData) int16FieldData.Data = append(int16FieldData.Data, singleData...) + int16FieldData.ValidData = append(int16FieldData.ValidData, validData...) totalLength += len(singleData) insertData.Data[fieldID] = int16FieldData case schemapb.DataType_Int32: - singleData, err := eventReader.GetInt32FromPayload() + singleData, validData, err := eventReader.GetInt32FromPayload() if err != nil { eventReader.Close() binlogReader.Close() @@ -520,11 +538,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int int32FieldData := insertData.Data[fieldID].(*Int32FieldData) int32FieldData.Data = append(int32FieldData.Data, singleData...) + int32FieldData.ValidData = append(int32FieldData.ValidData, validData...) totalLength += len(singleData) insertData.Data[fieldID] = int32FieldData case schemapb.DataType_Int64: - singleData, err := eventReader.GetInt64FromPayload() + singleData, validData, err := eventReader.GetInt64FromPayload() if err != nil { eventReader.Close() binlogReader.Close() @@ -539,11 +558,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int int64FieldData := insertData.Data[fieldID].(*Int64FieldData) int64FieldData.Data = append(int64FieldData.Data, singleData...) + int64FieldData.ValidData = append(int64FieldData.ValidData, validData...) 
totalLength += len(singleData) insertData.Data[fieldID] = int64FieldData case schemapb.DataType_Float: - singleData, err := eventReader.GetFloatFromPayload() + singleData, validData, err := eventReader.GetFloatFromPayload() if err != nil { eventReader.Close() binlogReader.Close() @@ -558,11 +578,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int floatFieldData := insertData.Data[fieldID].(*FloatFieldData) floatFieldData.Data = append(floatFieldData.Data, singleData...) + floatFieldData.ValidData = append(floatFieldData.ValidData, validData...) totalLength += len(singleData) insertData.Data[fieldID] = floatFieldData case schemapb.DataType_Double: - singleData, err := eventReader.GetDoubleFromPayload() + singleData, validData, err := eventReader.GetDoubleFromPayload() if err != nil { eventReader.Close() binlogReader.Close() @@ -577,11 +598,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int doubleFieldData := insertData.Data[fieldID].(*DoubleFieldData) doubleFieldData.Data = append(doubleFieldData.Data, singleData...) + doubleFieldData.ValidData = append(doubleFieldData.ValidData, validData...) totalLength += len(singleData) insertData.Data[fieldID] = doubleFieldData case schemapb.DataType_String, schemapb.DataType_VarChar: - stringPayload, err := eventReader.GetStringFromPayload() + stringPayload, validData, err := eventReader.GetStringFromPayload() if err != nil { eventReader.Close() binlogReader.Close() @@ -594,14 +616,15 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int } } stringFieldData := insertData.Data[fieldID].(*StringFieldData) + stringFieldData.DataType = dataType stringFieldData.Data = append(stringFieldData.Data, stringPayload...) - stringFieldData.DataType = dataType + stringFieldData.ValidData = append(stringFieldData.ValidData, validData...) 
totalLength += len(stringPayload) insertData.Data[fieldID] = stringFieldData case schemapb.DataType_Array: - arrayPayload, err := eventReader.GetArrayFromPayload() + arrayPayload, validData, err := eventReader.GetArrayFromPayload() if err != nil { eventReader.Close() binlogReader.Close() @@ -616,11 +639,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int arrayFieldData := insertData.Data[fieldID].(*ArrayFieldData) arrayFieldData.Data = append(arrayFieldData.Data, arrayPayload...) + arrayFieldData.ValidData = append(arrayFieldData.ValidData, validData...) totalLength += len(arrayPayload) insertData.Data[fieldID] = arrayFieldData case schemapb.DataType_JSON: - jsonPayload, err := eventReader.GetJSONFromPayload() + jsonPayload, validData, err := eventReader.GetJSONFromPayload() if err != nil { eventReader.Close() binlogReader.Close() @@ -635,6 +659,7 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int jsonFieldData := insertData.Data[fieldID].(*JSONFieldData) jsonFieldData.Data = append(jsonFieldData.Data, jsonPayload...) + jsonFieldData.ValidData = append(jsonFieldData.ValidData, validData...) 
totalLength += len(jsonPayload) insertData.Data[fieldID] = jsonFieldData @@ -934,7 +959,7 @@ func (deleteCodec *DeleteCodec) Serialize(collectionID UniqueID, partitionID Uni if err != nil { return nil, err } - err = eventWriter.AddOneStringToPayload(string(serializedPayload)) + err = eventWriter.AddOneStringToPayload(string(serializedPayload), true) if err != nil { return nil, err } @@ -1084,7 +1109,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Serialize(ts []Timestamp, ddRequ for _, singleTs := range ts { int64Ts = append(int64Ts, int64(singleTs)) } - err = eventWriter.AddInt64ToPayload(int64Ts) + err = eventWriter.AddInt64ToPayload(int64Ts, nil) if err != nil { return nil, err } @@ -1120,7 +1145,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Serialize(ts []Timestamp, ddRequ if err != nil { return nil, err } - err = eventWriter.AddOneStringToPayload(req) + err = eventWriter.AddOneStringToPayload(req, true) if err != nil { return nil, err } @@ -1130,7 +1155,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Serialize(ts []Timestamp, ddRequ if err != nil { return nil, err } - err = eventWriter.AddOneStringToPayload(req) + err = eventWriter.AddOneStringToPayload(req, true) if err != nil { return nil, err } @@ -1140,7 +1165,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Serialize(ts []Timestamp, ddRequ if err != nil { return nil, err } - err = eventWriter.AddOneStringToPayload(req) + err = eventWriter.AddOneStringToPayload(req, true) if err != nil { return nil, err } @@ -1150,7 +1175,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Serialize(ts []Timestamp, ddRequ if err != nil { return nil, err } - err = eventWriter.AddOneStringToPayload(req) + err = eventWriter.AddOneStringToPayload(req, true) if err != nil { return nil, err } @@ -1211,7 +1236,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Deserialize(blobs []*Blob) (ts [ } switch dataType { case schemapb.DataType_Int64: - int64Ts, err := eventReader.GetInt64FromPayload() + int64Ts, _, 
err := eventReader.GetInt64FromPayload() if err != nil { eventReader.Close() binlogReader.Close() @@ -1221,7 +1246,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Deserialize(blobs []*Blob) (ts [ resultTs = append(resultTs, Timestamp(singleTs)) } case schemapb.DataType_String: - stringPayload, err := eventReader.GetStringFromPayload() + stringPayload, _, err := eventReader.GetStringFromPayload() if err != nil { eventReader.Close() binlogReader.Close() diff --git a/internal/storage/data_codec_test.go b/internal/storage/data_codec_test.go index b074d045d3..b37886cd20 100644 --- a/internal/storage/data_codec_test.go +++ b/internal/storage/data_codec_test.go @@ -201,6 +201,62 @@ func genTestCollectionMeta() *etcdpb.CollectionMeta { } } +func TestInsertCodecFailed(t *testing.T) { + t.Run("vector field not support null", func(t *testing.T) { + tests := []struct { + description string + dataType schemapb.DataType + }{ + {"nullable FloatVector field", schemapb.DataType_FloatVector}, + {"nullable Float16Vector field", schemapb.DataType_Float16Vector}, + {"nullable BinaryVector field", schemapb.DataType_BinaryVector}, + {"nullable BFloat16Vector field", schemapb.DataType_BFloat16Vector}, + {"nullable SparseFloatVector field", schemapb.DataType_SparseFloatVector}, + } + + for _, test := range tests { + t.Run(test.description, func(t *testing.T) { + schema := &etcdpb.CollectionMeta{ + ID: CollectionID, + CreateTime: 1, + SegmentIDs: []int64{SegmentID}, + PartitionTags: []string{"partition_0", "partition_1"}, + Schema: &schemapb.CollectionSchema{ + Name: "schema", + Description: "schema", + Fields: []*schemapb.FieldSchema{ + { + FieldID: RowIDField, + Name: "row_id", + Description: "row_id", + DataType: schemapb.DataType_Int64, + }, + { + FieldID: TimestampField, + Name: "Timestamp", + Description: "Timestamp", + DataType: schemapb.DataType_Int64, + }, + { + DataType: test.dataType, + }, + }, + }, + } + insertCodec := NewInsertCodecWithSchema(schema) + insertDataEmpty := 
&InsertData{ + Data: map[int64]FieldData{ + RowIDField: &Int64FieldData{[]int64{}, nil}, + TimestampField: &Int64FieldData{[]int64{}, nil}, + }, + } + _, err := insertCodec.Serialize(PartitionID, SegmentID, insertDataEmpty) + assert.Error(t, err) + }) + } + }) +} + func TestInsertCodec(t *testing.T) { schema := genTestCollectionMeta() insertCodec := NewInsertCodecWithSchema(schema) @@ -374,16 +430,16 @@ func TestInsertCodec(t *testing.T) { insertDataEmpty := &InsertData{ Data: map[int64]FieldData{ - RowIDField: &Int64FieldData{[]int64{}}, - TimestampField: &Int64FieldData{[]int64{}}, - BoolField: &BoolFieldData{[]bool{}}, - Int8Field: &Int8FieldData{[]int8{}}, - Int16Field: &Int16FieldData{[]int16{}}, - Int32Field: &Int32FieldData{[]int32{}}, - Int64Field: &Int64FieldData{[]int64{}}, - FloatField: &FloatFieldData{[]float32{}}, - DoubleField: &DoubleFieldData{[]float64{}}, - StringField: &StringFieldData{[]string{}, schemapb.DataType_VarChar}, + RowIDField: &Int64FieldData{[]int64{}, nil}, + TimestampField: &Int64FieldData{[]int64{}, nil}, + BoolField: &BoolFieldData{[]bool{}, nil}, + Int8Field: &Int8FieldData{[]int8{}, nil}, + Int16Field: &Int16FieldData{[]int16{}, nil}, + Int32Field: &Int32FieldData{[]int32{}, nil}, + Int64Field: &Int64FieldData{[]int64{}, nil}, + FloatField: &FloatFieldData{[]float32{}, nil}, + DoubleField: &DoubleFieldData{[]float64{}, nil}, + StringField: &StringFieldData{[]string{}, schemapb.DataType_VarChar, nil}, BinaryVectorField: &BinaryVectorFieldData{[]byte{}, 8}, FloatVectorField: &FloatVectorFieldData{[]float32{}, 4}, Float16VectorField: &Float16VectorFieldData{[]byte{}, 4}, @@ -394,8 +450,8 @@ func TestInsertCodec(t *testing.T) { Contents: [][]byte{}, }, }, - ArrayField: &ArrayFieldData{schemapb.DataType_Int32, []*schemapb.ScalarField{}}, - JSONField: &JSONFieldData{[][]byte{}}, + ArrayField: &ArrayFieldData{schemapb.DataType_Int32, []*schemapb.ScalarField{}, nil}, + JSONField: &JSONFieldData{[][]byte{}, nil}, }, } b, err := 
insertCodec.Serialize(PartitionID, SegmentID, insertDataEmpty) @@ -557,7 +613,7 @@ func TestUpgradeDeleteLog(t *testing.T) { for i := int64(0); i < dData.RowCount; i++ { int64PkValue := dData.Pks[i].(*Int64PrimaryKey).Value ts := dData.Tss[i] - err = eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,%d", int64PkValue, ts)) + err = eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,%d", int64PkValue, ts), true) assert.NoError(t, err) sizeTotal += binary.Size(int64PkValue) sizeTotal += binary.Size(ts) @@ -595,7 +651,7 @@ func TestUpgradeDeleteLog(t *testing.T) { for i := int64(0); i < dData.RowCount; i++ { int64PkValue := dData.Pks[i].(*Int64PrimaryKey).Value ts := dData.Tss[i] - err = eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,%d,?", int64PkValue, ts)) + err = eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,%d,?", int64PkValue, ts), true) assert.NoError(t, err) } eventWriter.SetEventTimestamp(100, 200) @@ -626,7 +682,7 @@ func TestUpgradeDeleteLog(t *testing.T) { for i := int64(0); i < dData.RowCount; i++ { ts := dData.Tss[i] - err = eventWriter.AddOneStringToPayload(fmt.Sprintf("abc,%d", ts)) + err = eventWriter.AddOneStringToPayload(fmt.Sprintf("abc,%d", ts), true) assert.NoError(t, err) } eventWriter.SetEventTimestamp(100, 200) @@ -657,7 +713,7 @@ func TestUpgradeDeleteLog(t *testing.T) { for i := int64(0); i < dData.RowCount; i++ { int64PkValue := dData.Pks[i].(*Int64PrimaryKey).Value - err = eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,abc", int64PkValue)) + err = eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,abc", int64PkValue), true) assert.NoError(t, err) } eventWriter.SetEventTimestamp(100, 200) @@ -845,16 +901,16 @@ func TestMemorySize(t *testing.T) { insertDataEmpty := &InsertData{ Data: map[int64]FieldData{ - RowIDField: &Int64FieldData{[]int64{}}, - TimestampField: &Int64FieldData{[]int64{}}, - BoolField: &BoolFieldData{[]bool{}}, - Int8Field: &Int8FieldData{[]int8{}}, - Int16Field: &Int16FieldData{[]int16{}}, - Int32Field: 
&Int32FieldData{[]int32{}}, - Int64Field: &Int64FieldData{[]int64{}}, - FloatField: &FloatFieldData{[]float32{}}, - DoubleField: &DoubleFieldData{[]float64{}}, - StringField: &StringFieldData{[]string{}, schemapb.DataType_VarChar}, + RowIDField: &Int64FieldData{[]int64{}, nil}, + TimestampField: &Int64FieldData{[]int64{}, nil}, + BoolField: &BoolFieldData{[]bool{}, nil}, + Int8Field: &Int8FieldData{[]int8{}, nil}, + Int16Field: &Int16FieldData{[]int16{}, nil}, + Int32Field: &Int32FieldData{[]int32{}, nil}, + Int64Field: &Int64FieldData{[]int64{}, nil}, + FloatField: &FloatFieldData{[]float32{}, nil}, + DoubleField: &DoubleFieldData{[]float64{}, nil}, + StringField: &StringFieldData{[]string{}, schemapb.DataType_VarChar, nil}, BinaryVectorField: &BinaryVectorFieldData{[]byte{}, 8}, FloatVectorField: &FloatVectorFieldData{[]float32{}, 4}, }, @@ -920,24 +976,24 @@ func TestDeleteData(t *testing.T) { } func TestAddFieldDataToPayload(t *testing.T) { - w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40) - e, _ := w.NextInsertEventWriter() + w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false) + e, _ := w.NextInsertEventWriter(false) var err error - err = AddFieldDataToPayload(e, schemapb.DataType_Bool, &BoolFieldData{[]bool{}}) + err = AddFieldDataToPayload(e, schemapb.DataType_Bool, &BoolFieldData{[]bool{}, nil}) assert.Error(t, err) - err = AddFieldDataToPayload(e, schemapb.DataType_Int8, &Int8FieldData{[]int8{}}) + err = AddFieldDataToPayload(e, schemapb.DataType_Int8, &Int8FieldData{[]int8{}, nil}) assert.Error(t, err) - err = AddFieldDataToPayload(e, schemapb.DataType_Int16, &Int16FieldData{[]int16{}}) + err = AddFieldDataToPayload(e, schemapb.DataType_Int16, &Int16FieldData{[]int16{}, nil}) assert.Error(t, err) - err = AddFieldDataToPayload(e, schemapb.DataType_Int32, &Int32FieldData{[]int32{}}) + err = AddFieldDataToPayload(e, schemapb.DataType_Int32, &Int32FieldData{[]int32{}, nil}) assert.Error(t, err) - err = 
AddFieldDataToPayload(e, schemapb.DataType_Int64, &Int64FieldData{[]int64{}}) + err = AddFieldDataToPayload(e, schemapb.DataType_Int64, &Int64FieldData{[]int64{}, nil}) assert.Error(t, err) - err = AddFieldDataToPayload(e, schemapb.DataType_Float, &FloatFieldData{[]float32{}}) + err = AddFieldDataToPayload(e, schemapb.DataType_Float, &FloatFieldData{[]float32{}, nil}) assert.Error(t, err) - err = AddFieldDataToPayload(e, schemapb.DataType_Double, &DoubleFieldData{[]float64{}}) + err = AddFieldDataToPayload(e, schemapb.DataType_Double, &DoubleFieldData{[]float64{}, nil}) assert.Error(t, err) - err = AddFieldDataToPayload(e, schemapb.DataType_String, &StringFieldData{[]string{"test"}, schemapb.DataType_VarChar}) + err = AddFieldDataToPayload(e, schemapb.DataType_String, &StringFieldData{[]string{"test"}, schemapb.DataType_VarChar, nil}) assert.Error(t, err) err = AddFieldDataToPayload(e, schemapb.DataType_Array, &ArrayFieldData{ ElementType: schemapb.DataType_VarChar, @@ -948,7 +1004,7 @@ func TestAddFieldDataToPayload(t *testing.T) { }}, }) assert.Error(t, err) - err = AddFieldDataToPayload(e, schemapb.DataType_JSON, &JSONFieldData{[][]byte{[]byte(`"batch":2}`)}}) + err = AddFieldDataToPayload(e, schemapb.DataType_JSON, &JSONFieldData{[][]byte{[]byte(`"batch":2}`)}, nil}) assert.Error(t, err) err = AddFieldDataToPayload(e, schemapb.DataType_BinaryVector, &BinaryVectorFieldData{[]byte{}, 8}) assert.Error(t, err) diff --git a/internal/storage/event_data.go b/internal/storage/event_data.go index 2b0c9baa6f..fe9f055324 100644 --- a/internal/storage/event_data.go +++ b/internal/storage/event_data.go @@ -46,6 +46,7 @@ type DescriptorEventDataFixPart struct { PartitionID int64 SegmentID int64 FieldID int64 + Nullable bool StartTimestamp typeutil.Timestamp EndTimestamp typeutil.Timestamp PayloadDataType schemapb.DataType @@ -350,6 +351,7 @@ func newDescriptorEventData() *descriptorEventData { StartTimestamp: 0, EndTimestamp: 0, PayloadDataType: -1, + Nullable: false, }, 
PostHeaderLengths: []uint8{}, Extras: make(map[string]interface{}), diff --git a/internal/storage/event_reader.go b/internal/storage/event_reader.go index b4388d3b53..b7f073dc76 100644 --- a/internal/storage/event_reader.go +++ b/internal/storage/event_reader.go @@ -85,7 +85,7 @@ func (reader *EventReader) Close() { } } -func newEventReader(datatype schemapb.DataType, buffer *bytes.Buffer) (*EventReader, error) { +func newEventReader(datatype schemapb.DataType, buffer *bytes.Buffer, nullable bool) (*EventReader, error) { reader := &EventReader{ eventHeader: eventHeader{ baseEventHeader{}, @@ -103,7 +103,7 @@ func newEventReader(datatype schemapb.DataType, buffer *bytes.Buffer) (*EventRea next := int(reader.EventLength - reader.eventHeader.GetMemoryUsageInBytes() - reader.GetEventDataFixPartSize()) payloadBuffer := buffer.Next(next) - payloadReader, err := NewPayloadReader(datatype, payloadBuffer) + payloadReader, err := NewPayloadReader(datatype, payloadBuffer, nullable) if err != nil { return nil, err } diff --git a/internal/storage/event_test.go b/internal/storage/event_test.go index e432e3a829..74827639b8 100644 --- a/internal/storage/event_test.go +++ b/internal/storage/event_test.go @@ -89,17 +89,25 @@ func TestDescriptorEvent(t *testing.T) { int(unsafe.Sizeof(partID))+ int(unsafe.Sizeof(segID))) assert.Equal(t, fieldID, int64(-1)) - startTs := UnsafeReadInt64(buffer, binary.Size(eventHeader{})+ + nullable := UnsafeReadBool(buffer, binary.Size(eventHeader{})+ int(unsafe.Sizeof(collID))+ int(unsafe.Sizeof(partID))+ int(unsafe.Sizeof(segID))+ int(unsafe.Sizeof(fieldID))) + assert.Equal(t, nullable, false) + startTs := UnsafeReadInt64(buffer, binary.Size(eventHeader{})+ + int(unsafe.Sizeof(collID))+ + int(unsafe.Sizeof(partID))+ + int(unsafe.Sizeof(segID))+ + int(unsafe.Sizeof(fieldID))+ + int(unsafe.Sizeof(nullable))) assert.Equal(t, startTs, int64(0)) endTs := UnsafeReadInt64(buffer, binary.Size(eventHeader{})+ int(unsafe.Sizeof(collID))+ 
int(unsafe.Sizeof(partID))+ int(unsafe.Sizeof(segID))+ int(unsafe.Sizeof(fieldID))+ + int(unsafe.Sizeof(nullable))+ int(unsafe.Sizeof(startTs))) assert.Equal(t, endTs, int64(0)) colType := UnsafeReadInt32(buffer, binary.Size(eventHeader{})+ @@ -107,6 +115,7 @@ func TestDescriptorEvent(t *testing.T) { int(unsafe.Sizeof(partID))+ int(unsafe.Sizeof(segID))+ int(unsafe.Sizeof(fieldID))+ + int(unsafe.Sizeof(nullable))+ int(unsafe.Sizeof(startTs))+ int(unsafe.Sizeof(endTs))) assert.Equal(t, colType, int32(-1)) @@ -116,6 +125,7 @@ func TestDescriptorEvent(t *testing.T) { int(unsafe.Sizeof(partID)) + int(unsafe.Sizeof(segID)) + int(unsafe.Sizeof(fieldID)) + + int(unsafe.Sizeof(nullable)) + int(unsafe.Sizeof(startTs)) + int(unsafe.Sizeof(endTs)) + int(unsafe.Sizeof(colType)) @@ -161,177 +171,178 @@ func TestInsertEvent(t *testing.T) { payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{}) pBuf := wBuf[payloadOffset:] - pR, err := NewPayloadReader(dt, pBuf) + pR, err := NewPayloadReader(dt, pBuf, false) assert.NoError(t, err) - values, _, err := pR.GetDataFromPayload() + values, _, _, err := pR.GetDataFromPayload() assert.NoError(t, err) assert.Equal(t, values, ev) pR.Close() - r, err := newEventReader(dt, bytes.NewBuffer(wBuf)) + r, err := newEventReader(dt, bytes.NewBuffer(wBuf), false) assert.NoError(t, err) - payload, _, err := r.GetDataFromPayload() + payload, nulls, _, err := r.GetDataFromPayload() assert.NoError(t, err) + assert.Nil(t, nulls) assert.Equal(t, payload, ev) r.Close() } t.Run("insert_bool", func(t *testing.T) { - w, err := newInsertEventWriter(schemapb.DataType_Bool) + w, err := newInsertEventWriter(schemapb.DataType_Bool, false) assert.NoError(t, err) insertT(t, schemapb.DataType_Bool, w, func(w *insertEventWriter) error { - return w.AddDataToPayload([]bool{true, false, true}) + return w.AddDataToPayload([]bool{true, false, true}, nil) }, func(w *insertEventWriter) error { - return w.AddDataToPayload([]bool{false, true, false}) + 
return w.AddDataToPayload([]bool{false, true, false}, nil) }, func(w *insertEventWriter) error { - return w.AddDataToPayload([]int{1, 2, 3, 4, 5}) + return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil) }, []bool{true, false, true, false, true, false}) }) t.Run("insert_int8", func(t *testing.T) { - w, err := newInsertEventWriter(schemapb.DataType_Int8) + w, err := newInsertEventWriter(schemapb.DataType_Int8, false) assert.NoError(t, err) insertT(t, schemapb.DataType_Int8, w, func(w *insertEventWriter) error { - return w.AddDataToPayload([]int8{1, 2, 3}) + return w.AddDataToPayload([]int8{1, 2, 3}, nil) }, func(w *insertEventWriter) error { - return w.AddDataToPayload([]int8{4, 5, 6}) + return w.AddDataToPayload([]int8{4, 5, 6}, nil) }, func(w *insertEventWriter) error { - return w.AddDataToPayload([]int{1, 2, 3, 4, 5}) + return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil) }, []int8{1, 2, 3, 4, 5, 6}) }) t.Run("insert_int16", func(t *testing.T) { - w, err := newInsertEventWriter(schemapb.DataType_Int16) + w, err := newInsertEventWriter(schemapb.DataType_Int16, false) assert.NoError(t, err) insertT(t, schemapb.DataType_Int16, w, func(w *insertEventWriter) error { - return w.AddDataToPayload([]int16{1, 2, 3}) + return w.AddDataToPayload([]int16{1, 2, 3}, nil) }, func(w *insertEventWriter) error { - return w.AddDataToPayload([]int16{4, 5, 6}) + return w.AddDataToPayload([]int16{4, 5, 6}, nil) }, func(w *insertEventWriter) error { - return w.AddDataToPayload([]int{1, 2, 3, 4, 5}) + return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil) }, []int16{1, 2, 3, 4, 5, 6}) }) t.Run("insert_int32", func(t *testing.T) { - w, err := newInsertEventWriter(schemapb.DataType_Int32) + w, err := newInsertEventWriter(schemapb.DataType_Int32, false) assert.NoError(t, err) insertT(t, schemapb.DataType_Int32, w, func(w *insertEventWriter) error { - return w.AddDataToPayload([]int32{1, 2, 3}) + return w.AddDataToPayload([]int32{1, 2, 3}, nil) }, func(w *insertEventWriter) error { - return 
w.AddDataToPayload([]int32{4, 5, 6}) + return w.AddDataToPayload([]int32{4, 5, 6}, nil) }, func(w *insertEventWriter) error { - return w.AddDataToPayload([]int{1, 2, 3, 4, 5}) + return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil) }, []int32{1, 2, 3, 4, 5, 6}) }) t.Run("insert_int64", func(t *testing.T) { - w, err := newInsertEventWriter(schemapb.DataType_Int64) + w, err := newInsertEventWriter(schemapb.DataType_Int64, false) assert.NoError(t, err) insertT(t, schemapb.DataType_Int64, w, func(w *insertEventWriter) error { - return w.AddDataToPayload([]int64{1, 2, 3}) + return w.AddDataToPayload([]int64{1, 2, 3}, nil) }, func(w *insertEventWriter) error { - return w.AddDataToPayload([]int64{4, 5, 6}) + return w.AddDataToPayload([]int64{4, 5, 6}, nil) }, func(w *insertEventWriter) error { - return w.AddDataToPayload([]int{1, 2, 3, 4, 5}) + return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil) }, []int64{1, 2, 3, 4, 5, 6}) }) t.Run("insert_float32", func(t *testing.T) { - w, err := newInsertEventWriter(schemapb.DataType_Float) + w, err := newInsertEventWriter(schemapb.DataType_Float, false) assert.NoError(t, err) insertT(t, schemapb.DataType_Float, w, func(w *insertEventWriter) error { - return w.AddDataToPayload([]float32{1, 2, 3}) + return w.AddDataToPayload([]float32{1, 2, 3}, nil) }, func(w *insertEventWriter) error { - return w.AddDataToPayload([]float32{4, 5, 6}) + return w.AddDataToPayload([]float32{4, 5, 6}, nil) }, func(w *insertEventWriter) error { - return w.AddDataToPayload([]int{1, 2, 3, 4, 5}) + return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil) }, []float32{1, 2, 3, 4, 5, 6}) }) t.Run("insert_float64", func(t *testing.T) { - w, err := newInsertEventWriter(schemapb.DataType_Double) + w, err := newInsertEventWriter(schemapb.DataType_Double, false) assert.NoError(t, err) insertT(t, schemapb.DataType_Double, w, func(w *insertEventWriter) error { - return w.AddDataToPayload([]float64{1, 2, 3}) + return w.AddDataToPayload([]float64{1, 2, 3}, nil) }, func(w 
*insertEventWriter) error { - return w.AddDataToPayload([]float64{4, 5, 6}) + return w.AddDataToPayload([]float64{4, 5, 6}, nil) }, func(w *insertEventWriter) error { - return w.AddDataToPayload([]int{1, 2, 3, 4, 5}) + return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil) }, []float64{1, 2, 3, 4, 5, 6}) }) t.Run("insert_binary_vector", func(t *testing.T) { - w, err := newInsertEventWriter(schemapb.DataType_BinaryVector, 16) + w, err := newInsertEventWriter(schemapb.DataType_BinaryVector, false, 16) assert.NoError(t, err) insertT(t, schemapb.DataType_BinaryVector, w, func(w *insertEventWriter) error { - return w.AddDataToPayload([]byte{1, 2, 3, 4}, 16) + return w.AddDataToPayload([]byte{1, 2, 3, 4}, nil) }, func(w *insertEventWriter) error { - return w.AddDataToPayload([]byte{5, 6, 7, 8}, 16) + return w.AddDataToPayload([]byte{5, 6, 7, 8}, nil) }, func(w *insertEventWriter) error { - return w.AddDataToPayload([]int{1, 2, 3, 4, 5, 6}, 16) + return w.AddDataToPayload([]int{1, 2, 3, 4, 5, 6}, nil) }, []byte{1, 2, 3, 4, 5, 6, 7, 8}) }) t.Run("insert_float_vector", func(t *testing.T) { - w, err := newInsertEventWriter(schemapb.DataType_FloatVector, 2) + w, err := newInsertEventWriter(schemapb.DataType_FloatVector, false, 2) assert.NoError(t, err) insertT(t, schemapb.DataType_FloatVector, w, func(w *insertEventWriter) error { - return w.AddDataToPayload([]float32{1, 2, 3, 4}, 2) + return w.AddDataToPayload([]float32{1, 2, 3, 4}, nil) }, func(w *insertEventWriter) error { - return w.AddDataToPayload([]float32{5, 6, 7, 8}, 2) + return w.AddDataToPayload([]float32{5, 6, 7, 8}, nil) }, func(w *insertEventWriter) error { - return w.AddDataToPayload([]int{1, 2, 3, 4, 5, 6}, 2) + return w.AddDataToPayload([]int{1, 2, 3, 4, 5, 6}, nil) }, []float32{1, 2, 3, 4, 5, 6, 7, 8}) }) t.Run("insert_string", func(t *testing.T) { - w, err := newInsertEventWriter(schemapb.DataType_String) + w, err := newInsertEventWriter(schemapb.DataType_String, false) assert.NoError(t, err) 
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) - err = w.AddDataToPayload("1234") + err = w.AddDataToPayload("1234", nil) assert.NoError(t, err) - err = w.AddOneStringToPayload("567890") + err = w.AddOneStringToPayload("567890", true) assert.NoError(t, err) - err = w.AddOneStringToPayload("abcdefg") + err = w.AddOneStringToPayload("abcdefg", true) assert.NoError(t, err) - err = w.AddDataToPayload([]int{1, 2, 3}) + err = w.AddDataToPayload([]int{1, 2, 3}, nil) assert.Error(t, err) err = w.Finish() assert.NoError(t, err) @@ -349,20 +360,20 @@ func TestInsertEvent(t *testing.T) { payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{}) pBuf := wBuf[payloadOffset:] - pR, err := NewPayloadReader(schemapb.DataType_String, pBuf) + pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false) assert.NoError(t, err) - s, err := pR.GetStringFromPayload() + s, _, err := pR.GetStringFromPayload() assert.NoError(t, err) assert.Equal(t, s[0], "1234") assert.Equal(t, s[1], "567890") assert.Equal(t, s[2], "abcdefg") pR.Close() - r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf)) + r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), false) assert.NoError(t, err) - s, err = pR.GetStringFromPayload() + s, _, err = pR.GetStringFromPayload() assert.NoError(t, err) assert.Equal(t, s[0], "1234") assert.Equal(t, s[1], "567890") @@ -379,13 +390,13 @@ func TestDeleteEvent(t *testing.T) { w, err := newDeleteEventWriter(schemapb.DataType_String) assert.NoError(t, err) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) - err = w.AddDataToPayload("1234") + err = w.AddDataToPayload("1234", nil) assert.NoError(t, err) - err = w.AddOneStringToPayload("567890") + err = w.AddOneStringToPayload("567890", true) assert.NoError(t, err) - err = w.AddOneStringToPayload("abcdefg") + err = w.AddOneStringToPayload("abcdefg", true) assert.NoError(t, err) - err = w.AddDataToPayload([]int{1, 2, 
3}) + err = w.AddDataToPayload([]int{1, 2, 3}, nil) assert.Error(t, err) err = w.Finish() assert.NoError(t, err) @@ -403,10 +414,10 @@ func TestDeleteEvent(t *testing.T) { payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{}) pBuf := wBuf[payloadOffset:] - pR, err := NewPayloadReader(schemapb.DataType_String, pBuf) + pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false) assert.NoError(t, err) - s, err := pR.GetStringFromPayload() + s, _, err := pR.GetStringFromPayload() assert.NoError(t, err) assert.Equal(t, s[0], "1234") assert.Equal(t, s[1], "567890") @@ -414,10 +425,10 @@ func TestDeleteEvent(t *testing.T) { pR.Close() - r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf)) + r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), false) assert.NoError(t, err) - s, err = pR.GetStringFromPayload() + s, _, err = pR.GetStringFromPayload() assert.NoError(t, err) assert.Equal(t, s[0], "1234") assert.Equal(t, s[1], "567890") @@ -439,11 +450,11 @@ func TestCreateCollectionEvent(t *testing.T) { w, err := newCreateCollectionEventWriter(schemapb.DataType_Int64) assert.NoError(t, err) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) - err = w.AddDataToPayload([]int64{1, 2, 3}) + err = w.AddDataToPayload([]int64{1, 2, 3}, nil) assert.NoError(t, err) - err = w.AddDataToPayload([]int{4, 5, 6}) + err = w.AddDataToPayload([]int{4, 5, 6}, nil) assert.Error(t, err) - err = w.AddDataToPayload([]int64{4, 5, 6}) + err = w.AddDataToPayload([]int64{4, 5, 6}, nil) assert.NoError(t, err) err = w.Finish() assert.NoError(t, err) @@ -461,16 +472,16 @@ func TestCreateCollectionEvent(t *testing.T) { payloadOffset := binary.Size(eventHeader{}) + binary.Size(createCollectionEventData{}) pBuf := wBuf[payloadOffset:] - pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf) + pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf, false) assert.NoError(t, err) - values, _, err := 
pR.GetDataFromPayload() + values, _, _, err := pR.GetDataFromPayload() assert.NoError(t, err) assert.Equal(t, values, []int64{1, 2, 3, 4, 5, 6}) pR.Close() - r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf)) + r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf), false) assert.NoError(t, err) - payload, _, err := r.GetDataFromPayload() + payload, _, _, err := r.GetDataFromPayload() assert.NoError(t, err) assert.Equal(t, payload, []int64{1, 2, 3, 4, 5, 6}) @@ -481,13 +492,13 @@ func TestCreateCollectionEvent(t *testing.T) { w, err := newCreateCollectionEventWriter(schemapb.DataType_String) assert.NoError(t, err) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) - err = w.AddDataToPayload("1234") + err = w.AddDataToPayload("1234", nil) assert.NoError(t, err) - err = w.AddOneStringToPayload("567890") + err = w.AddOneStringToPayload("567890", true) assert.NoError(t, err) - err = w.AddOneStringToPayload("abcdefg") + err = w.AddOneStringToPayload("abcdefg", true) assert.NoError(t, err) - err = w.AddDataToPayload([]int{1, 2, 3}) + err = w.AddDataToPayload([]int{1, 2, 3}, nil) assert.Error(t, err) err = w.Finish() assert.NoError(t, err) @@ -505,10 +516,10 @@ func TestCreateCollectionEvent(t *testing.T) { payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{}) pBuf := wBuf[payloadOffset:] - pR, err := NewPayloadReader(schemapb.DataType_String, pBuf) + pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false) assert.NoError(t, err) - s, err := pR.GetStringFromPayload() + s, _, err := pR.GetStringFromPayload() assert.NoError(t, err) assert.Equal(t, s[0], "1234") assert.Equal(t, s[1], "567890") @@ -516,10 +527,10 @@ func TestCreateCollectionEvent(t *testing.T) { pR.Close() - r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf)) + r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), true) assert.NoError(t, err) - s, err = 
pR.GetStringFromPayload() + s, _, err = pR.GetStringFromPayload() assert.NoError(t, err) assert.Equal(t, s[0], "1234") assert.Equal(t, s[1], "567890") @@ -541,11 +552,11 @@ func TestDropCollectionEvent(t *testing.T) { w, err := newDropCollectionEventWriter(schemapb.DataType_Int64) assert.NoError(t, err) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) - err = w.AddDataToPayload([]int64{1, 2, 3}) + err = w.AddDataToPayload([]int64{1, 2, 3}, nil) assert.NoError(t, err) - err = w.AddDataToPayload([]int{4, 5, 6}) + err = w.AddDataToPayload([]int{4, 5, 6}, nil) assert.Error(t, err) - err = w.AddDataToPayload([]int64{4, 5, 6}) + err = w.AddDataToPayload([]int64{4, 5, 6}, nil) assert.NoError(t, err) err = w.Finish() assert.NoError(t, err) @@ -563,16 +574,16 @@ func TestDropCollectionEvent(t *testing.T) { payloadOffset := binary.Size(eventHeader{}) + binary.Size(createCollectionEventData{}) pBuf := wBuf[payloadOffset:] - pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf) + pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf, false) assert.NoError(t, err) - values, _, err := pR.GetDataFromPayload() + values, _, _, err := pR.GetDataFromPayload() assert.NoError(t, err) assert.Equal(t, values, []int64{1, 2, 3, 4, 5, 6}) pR.Close() - r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf)) + r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf), false) assert.NoError(t, err) - payload, _, err := r.GetDataFromPayload() + payload, _, _, err := r.GetDataFromPayload() assert.NoError(t, err) assert.Equal(t, payload, []int64{1, 2, 3, 4, 5, 6}) @@ -583,13 +594,13 @@ func TestDropCollectionEvent(t *testing.T) { w, err := newDropCollectionEventWriter(schemapb.DataType_String) assert.NoError(t, err) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) - err = w.AddDataToPayload("1234") + err = w.AddDataToPayload("1234", nil) assert.NoError(t, err) - err = w.AddOneStringToPayload("567890") + err = 
w.AddOneStringToPayload("567890", true) assert.NoError(t, err) - err = w.AddOneStringToPayload("abcdefg") + err = w.AddOneStringToPayload("abcdefg", true) assert.NoError(t, err) - err = w.AddDataToPayload([]int{1, 2, 3}) + err = w.AddDataToPayload([]int{1, 2, 3}, nil) assert.Error(t, err) err = w.Finish() assert.NoError(t, err) @@ -607,10 +618,10 @@ func TestDropCollectionEvent(t *testing.T) { payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{}) pBuf := wBuf[payloadOffset:] - pR, err := NewPayloadReader(schemapb.DataType_String, pBuf) + pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false) assert.NoError(t, err) - s, err := pR.GetStringFromPayload() + s, _, err := pR.GetStringFromPayload() assert.NoError(t, err) assert.Equal(t, s[0], "1234") assert.Equal(t, s[1], "567890") @@ -618,10 +629,10 @@ func TestDropCollectionEvent(t *testing.T) { pR.Close() - r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf)) + r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), false) assert.NoError(t, err) - s, err = r.GetStringFromPayload() + s, _, err = r.GetStringFromPayload() assert.NoError(t, err) assert.Equal(t, s[0], "1234") assert.Equal(t, s[1], "567890") @@ -643,11 +654,11 @@ func TestCreatePartitionEvent(t *testing.T) { w, err := newCreatePartitionEventWriter(schemapb.DataType_Int64) assert.NoError(t, err) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) - err = w.AddDataToPayload([]int64{1, 2, 3}) + err = w.AddDataToPayload([]int64{1, 2, 3}, nil) assert.NoError(t, err) - err = w.AddDataToPayload([]int{4, 5, 6}) + err = w.AddDataToPayload([]int{4, 5, 6}, nil) assert.Error(t, err) - err = w.AddDataToPayload([]int64{4, 5, 6}) + err = w.AddDataToPayload([]int64{4, 5, 6}, nil) assert.NoError(t, err) err = w.Finish() assert.NoError(t, err) @@ -665,16 +676,16 @@ func TestCreatePartitionEvent(t *testing.T) { payloadOffset := binary.Size(eventHeader{}) + 
binary.Size(createCollectionEventData{}) pBuf := wBuf[payloadOffset:] - pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf) + pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf, false) assert.NoError(t, err) - values, _, err := pR.GetDataFromPayload() + values, _, _, err := pR.GetDataFromPayload() assert.NoError(t, err) assert.Equal(t, values, []int64{1, 2, 3, 4, 5, 6}) pR.Close() - r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf)) + r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf), false) assert.NoError(t, err) - payload, _, err := r.GetDataFromPayload() + payload, _, _, err := r.GetDataFromPayload() assert.NoError(t, err) assert.Equal(t, payload, []int64{1, 2, 3, 4, 5, 6}) @@ -685,13 +696,13 @@ func TestCreatePartitionEvent(t *testing.T) { w, err := newCreatePartitionEventWriter(schemapb.DataType_String) assert.NoError(t, err) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) - err = w.AddDataToPayload("1234") + err = w.AddDataToPayload("1234", nil) assert.NoError(t, err) - err = w.AddOneStringToPayload("567890") + err = w.AddOneStringToPayload("567890", true) assert.NoError(t, err) - err = w.AddOneStringToPayload("abcdefg") + err = w.AddOneStringToPayload("abcdefg", true) assert.NoError(t, err) - err = w.AddDataToPayload([]int{1, 2, 3}) + err = w.AddDataToPayload([]int{1, 2, 3}, nil) assert.Error(t, err) err = w.Finish() assert.NoError(t, err) @@ -709,10 +720,10 @@ func TestCreatePartitionEvent(t *testing.T) { payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{}) pBuf := wBuf[payloadOffset:] - pR, err := NewPayloadReader(schemapb.DataType_String, pBuf) + pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false) assert.NoError(t, err) - s, err := pR.GetStringFromPayload() + s, _, err := pR.GetStringFromPayload() assert.NoError(t, err) assert.Equal(t, s[0], "1234") assert.Equal(t, s[1], "567890") @@ -720,10 +731,10 @@ func TestCreatePartitionEvent(t 
*testing.T) { pR.Close() - r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf)) + r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), false) assert.NoError(t, err) - s, err = pR.GetStringFromPayload() + s, _, err = pR.GetStringFromPayload() assert.NoError(t, err) assert.Equal(t, s[0], "1234") assert.Equal(t, s[1], "567890") @@ -745,11 +756,11 @@ func TestDropPartitionEvent(t *testing.T) { w, err := newDropPartitionEventWriter(schemapb.DataType_Int64) assert.NoError(t, err) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) - err = w.AddDataToPayload([]int64{1, 2, 3}) + err = w.AddDataToPayload([]int64{1, 2, 3}, nil) assert.NoError(t, err) - err = w.AddDataToPayload([]int{4, 5, 6}) + err = w.AddDataToPayload([]int{4, 5, 6}, nil) assert.Error(t, err) - err = w.AddDataToPayload([]int64{4, 5, 6}) + err = w.AddDataToPayload([]int64{4, 5, 6}, nil) assert.NoError(t, err) err = w.Finish() assert.NoError(t, err) @@ -767,16 +778,16 @@ func TestDropPartitionEvent(t *testing.T) { payloadOffset := binary.Size(eventHeader{}) + binary.Size(createCollectionEventData{}) pBuf := wBuf[payloadOffset:] - pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf) + pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf, false) assert.NoError(t, err) - values, _, err := pR.GetDataFromPayload() + values, _, _, err := pR.GetDataFromPayload() assert.NoError(t, err) assert.Equal(t, values, []int64{1, 2, 3, 4, 5, 6}) pR.Close() - r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf)) + r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf), false) assert.NoError(t, err) - payload, _, err := r.GetDataFromPayload() + payload, _, _, err := r.GetDataFromPayload() assert.NoError(t, err) assert.Equal(t, payload, []int64{1, 2, 3, 4, 5, 6}) @@ -787,13 +798,13 @@ func TestDropPartitionEvent(t *testing.T) { w, err := newDropPartitionEventWriter(schemapb.DataType_String) assert.NoError(t, err) 
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) - err = w.AddDataToPayload("1234") + err = w.AddDataToPayload("1234", nil) assert.NoError(t, err) - err = w.AddOneStringToPayload("567890") + err = w.AddOneStringToPayload("567890", true) assert.NoError(t, err) - err = w.AddOneStringToPayload("abcdefg") + err = w.AddOneStringToPayload("abcdefg", true) assert.NoError(t, err) - err = w.AddDataToPayload([]int{1, 2, 3}) + err = w.AddDataToPayload([]int{1, 2, 3}, nil) assert.Error(t, err) err = w.Finish() assert.NoError(t, err) @@ -811,10 +822,10 @@ func TestDropPartitionEvent(t *testing.T) { payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{}) pBuf := wBuf[payloadOffset:] - pR, err := NewPayloadReader(schemapb.DataType_String, pBuf) + pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false) assert.NoError(t, err) - s, err := pR.GetStringFromPayload() + s, _, err := pR.GetStringFromPayload() assert.NoError(t, err) assert.Equal(t, s[0], "1234") assert.Equal(t, s[1], "567890") @@ -822,10 +833,10 @@ func TestDropPartitionEvent(t *testing.T) { pR.Close() - r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf)) + r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), false) assert.NoError(t, err) - s, err = pR.GetStringFromPayload() + s, _, err = pR.GetStringFromPayload() assert.NoError(t, err) assert.Equal(t, s[0], "1234") assert.Equal(t, s[1], "567890") @@ -843,7 +854,7 @@ func TestIndexFileEvent(t *testing.T) { w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) payload := funcutil.GenRandomBytes() - err = w.AddOneStringToPayload(typeutil.UnsafeBytes2str(payload)) + err = w.AddOneStringToPayload(typeutil.UnsafeBytes2str(payload), true) assert.NoError(t, err) err = w.Finish() @@ -862,10 +873,10 @@ func TestIndexFileEvent(t *testing.T) { payloadOffset := binary.Size(eventHeader{}) + binary.Size(indexFileEventData{}) pBuf := wBuf[payloadOffset:] - pR, err := 
NewPayloadReader(schemapb.DataType_String, pBuf) + pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false) assert.NoError(t, err) assert.Equal(t, pR.numRows, int64(1)) - value, err := pR.GetStringFromPayload() + value, _, err := pR.GetStringFromPayload() assert.Equal(t, len(value), 1) @@ -880,7 +891,7 @@ func TestIndexFileEvent(t *testing.T) { w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) payload := funcutil.GenRandomBytes() - err = w.AddByteToPayload(payload) + err = w.AddByteToPayload(payload, nil) assert.NoError(t, err) err = w.Finish() @@ -899,10 +910,10 @@ func TestIndexFileEvent(t *testing.T) { payloadOffset := binary.Size(eventHeader{}) + binary.Size(indexFileEventData{}) pBuf := wBuf[payloadOffset:] - pR, err := NewPayloadReader(schemapb.DataType_Int8, pBuf) + pR, err := NewPayloadReader(schemapb.DataType_Int8, pBuf, false) assert.Equal(t, pR.numRows, int64(len(payload))) assert.NoError(t, err) - value, err := pR.GetByteFromPayload() + value, _, err := pR.GetByteFromPayload() assert.NoError(t, err) assert.Equal(t, payload, value) pR.Close() @@ -914,7 +925,7 @@ func TestIndexFileEvent(t *testing.T) { w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) payload := funcutil.GenRandomBytesWithLength(1000) - err = w.AddByteToPayload(payload) + err = w.AddByteToPayload(payload, nil) assert.NoError(t, err) err = w.Finish() @@ -933,10 +944,10 @@ func TestIndexFileEvent(t *testing.T) { payloadOffset := binary.Size(eventHeader{}) + binary.Size(indexFileEventData{}) pBuf := wBuf[payloadOffset:] - pR, err := NewPayloadReader(schemapb.DataType_Int8, pBuf) + pR, err := NewPayloadReader(schemapb.DataType_Int8, pBuf, false) assert.Equal(t, pR.numRows, int64(len(payload))) assert.NoError(t, err) - value, err := pR.GetByteFromPayload() + value, _, err := pR.GetByteFromPayload() assert.NoError(t, err) assert.Equal(t, payload, value) pR.Close() @@ -1044,7 +1055,7 @@ func TestReadFixPartError(t *testing.T) { func 
TestEventReaderError(t *testing.T) { buf := new(bytes.Buffer) - r, err := newEventReader(schemapb.DataType_Int64, buf) + r, err := newEventReader(schemapb.DataType_Int64, buf, false) assert.Nil(t, r) assert.Error(t, err) @@ -1052,7 +1063,7 @@ func TestEventReaderError(t *testing.T) { err = header.Write(buf) assert.NoError(t, err) - r, err = newEventReader(schemapb.DataType_Int64, buf) + r, err = newEventReader(schemapb.DataType_Int64, buf, false) assert.Nil(t, r) assert.Error(t, err) @@ -1061,7 +1072,7 @@ func TestEventReaderError(t *testing.T) { err = header.Write(buf) assert.NoError(t, err) - r, err = newEventReader(schemapb.DataType_Int64, buf) + r, err = newEventReader(schemapb.DataType_Int64, buf, false) assert.Nil(t, r) assert.Error(t, err) @@ -1078,16 +1089,16 @@ func TestEventReaderError(t *testing.T) { err = binary.Write(buf, common.Endian, insertData) assert.NoError(t, err) - r, err = newEventReader(schemapb.DataType_Int64, buf) + r, err = newEventReader(schemapb.DataType_Int64, buf, false) assert.Nil(t, r) assert.Error(t, err) } func TestEventClose(t *testing.T) { - w, err := newInsertEventWriter(schemapb.DataType_String) + w, err := newInsertEventWriter(schemapb.DataType_String, false) assert.NoError(t, err) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) - err = w.AddDataToPayload("1234") + err = w.AddDataToPayload("1234", nil) assert.NoError(t, err) err = w.Finish() assert.NoError(t, err) @@ -1098,7 +1109,7 @@ func TestEventClose(t *testing.T) { w.Close() wBuf := buf.Bytes() - r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf)) + r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), false) assert.NoError(t, err) r.Close() diff --git a/internal/storage/event_writer.go b/internal/storage/event_writer.go index 45de3c0953..58c6c5a9ca 100644 --- a/internal/storage/event_writer.go +++ b/internal/storage/event_writer.go @@ -212,16 +212,16 @@ func newDescriptorEvent() *descriptorEvent { } } 
-func newInsertEventWriter(dataType schemapb.DataType, dim ...int) (*insertEventWriter, error) { +func newInsertEventWriter(dataType schemapb.DataType, nullable bool, dim ...int) (*insertEventWriter, error) { var payloadWriter PayloadWriterInterface var err error if typeutil.IsVectorType(dataType) && !typeutil.IsSparseFloatVectorType(dataType) { if len(dim) != 1 { return nil, fmt.Errorf("incorrect input numbers") } - payloadWriter, err = NewPayloadWriter(dataType, dim[0]) + payloadWriter, err = NewPayloadWriter(dataType, nullable, dim[0]) } else { - payloadWriter, err = NewPayloadWriter(dataType) + payloadWriter, err = NewPayloadWriter(dataType, nullable) } if err != nil { return nil, err @@ -244,7 +244,7 @@ func newInsertEventWriter(dataType schemapb.DataType, dim ...int) (*insertEventW } func newDeleteEventWriter(dataType schemapb.DataType) (*deleteEventWriter, error) { - payloadWriter, err := NewPayloadWriter(dataType) + payloadWriter, err := NewPayloadWriter(dataType, false) if err != nil { return nil, err } @@ -270,7 +270,7 @@ func newCreateCollectionEventWriter(dataType schemapb.DataType) (*createCollecti return nil, errors.New("incorrect data type") } - payloadWriter, err := NewPayloadWriter(dataType) + payloadWriter, err := NewPayloadWriter(dataType, false) if err != nil { return nil, err } @@ -296,7 +296,7 @@ func newDropCollectionEventWriter(dataType schemapb.DataType) (*dropCollectionEv return nil, errors.New("incorrect data type") } - payloadWriter, err := NewPayloadWriter(dataType) + payloadWriter, err := NewPayloadWriter(dataType, false) if err != nil { return nil, err } @@ -322,7 +322,7 @@ func newCreatePartitionEventWriter(dataType schemapb.DataType) (*createPartition return nil, errors.New("incorrect data type") } - payloadWriter, err := NewPayloadWriter(dataType) + payloadWriter, err := NewPayloadWriter(dataType, false) if err != nil { return nil, err } @@ -348,7 +348,7 @@ func newDropPartitionEventWriter(dataType schemapb.DataType) 
(*dropPartitionEven return nil, errors.New("incorrect data type") } - payloadWriter, err := NewPayloadWriter(dataType) + payloadWriter, err := NewPayloadWriter(dataType, false) if err != nil { return nil, err } @@ -370,7 +370,7 @@ func newDropPartitionEventWriter(dataType schemapb.DataType) (*dropPartitionEven } func newIndexFileEventWriter(dataType schemapb.DataType) (*indexFileEventWriter, error) { - payloadWriter, err := NewPayloadWriter(dataType) + payloadWriter, err := NewPayloadWriter(dataType, false) if err != nil { return nil, err } diff --git a/internal/storage/event_writer_test.go b/internal/storage/event_writer_test.go index a6b6456159..9b4997edca 100644 --- a/internal/storage/event_writer_test.go +++ b/internal/storage/event_writer_test.go @@ -59,17 +59,17 @@ func TestSizeofStruct(t *testing.T) { } func TestEventWriter(t *testing.T) { - insertEvent, err := newInsertEventWriter(schemapb.DataType_Int32) + insertEvent, err := newInsertEventWriter(schemapb.DataType_Int32, false) assert.NoError(t, err) insertEvent.Close() - insertEvent, err = newInsertEventWriter(schemapb.DataType_Int32) + insertEvent, err = newInsertEventWriter(schemapb.DataType_Int32, false) assert.NoError(t, err) defer insertEvent.Close() - err = insertEvent.AddInt64ToPayload([]int64{1, 1}) + err = insertEvent.AddInt64ToPayload([]int64{1, 1}, nil) assert.Error(t, err) - err = insertEvent.AddInt32ToPayload([]int32{1, 2, 3}) + err = insertEvent.AddInt32ToPayload([]int32{1, 2, 3}, nil) assert.NoError(t, err) nums, err := insertEvent.GetPayloadLengthFromWriter() assert.NoError(t, err) @@ -79,7 +79,7 @@ func TestEventWriter(t *testing.T) { length, err := insertEvent.GetMemoryUsageInBytes() assert.NoError(t, err) assert.EqualValues(t, length, insertEvent.EventLength) - err = insertEvent.AddInt32ToPayload([]int32{1}) + err = insertEvent.AddInt32ToPayload([]int32{1}, nil) assert.Error(t, err) buffer := new(bytes.Buffer) insertEvent.SetEventTimestamp(100, 200) diff --git 
a/internal/storage/index_data_codec.go b/internal/storage/index_data_codec.go index 0e928c8223..a3dba549e0 100644 --- a/internal/storage/index_data_codec.go +++ b/internal/storage/index_data_codec.go @@ -59,7 +59,7 @@ func (codec *IndexFileBinlogCodec) serializeImpl( } defer eventWriter.Close() - err = eventWriter.AddOneStringToPayload(typeutil.UnsafeBytes2str(value)) + err = eventWriter.AddOneStringToPayload(typeutil.UnsafeBytes2str(value), true) if err != nil { return nil, err } @@ -221,7 +221,8 @@ func (codec *IndexFileBinlogCodec) DeserializeImpl(blobs []*Blob) ( switch dataType { // just for backward compatibility case schemapb.DataType_Int8: - content, err := eventReader.GetByteFromPayload() + // todo: smellthemoon, valid_data may need to check when create index + content, _, err := eventReader.GetByteFromPayload() if err != nil { log.Warn("failed to get byte from payload", zap.Error(err)) @@ -239,7 +240,7 @@ func (codec *IndexFileBinlogCodec) DeserializeImpl(blobs []*Blob) ( } case schemapb.DataType_String: - content, err := eventReader.GetStringFromPayload() + content, _, err := eventReader.GetStringFromPayload() if err != nil { log.Warn("failed to get string from payload", zap.Error(err)) eventReader.Close() diff --git a/internal/storage/insert_data.go b/internal/storage/insert_data.go index 7d2de208a9..5742ff2bb9 100644 --- a/internal/storage/insert_data.go +++ b/internal/storage/insert_data.go @@ -149,6 +149,7 @@ type FieldData interface { AppendRow(row interface{}) error AppendRows(rows interface{}) error GetDataType() schemapb.DataType + GetNullable() bool } func NewFieldData(dataType schemapb.DataType, fieldSchema *schemapb.FieldSchema, cap int) (FieldData, error) { @@ -193,88 +194,142 @@ func NewFieldData(dataType schemapb.DataType, fieldSchema *schemapb.FieldSchema, case schemapb.DataType_SparseFloatVector: return &SparseFloatVectorFieldData{}, nil case schemapb.DataType_Bool: - return &BoolFieldData{ + data := &BoolFieldData{ Data: make([]bool, 0, 
cap), - }, nil + } + if fieldSchema.GetNullable() { + data.ValidData = make([]bool, 0, cap) + } + return data, nil case schemapb.DataType_Int8: - return &Int8FieldData{ + data := &Int8FieldData{ Data: make([]int8, 0, cap), - }, nil + } + if fieldSchema.GetNullable() { + data.ValidData = make([]bool, 0, cap) + } + return data, nil case schemapb.DataType_Int16: - return &Int16FieldData{ + data := &Int16FieldData{ Data: make([]int16, 0, cap), - }, nil + } + if fieldSchema.GetNullable() { + data.ValidData = make([]bool, 0, cap) + } + return data, nil case schemapb.DataType_Int32: - return &Int32FieldData{ + data := &Int32FieldData{ Data: make([]int32, 0, cap), - }, nil + } + if fieldSchema.GetNullable() { + data.ValidData = make([]bool, 0, cap) + } + return data, nil case schemapb.DataType_Int64: - return &Int64FieldData{ + data := &Int64FieldData{ Data: make([]int64, 0, cap), - }, nil + } + if fieldSchema.GetNullable() { + data.ValidData = make([]bool, 0, cap) + } + return data, nil + case schemapb.DataType_Float: - return &FloatFieldData{ + data := &FloatFieldData{ Data: make([]float32, 0, cap), - }, nil + } + if fieldSchema.GetNullable() { + data.ValidData = make([]bool, 0, cap) + } + return data, nil case schemapb.DataType_Double: - return &DoubleFieldData{ + data := &DoubleFieldData{ Data: make([]float64, 0, cap), - }, nil + } + if fieldSchema.GetNullable() { + data.ValidData = make([]bool, 0, cap) + } + return data, nil + case schemapb.DataType_JSON: - return &JSONFieldData{ + data := &JSONFieldData{ Data: make([][]byte, 0, cap), - }, nil + } + if fieldSchema.GetNullable() { + data.ValidData = make([]bool, 0, cap) + } + return data, nil + case schemapb.DataType_Array: - return &ArrayFieldData{ + data := &ArrayFieldData{ Data: make([]*schemapb.ScalarField, 0, cap), ElementType: fieldSchema.GetElementType(), - }, nil + } + if fieldSchema.GetNullable() { + data.ValidData = make([]bool, 0, cap) + } + return data, nil + case schemapb.DataType_String, 
schemapb.DataType_VarChar: - return &StringFieldData{ + data := &StringFieldData{ Data: make([]string, 0, cap), DataType: dataType, - }, nil + } + if fieldSchema.GetNullable() { + data.ValidData = make([]bool, 0, cap) + } + return data, nil default: return nil, fmt.Errorf("Unexpected schema data type: %d", dataType) } } type BoolFieldData struct { - Data []bool + Data []bool + ValidData []bool } type Int8FieldData struct { - Data []int8 + Data []int8 + ValidData []bool } type Int16FieldData struct { - Data []int16 + Data []int16 + ValidData []bool } type Int32FieldData struct { - Data []int32 + Data []int32 + ValidData []bool } type Int64FieldData struct { - Data []int64 + Data []int64 + ValidData []bool } type FloatFieldData struct { - Data []float32 + Data []float32 + ValidData []bool } type DoubleFieldData struct { - Data []float64 + Data []float64 + ValidData []bool } type StringFieldData struct { - Data []string - DataType schemapb.DataType + Data []string + DataType schemapb.DataType + ValidData []bool } type ArrayFieldData struct { ElementType schemapb.DataType Data []*schemapb.ScalarField + ValidData []bool } type JSONFieldData struct { - Data [][]byte + Data [][]byte + ValidData []bool } type BinaryVectorFieldData struct { Data []byte @@ -671,13 +726,33 @@ func (data *SparseFloatVectorFieldData) AppendRows(rows interface{}) error { } // GetMemorySize implements FieldData.GetMemorySize -func (data *BoolFieldData) GetMemorySize() int { return binary.Size(data.Data) } -func (data *Int8FieldData) GetMemorySize() int { return binary.Size(data.Data) } -func (data *Int16FieldData) GetMemorySize() int { return binary.Size(data.Data) } -func (data *Int32FieldData) GetMemorySize() int { return binary.Size(data.Data) } -func (data *Int64FieldData) GetMemorySize() int { return binary.Size(data.Data) } -func (data *FloatFieldData) GetMemorySize() int { return binary.Size(data.Data) } -func (data *DoubleFieldData) GetMemorySize() int { return binary.Size(data.Data) } 
+func (data *BoolFieldData) GetMemorySize() int { + return binary.Size(data.Data) + binary.Size(data.ValidData) +} + +func (data *Int8FieldData) GetMemorySize() int { + return binary.Size(data.Data) + binary.Size(data.ValidData) +} + +func (data *Int16FieldData) GetMemorySize() int { + return binary.Size(data.Data) + binary.Size(data.ValidData) +} + +func (data *Int32FieldData) GetMemorySize() int { + return binary.Size(data.Data) + binary.Size(data.ValidData) +} + +func (data *Int64FieldData) GetMemorySize() int { + return binary.Size(data.Data) + binary.Size(data.ValidData) +} + +func (data *FloatFieldData) GetMemorySize() int { + return binary.Size(data.Data) + binary.Size(data.ValidData) +} + +func (data *DoubleFieldData) GetMemorySize() int { + return binary.Size(data.Data) + binary.Size(data.ValidData) +} func (data *BinaryVectorFieldData) GetMemorySize() int { return binary.Size(data.Data) + 4 } func (data *FloatVectorFieldData) GetMemorySize() int { return binary.Size(data.Data) + 4 } func (data *Float16VectorFieldData) GetMemorySize() int { return binary.Size(data.Data) + 4 } @@ -802,3 +877,63 @@ func (data *ArrayFieldData) GetRowSize(i int) int { func (data *SparseFloatVectorFieldData) GetRowSize(i int) int { return len(data.Contents[i]) } + +func (data *BoolFieldData) GetNullable() bool { + return len(data.ValidData) != 0 +} + +func (data *Int8FieldData) GetNullable() bool { + return len(data.ValidData) != 0 +} + +func (data *Int16FieldData) GetNullable() bool { + return len(data.ValidData) != 0 +} + +func (data *Int32FieldData) GetNullable() bool { + return len(data.ValidData) != 0 +} + +func (data *Int64FieldData) GetNullable() bool { + return len(data.ValidData) != 0 +} + +func (data *FloatFieldData) GetNullable() bool { + return len(data.ValidData) != 0 +} + +func (data *DoubleFieldData) GetNullable() bool { + return len(data.ValidData) != 0 +} + +func (data *BFloat16VectorFieldData) GetNullable() bool { + return false +} + +func (data 
*BinaryVectorFieldData) GetNullable() bool { + return false +} + +func (data *FloatVectorFieldData) GetNullable() bool { + return false +} + +func (data *SparseFloatVectorFieldData) GetNullable() bool { + return false +} + +func (data *Float16VectorFieldData) GetNullable() bool { + return false +} + +func (data *StringFieldData) GetNullable() bool { + return len(data.ValidData) != 0 +} + +func (data *ArrayFieldData) GetNullable() bool { + return len(data.ValidData) != 0 +} + +func (data *JSONFieldData) GetNullable() bool { + return len(data.ValidData) != 0 +} diff --git a/internal/storage/insert_data_test.go b/internal/storage/insert_data_test.go index cdd85331fb..a941150039 100644 --- a/internal/storage/insert_data_test.go +++ b/internal/storage/insert_data_test.go @@ -41,6 +41,39 @@ func (s *InsertDataSuite) TestInsertData() { s.Nil(idata) }) + s.Run("nullable field schema", func() { + tests := []struct { + description string + dataType schemapb.DataType + }{ + {"nullable bool field", schemapb.DataType_Bool}, + {"nullable int8 field", schemapb.DataType_Int8}, + {"nullable int16 field", schemapb.DataType_Int16}, + {"nullable int32 field", schemapb.DataType_Int32}, + {"nullable int64 field", schemapb.DataType_Int64}, + {"nullable float field", schemapb.DataType_Float}, + {"nullable double field", schemapb.DataType_Double}, + {"nullable json field", schemapb.DataType_JSON}, + {"nullable array field", schemapb.DataType_Array}, + {"nullable string/varchar field", schemapb.DataType_String}, + } + + for _, test := range tests { + s.Run(test.description, func() { + schema := &schemapb.CollectionSchema{ + Fields: []*schemapb.FieldSchema{ + { + DataType: test.dataType, + Nullable: true, + }, + }, + } + _, err := NewInsertData(schema) + s.Nil(err) + }) + } + }) + s.Run("invalid schema", func() { tests := []struct { description string @@ -183,6 +216,14 @@ func (s *InsertDataSuite) TestGetDataType() { } } +func (s *InsertDataSuite) TestGetNullable() { + for _, field := range 
s.schema.GetFields() { + fieldData, ok := s.iDataOneRow.Data[field.GetFieldID()] + s.True(ok) + s.Equal(field.GetNullable(), fieldData.GetNullable()) + } +} + func (s *InsertDataSuite) SetupTest() { var err error s.iDataEmpty, err = NewInsertData(s.schema) diff --git a/internal/storage/payload.go b/internal/storage/payload.go index 683b91b401..f62a569fc0 100644 --- a/internal/storage/payload.go +++ b/internal/storage/payload.go @@ -26,18 +26,18 @@ import ( // PayloadWriterInterface abstracts PayloadWriter type PayloadWriterInterface interface { - AddDataToPayload(msgs any, dim ...int) error - AddBoolToPayload(msgs []bool) error - AddByteToPayload(msgs []byte) error - AddInt8ToPayload(msgs []int8) error - AddInt16ToPayload(msgs []int16) error - AddInt32ToPayload(msgs []int32) error - AddInt64ToPayload(msgs []int64) error - AddFloatToPayload(msgs []float32) error - AddDoubleToPayload(msgs []float64) error - AddOneStringToPayload(msgs string) error - AddOneArrayToPayload(msg *schemapb.ScalarField) error - AddOneJSONToPayload(msg []byte) error + AddDataToPayload(msgs any, valids []bool) error + AddBoolToPayload(msgs []bool, valids []bool) error + AddByteToPayload(msgs []byte, valids []bool) error + AddInt8ToPayload(msgs []int8, valids []bool) error + AddInt16ToPayload(msgs []int16, valids []bool) error + AddInt32ToPayload(msgs []int32, valids []bool) error + AddInt64ToPayload(msgs []int64, valids []bool) error + AddFloatToPayload(msgs []float32, valids []bool) error + AddDoubleToPayload(msgs []float64, valids []bool) error + AddOneStringToPayload(msgs string, isValid bool) error + AddOneArrayToPayload(msg *schemapb.ScalarField, isValid bool) error + AddOneJSONToPayload(msg []byte, isValid bool) error AddBinaryVectorToPayload(binVec []byte, dim int) error AddFloatVectorToPayload(binVec []float32, dim int) error AddFloat16VectorToPayload(binVec []byte, dim int) error @@ -53,18 +53,18 @@ type PayloadWriterInterface interface { // PayloadReaderInterface abstracts 
PayloadReader type PayloadReaderInterface interface { - GetDataFromPayload() (any, int, error) - GetBoolFromPayload() ([]bool, error) - GetByteFromPayload() ([]byte, error) - GetInt8FromPayload() ([]int8, error) - GetInt16FromPayload() ([]int16, error) - GetInt32FromPayload() ([]int32, error) - GetInt64FromPayload() ([]int64, error) - GetFloatFromPayload() ([]float32, error) - GetDoubleFromPayload() ([]float64, error) - GetStringFromPayload() ([]string, error) - GetArrayFromPayload() ([]*schemapb.ScalarField, error) - GetJSONFromPayload() ([][]byte, error) + GetDataFromPayload() (any, []bool, int, error) + GetBoolFromPayload() ([]bool, []bool, error) + GetByteFromPayload() ([]byte, []bool, error) + GetInt8FromPayload() ([]int8, []bool, error) + GetInt16FromPayload() ([]int16, []bool, error) + GetInt32FromPayload() ([]int32, []bool, error) + GetInt64FromPayload() ([]int64, []bool, error) + GetFloatFromPayload() ([]float32, []bool, error) + GetDoubleFromPayload() ([]float64, []bool, error) + GetStringFromPayload() ([]string, []bool, error) + GetArrayFromPayload() ([]*schemapb.ScalarField, []bool, error) + GetJSONFromPayload() ([][]byte, []bool, error) GetBinaryVectorFromPayload() ([]byte, int, error) GetFloat16VectorFromPayload() ([]byte, int, error) GetBFloat16VectorFromPayload() ([]byte, int, error) diff --git a/internal/storage/payload_reader.go b/internal/storage/payload_reader.go index b3c0cc4e21..9054b57d1b 100644 --- a/internal/storage/payload_reader.go +++ b/internal/storage/payload_reader.go @@ -4,29 +4,35 @@ import ( "bytes" "context" "fmt" + "time" "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/arrow/array" "github.com/apache/arrow/go/v12/arrow/memory" "github.com/apache/arrow/go/v12/parquet" "github.com/apache/arrow/go/v12/parquet/file" "github.com/apache/arrow/go/v12/parquet/pqarrow" "github.com/cockroachdb/errors" "github.com/golang/protobuf/proto" + "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + 
"github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/typeutil" ) // PayloadReader reads data from payload type PayloadReader struct { - reader *file.Reader - colType schemapb.DataType - numRows int64 + reader *file.Reader + colType schemapb.DataType + numRows int64 + nullable bool } var _ PayloadReaderInterface = (*PayloadReader)(nil) -func NewPayloadReader(colType schemapb.DataType, buf []byte) (*PayloadReader, error) { +func NewPayloadReader(colType schemapb.DataType, buf []byte, nullable bool) (*PayloadReader, error) { if len(buf) == 0 { return nil, errors.New("create Payload reader failed, buffer is empty") } @@ -34,59 +40,66 @@ func NewPayloadReader(colType schemapb.DataType, buf []byte) (*PayloadReader, er if err != nil { return nil, err } - return &PayloadReader{reader: parquetReader, colType: colType, numRows: parquetReader.NumRows()}, nil + return &PayloadReader{reader: parquetReader, colType: colType, numRows: parquetReader.NumRows(), nullable: nullable}, nil } // GetDataFromPayload returns data,length from payload, returns err if failed // Return: // -// `interface{}`: all types. -// `int`: dim, only meaningful to FLOAT/BINARY VECTOR type. -// `error`: error. -func (r *PayloadReader) GetDataFromPayload() (interface{}, int, error) { + +// `interface{}`: all types. +// `[]bool`: validData, only meaningful to ScalarField. +// `int`: dim, only meaningful to FLOAT/BINARY VECTOR type. +// `error`: error. 
+func (r *PayloadReader) GetDataFromPayload() (interface{}, []bool, int, error) { switch r.colType { case schemapb.DataType_Bool: - val, err := r.GetBoolFromPayload() - return val, 0, err + val, validData, err := r.GetBoolFromPayload() + return val, validData, 0, err case schemapb.DataType_Int8: - val, err := r.GetInt8FromPayload() - return val, 0, err + val, validData, err := r.GetInt8FromPayload() + return val, validData, 0, err case schemapb.DataType_Int16: - val, err := r.GetInt16FromPayload() - return val, 0, err + val, validData, err := r.GetInt16FromPayload() + return val, validData, 0, err case schemapb.DataType_Int32: - val, err := r.GetInt32FromPayload() - return val, 0, err + val, validData, err := r.GetInt32FromPayload() + return val, validData, 0, err case schemapb.DataType_Int64: - val, err := r.GetInt64FromPayload() - return val, 0, err + val, validData, err := r.GetInt64FromPayload() + return val, validData, 0, err case schemapb.DataType_Float: - val, err := r.GetFloatFromPayload() - return val, 0, err + val, validData, err := r.GetFloatFromPayload() + return val, validData, 0, err case schemapb.DataType_Double: - val, err := r.GetDoubleFromPayload() - return val, 0, err + val, validData, err := r.GetDoubleFromPayload() + return val, validData, 0, err case schemapb.DataType_BinaryVector: - return r.GetBinaryVectorFromPayload() + val, dim, err := r.GetBinaryVectorFromPayload() + return val, nil, dim, err case schemapb.DataType_FloatVector: - return r.GetFloatVectorFromPayload() + val, dim, err := r.GetFloatVectorFromPayload() + return val, nil, dim, err case schemapb.DataType_Float16Vector: - return r.GetFloat16VectorFromPayload() + val, dim, err := r.GetFloat16VectorFromPayload() + return val, nil, dim, err case schemapb.DataType_BFloat16Vector: - return r.GetBFloat16VectorFromPayload() + val, dim, err := r.GetBFloat16VectorFromPayload() + return val, nil, dim, err case schemapb.DataType_SparseFloatVector: - return 
r.GetSparseFloatVectorFromPayload() + val, dim, err := r.GetSparseFloatVectorFromPayload() + return val, nil, dim, err case schemapb.DataType_String, schemapb.DataType_VarChar: - val, err := r.GetStringFromPayload() - return val, 0, err + val, validData, err := r.GetStringFromPayload() + return val, validData, 0, err case schemapb.DataType_Array: - val, err := r.GetArrayFromPayload() - return val, 0, err + val, validData, err := r.GetArrayFromPayload() + return val, validData, 0, err case schemapb.DataType_JSON: - val, err := r.GetJSONFromPayload() - return val, 0, err + val, validData, err := r.GetJSONFromPayload() + return val, validData, 0, err default: - return nil, 0, errors.New("unknown type") + return nil, nil, 0, merr.WrapErrParameterInvalidMsg("unknown type") } } @@ -96,169 +109,327 @@ func (r *PayloadReader) ReleasePayloadReader() error { } // GetBoolFromPayload returns bool slice from payload. -func (r *PayloadReader) GetBoolFromPayload() ([]bool, error) { +func (r *PayloadReader) GetBoolFromPayload() ([]bool, []bool, error) { if r.colType != schemapb.DataType_Bool { - return nil, fmt.Errorf("failed to get bool from datatype %v", r.colType.String()) + return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get bool from datatype %v", r.colType.String())) } values := make([]bool, r.numRows) + + if r.nullable { + validData := make([]bool, r.numRows) + valuesRead, err := ReadData[bool, *array.Boolean](r.reader, values, validData, r.numRows) + if err != nil { + return nil, nil, err + } + if valuesRead != r.numRows { + return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows") + } + return values, validData, nil + } valuesRead, err := ReadDataFromAllRowGroups[bool, *file.BooleanColumnChunkReader](r.reader, values, 0, r.numRows) if err != nil { - return nil, err + return nil, nil, err } - if valuesRead != r.numRows { - return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, 
valuesRead) + return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows") } - return values, nil + return values, nil, nil } // GetByteFromPayload returns byte slice from payload -func (r *PayloadReader) GetByteFromPayload() ([]byte, error) { +func (r *PayloadReader) GetByteFromPayload() ([]byte, []bool, error) { if r.colType != schemapb.DataType_Int8 { - return nil, fmt.Errorf("failed to get byte from datatype %v", r.colType.String()) + return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get byte from datatype %v", r.colType.String())) } + if r.nullable { + values := make([]int32, r.numRows) + validData := make([]bool, r.numRows) + valuesRead, err := ReadData[int32, *array.Int32](r.reader, values, validData, r.numRows) + if err != nil { + return nil, nil, err + } + if valuesRead != r.numRows { + return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows") + } + ret := make([]byte, r.numRows) + for i := int64(0); i < r.numRows; i++ { + ret[i] = byte(values[i]) + } + return ret, validData, nil + } values := make([]int32, r.numRows) valuesRead, err := ReadDataFromAllRowGroups[int32, *file.Int32ColumnChunkReader](r.reader, values, 0, r.numRows) if err != nil { - return nil, err + return nil, nil, err } if valuesRead != r.numRows { - return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead) + return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows") } ret := make([]byte, r.numRows) for i := int64(0); i < r.numRows; i++ { ret[i] = byte(values[i]) } - return ret, nil + return ret, nil, nil } -// GetInt8FromPayload returns int8 slice from payload -func (r *PayloadReader) GetInt8FromPayload() ([]int8, error) { +func (r *PayloadReader) GetInt8FromPayload() ([]int8, []bool, error) { if r.colType != schemapb.DataType_Int8 { - return nil, fmt.Errorf("failed to get int8 from datatype %v", 
r.colType.String()) + return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get int8 from datatype %v", r.colType.String())) } + if r.nullable { + values := make([]int8, r.numRows) + validData := make([]bool, r.numRows) + valuesRead, err := ReadData[int8, *array.Int8](r.reader, values, validData, r.numRows) + if err != nil { + return nil, nil, err + } + + if valuesRead != r.numRows { + return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows") + } + + return values, validData, nil + } values := make([]int32, r.numRows) valuesRead, err := ReadDataFromAllRowGroups[int32, *file.Int32ColumnChunkReader](r.reader, values, 0, r.numRows) if err != nil { - return nil, err + return nil, nil, err } if valuesRead != r.numRows { - return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead) + return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows") } ret := make([]int8, r.numRows) for i := int64(0); i < r.numRows; i++ { ret[i] = int8(values[i]) } - return ret, nil + return ret, nil, nil } -func (r *PayloadReader) GetInt16FromPayload() ([]int16, error) { +func (r *PayloadReader) GetInt16FromPayload() ([]int16, []bool, error) { if r.colType != schemapb.DataType_Int16 { - return nil, fmt.Errorf("failed to get int16 from datatype %v", r.colType.String()) + return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get int16 from datatype %v", r.colType.String())) } + if r.nullable { + values := make([]int16, r.numRows) + validData := make([]bool, r.numRows) + valuesRead, err := ReadData[int16, *array.Int16](r.reader, values, validData, r.numRows) + if err != nil { + return nil, nil, err + } + + if valuesRead != r.numRows { + return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows") + } + return values, validData, nil + } values := make([]int32, r.numRows) valuesRead, err := 
ReadDataFromAllRowGroups[int32, *file.Int32ColumnChunkReader](r.reader, values, 0, r.numRows) if err != nil { - return nil, err + return nil, nil, err } if valuesRead != r.numRows { - return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead) + return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows") } ret := make([]int16, r.numRows) for i := int64(0); i < r.numRows; i++ { ret[i] = int16(values[i]) } - return ret, nil + return ret, nil, nil } -func (r *PayloadReader) GetInt32FromPayload() ([]int32, error) { +func (r *PayloadReader) GetInt32FromPayload() ([]int32, []bool, error) { if r.colType != schemapb.DataType_Int32 { - return nil, fmt.Errorf("failed to get int32 from datatype %v", r.colType.String()) + return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get int32 from datatype %v", r.colType.String())) } values := make([]int32, r.numRows) + if r.nullable { + validData := make([]bool, r.numRows) + valuesRead, err := ReadData[int32, *array.Int32](r.reader, values, validData, r.numRows) + if err != nil { + return nil, nil, err + } + + if valuesRead != r.numRows { + return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows") + } + return values, validData, nil + } valuesRead, err := ReadDataFromAllRowGroups[int32, *file.Int32ColumnChunkReader](r.reader, values, 0, r.numRows) if err != nil { - return nil, err + return nil, nil, err } if valuesRead != r.numRows { - return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead) + return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows") } - return values, nil + return values, nil, nil } -func (r *PayloadReader) GetInt64FromPayload() ([]int64, error) { +func (r *PayloadReader) GetInt64FromPayload() ([]int64, []bool, error) { if r.colType != schemapb.DataType_Int64 { - return nil, fmt.Errorf("failed to get 
int64 from datatype %v", r.colType.String()) + return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get int64 from datatype %v", r.colType.String())) } values := make([]int64, r.numRows) + if r.nullable { + validData := make([]bool, r.numRows) + valuesRead, err := ReadData[int64, *array.Int64](r.reader, values, validData, r.numRows) + if err != nil { + return nil, nil, err + } + + if valuesRead != r.numRows { + return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows") + } + + return values, validData, nil + } valuesRead, err := ReadDataFromAllRowGroups[int64, *file.Int64ColumnChunkReader](r.reader, values, 0, r.numRows) if err != nil { - return nil, err + return nil, nil, err } if valuesRead != r.numRows { - return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead) + return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows") } - return values, nil + return values, nil, nil } -func (r *PayloadReader) GetFloatFromPayload() ([]float32, error) { +func (r *PayloadReader) GetFloatFromPayload() ([]float32, []bool, error) { if r.colType != schemapb.DataType_Float { - return nil, fmt.Errorf("failed to get float32 from datatype %v", r.colType.String()) + return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get float32 from datatype %v", r.colType.String())) } values := make([]float32, r.numRows) + if r.nullable { + validData := make([]bool, r.numRows) + valuesRead, err := ReadData[float32, *array.Float32](r.reader, values, validData, r.numRows) + if err != nil { + return nil, nil, err + } + if valuesRead != r.numRows { + return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows") + } + return values, validData, nil + } valuesRead, err := ReadDataFromAllRowGroups[float32, *file.Float32ColumnChunkReader](r.reader, values, 0, r.numRows) if err != nil { - return nil, err + 
return nil, nil, err } - if valuesRead != r.numRows { - return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead) + return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows") } - - return values, nil + return values, nil, nil } -func (r *PayloadReader) GetDoubleFromPayload() ([]float64, error) { +func (r *PayloadReader) GetDoubleFromPayload() ([]float64, []bool, error) { if r.colType != schemapb.DataType_Double { - return nil, fmt.Errorf("failed to get float32 from datatype %v", r.colType.String()) + return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get double from datatype %v", r.colType.String())) } values := make([]float64, r.numRows) + if r.nullable { + validData := make([]bool, r.numRows) + valuesRead, err := ReadData[float64, *array.Float64](r.reader, values, validData, r.numRows) + if err != nil { + return nil, nil, err + } + + if valuesRead != r.numRows { + return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows") + } + return values, validData, nil + } valuesRead, err := ReadDataFromAllRowGroups[float64, *file.Float64ColumnChunkReader](r.reader, values, 0, r.numRows) if err != nil { - return nil, err + return nil, nil, err } if valuesRead != r.numRows { - return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead) + return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows") } - return values, nil + return values, nil, nil } -func (r *PayloadReader) GetStringFromPayload() ([]string, error) { +func (r *PayloadReader) GetStringFromPayload() ([]string, []bool, error) { if r.colType != schemapb.DataType_String && r.colType != schemapb.DataType_VarChar { - return nil, fmt.Errorf("failed to get string from datatype %v", r.colType.String()) + return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get string from datatype %v", 
r.colType.String())) } - return readByteAndConvert(r, func(bytes parquet.ByteArray) string { + if r.nullable { + values := make([]string, r.numRows) + validData := make([]bool, r.numRows) + valuesRead, err := ReadData[string, *array.String](r.reader, values, validData, r.numRows) + if err != nil { + return nil, nil, err + } + + if valuesRead != r.numRows { + return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows") + } + return values, validData, nil + } + value, err := readByteAndConvert(r, func(bytes parquet.ByteArray) string { return bytes.String() }) + if err != nil { + return nil, nil, err + } + return value, nil, nil +} + +func (r *PayloadReader) GetArrayFromPayload() ([]*schemapb.ScalarField, []bool, error) { + if r.colType != schemapb.DataType_Array { + return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get array from datatype %v", r.colType.String())) + } + + if r.nullable { + return readNullableByteAndConvert(r, func(bytes []byte) *schemapb.ScalarField { + v := &schemapb.ScalarField{} + proto.Unmarshal(bytes, v) + return v + }) + } + value, err := readByteAndConvert(r, func(bytes parquet.ByteArray) *schemapb.ScalarField { + v := &schemapb.ScalarField{} + proto.Unmarshal(bytes, v) + return v + }) + if err != nil { + return nil, nil, err + } + return value, nil, nil +} + +func (r *PayloadReader) GetJSONFromPayload() ([][]byte, []bool, error) { + if r.colType != schemapb.DataType_JSON { + return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get json from datatype %v", r.colType.String())) + } + + if r.nullable { + return readNullableByteAndConvert(r, func(bytes []byte) []byte { + return bytes + }) + } + value, err := readByteAndConvert(r, func(bytes parquet.ByteArray) []byte { + return bytes + }) + if err != nil { + return nil, nil, err + } + return value, nil, nil } func (r *PayloadReader) GetByteArrayDataSet() (*DataSet[parquet.ByteArray, 
*file.ByteArrayColumnChunkReader], error) {
@@ -282,25 +453,26 @@ func (r *PayloadReader) GetArrowRecordReader() (pqarrow.RecordReader, error) {
 	return rr, nil
 }
 
-func (r *PayloadReader) GetArrayFromPayload() ([]*schemapb.ScalarField, error) {
-	if r.colType != schemapb.DataType_Array {
-		return nil, fmt.Errorf("failed to get string from datatype %v", r.colType.String())
-	}
-	return readByteAndConvert(r, func(bytes parquet.ByteArray) *schemapb.ScalarField {
-		v := &schemapb.ScalarField{}
-		proto.Unmarshal(bytes, v)
-		return v
-	})
-}
-
-func (r *PayloadReader) GetJSONFromPayload() ([][]byte, error) {
-	if r.colType != schemapb.DataType_JSON {
-		return nil, fmt.Errorf("failed to get string from datatype %v", r.colType.String())
+// readNullableByteAndConvert reads a nullable binary column through ReadData
+// and maps every raw value with convert, returning the converted values
+// together with their validity flags.
+func readNullableByteAndConvert[T any](r *PayloadReader, convert func([]byte) T) ([]T, []bool, error) {
+	values := make([][]byte, r.numRows)
+	validData := make([]bool, r.numRows)
+	valuesRead, err := ReadData[[]byte, *array.Binary](r.reader, values, validData, r.numRows)
+	if err != nil {
+		return nil, nil, err
 	}
 
-	return readByteAndConvert(r, func(bytes parquet.ByteArray) []byte {
-		return bytes
-	})
+	if valuesRead != r.numRows {
+		return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
+	}
+
+	ret := make([]T, r.numRows)
+	for i := 0; i < int(r.numRows); i++ {
+		ret[i] = convert(values[i])
+	}
+	return ret, validData, nil
 }
 
 func readByteAndConvert[T any](r *PayloadReader, convert func(parquet.ByteArray) T) ([]T, error) {
@@ -568,3 +740,98 @@ func (s *DataSet[T, E]) NextBatch(batch int64) ([]T, error) {
 	s.cnt += batch
 	return result, nil
 }
+
+// ReadData decodes every column of the parquet file behind reader through the
+// arrow bridge and copies the decoded values and their validity flags into
+// value and validData, in column order.
+//
+// E is the concrete arrow array type each chunk is expected to have; a chunk
+// of any other type is reported as an error. value and validData must be large
+// enough for every row read, otherwise an error is returned instead of
+// panicking. It returns the number of rows copied, or -1 and an error.
+func ReadData[T any, E interface {
+	Value(int) T
+	NullBitmapBytes() []byte
+}](reader *file.Reader, value []T, validData []bool, numRows int64) (int64, error) {
+	var offset int
+	fileReader, err := pqarrow.NewFileReader(reader, pqarrow.ArrowReadProperties{}, memory.DefaultAllocator)
+	// defer fileReader.ParquetReader().Close()
+	if err != nil {
+		log.Warn("create arrow parquet file reader failed", zap.Error(err))
+		return -1, err
+	}
+	schema, err := fileReader.Schema()
+	if err != nil {
+		log.Warn("can't schema from file", zap.Error(err))
+		return -1, err
+	}
+	for i, field := range schema.Fields() {
+		// Spawn a new context to ignore cancellation from parental context.
+		newCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+		columnReader, err := fileReader.GetColumn(newCtx, i)
+		if err != nil {
+			cancel()
+			log.Warn("get column reader failed", zap.String("fieldName", field.Name), zap.Error(err))
+			return -1, err
+		}
+		chunked, err := columnReader.NextBatch(numRows)
+		// Release the timeout context as soon as the batch has been read rather
+		// than deferring inside the loop, which would keep one timer alive per
+		// column until ReadData returns.
+		cancel()
+		if err != nil {
+			return -1, err
+		}
+		for _, chunk := range chunked.Chunks() {
+			dataNums := chunk.Data().Len()
+			typedChunk, ok := chunk.(E)
+			if !ok {
+				log.Warn("the column data in parquet is not equal to field", zap.String("fieldName", field.Name), zap.String("actual type", chunk.DataType().Name()))
+				return -1, merr.WrapErrImportFailed(fmt.Sprintf("the column data in parquet is not equal to field: %s, but: %s", field.Name, chunk.DataType().Name()))
+			}
+			// Guard the destination slices: without this an oversized column
+			// (or more than one column) would index past their end and panic.
+			if offset+dataNums > len(value) || offset+dataNums > len(validData) {
+				return -1, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("read %d rows from parquet, but output buffers only hold %d values and %d valid flags", offset+dataNums, len(value), len(validData)))
+			}
+			// NOTE(review): the bitmap is decoded starting at bit 0, which assumes
+			// the chunk is not a sliced array with a non-zero offset - confirm.
+			nullBitset := bytesToBoolArray(dataNums, typedChunk.NullBitmapBytes())
+			for j := 0; j < dataNums; j++ {
+				value[offset] = typedChunk.Value(j)
+				validData[offset] = nullBitset[j]
+				offset++
+			}
+		}
+	}
+	return int64(offset), nil
+}
+
+// bytesToBoolArray expands the first length bits of an arrow validity bitmap
+// (least-significant bit first within each byte) into a []bool, where true
+// means the value is valid.
+//
+// An empty bitmap yields all-true: arrow may omit the validity buffer when an
+// array contains no nulls, and indexing it would otherwise panic.
+// todo(smellthemoon): use byte to store valid_data
+func bytesToBoolArray(length int, bytes []byte) []bool {
+	bools := make([]bool, 0, length)
+	if len(bytes) == 0 {
+		for i := 0; i < length; i++ {
+			bools = append(bools, true)
+		}
+		return bools
+	}
+
+	for i := 0; i < length; i++ {
+		bit := (bytes[uint(i)/8] & BitMask[byte(i)%8]) != 0
+		bools = append(bools, bit)
+	}
+
+	return bools
+}
+
+var (
+	BitMask        = [8]byte{1, 2, 4, 8, 16, 32, 64, 128}
+	FlippedBitMask = [8]byte{254, 253, 251, 247, 239, 223, 191, 127}
+)
diff --git a/internal/storage/payload_reader_test.go b/internal/storage/payload_reader_test.go
index f301c88275..87fccdfee3 100644
--- a/internal/storage/payload_reader_test.go
+++ b/internal/storage/payload_reader_test.go
@@ -31,7
+31,7 @@ func (s *ReadDataFromAllRowGroupsSuite) SetupSuite() { s.size = 1 << 10 data := make([]int8, s.size) - err = ew.AddInt8ToPayload(data) + err = ew.AddInt8ToPayload(data, nil) s.Require().NoError(err) ew.SetEventTimestamp(1, 1) diff --git a/internal/storage/payload_test.go b/internal/storage/payload_test.go index b7109043dd..b477eed5c6 100644 --- a/internal/storage/payload_test.go +++ b/internal/storage/payload_test.go @@ -26,18 +26,19 @@ import ( "github.com/stretchr/testify/require" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/typeutil" ) func TestPayload_ReaderAndWriter(t *testing.T) { t.Run("TestBool", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) + w, err := NewPayloadWriter(schemapb.DataType_Bool, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddBoolToPayload([]bool{false, false, false, false}) + err = w.AddBoolToPayload([]bool{false, false, false, false}, nil) assert.NoError(t, err) - err = w.AddDataToPayload([]bool{false, false, false, false}) + err = w.AddDataToPayload([]bool{false, false, false, false}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) @@ -50,29 +51,31 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Bool, buffer) + r, err := NewPayloadReader(schemapb.DataType_Bool, buffer, false) require.Nil(t, err) length, err = r.GetPayloadLengthFromReader() assert.NoError(t, err) assert.Equal(t, length, 8) - bools, err := r.GetBoolFromPayload() + bools, valids, err := r.GetBoolFromPayload() assert.NoError(t, err) - assert.ElementsMatch(t, []bool{false, false, false, false, false, false, false, false}, bools) - ibools, _, err := r.GetDataFromPayload() + assert.Equal(t, []bool{false, false, false, false, false, false, false, false}, bools) + assert.Nil(t, valids) + 
ibools, valids, _, err := r.GetDataFromPayload() bools = ibools.([]bool) assert.NoError(t, err) - assert.ElementsMatch(t, []bool{false, false, false, false, false, false, false, false}, bools) + assert.Nil(t, valids) + assert.Equal(t, []bool{false, false, false, false, false, false, false, false}, bools) defer r.ReleasePayloadReader() }) t.Run("TestInt8", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int8) + w, err := NewPayloadWriter(schemapb.DataType_Int8, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddInt8ToPayload([]int8{1, 2, 3}) + err = w.AddInt8ToPayload([]int8{1, 2, 3}, nil) assert.NoError(t, err) - err = w.AddDataToPayload([]int8{4, 5, 6}) + err = w.AddDataToPayload([]int8{4, 5, 6}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) @@ -85,32 +88,34 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Int8, buffer) + r, err := NewPayloadReader(schemapb.DataType_Int8, buffer, false) require.Nil(t, err) length, err = r.GetPayloadLengthFromReader() assert.NoError(t, err) assert.Equal(t, length, 6) - int8s, err := r.GetInt8FromPayload() + int8s, valids, err := r.GetInt8FromPayload() assert.NoError(t, err) - assert.ElementsMatch(t, []int8{1, 2, 3, 4, 5, 6}, int8s) + assert.Nil(t, valids) + assert.Equal(t, []int8{1, 2, 3, 4, 5, 6}, int8s) - iint8s, _, err := r.GetDataFromPayload() + iint8s, valids, _, err := r.GetDataFromPayload() int8s = iint8s.([]int8) assert.NoError(t, err) + assert.Nil(t, valids) - assert.ElementsMatch(t, []int8{1, 2, 3, 4, 5, 6}, int8s) + assert.Equal(t, []int8{1, 2, 3, 4, 5, 6}, int8s) defer r.ReleasePayloadReader() }) t.Run("TestInt16", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int16) + w, err := NewPayloadWriter(schemapb.DataType_Int16, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddInt16ToPayload([]int16{1, 2, 3}) + err 
= w.AddInt16ToPayload([]int16{1, 2, 3}, nil) assert.NoError(t, err) - err = w.AddDataToPayload([]int16{1, 2, 3}) + err = w.AddDataToPayload([]int16{1, 2, 3}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) @@ -123,30 +128,32 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Int16, buffer) + r, err := NewPayloadReader(schemapb.DataType_Int16, buffer, false) require.Nil(t, err) length, err = r.GetPayloadLengthFromReader() assert.NoError(t, err) assert.Equal(t, length, 6) - int16s, err := r.GetInt16FromPayload() + int16s, valids, err := r.GetInt16FromPayload() assert.NoError(t, err) - assert.ElementsMatch(t, []int16{1, 2, 3, 1, 2, 3}, int16s) + assert.Nil(t, valids) + assert.Equal(t, []int16{1, 2, 3, 1, 2, 3}, int16s) - iint16s, _, err := r.GetDataFromPayload() + iint16s, valids, _, err := r.GetDataFromPayload() int16s = iint16s.([]int16) assert.NoError(t, err) - assert.ElementsMatch(t, []int16{1, 2, 3, 1, 2, 3}, int16s) + assert.Nil(t, valids) + assert.Equal(t, []int16{1, 2, 3, 1, 2, 3}, int16s) defer r.ReleasePayloadReader() }) t.Run("TestInt32", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int32) + w, err := NewPayloadWriter(schemapb.DataType_Int32, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddInt32ToPayload([]int32{1, 2, 3}) + err = w.AddInt32ToPayload([]int32{1, 2, 3}, nil) assert.NoError(t, err) - err = w.AddDataToPayload([]int32{1, 2, 3}) + err = w.AddDataToPayload([]int32{1, 2, 3}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) @@ -159,31 +166,33 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Int32, buffer) + r, err := NewPayloadReader(schemapb.DataType_Int32, buffer, false) require.Nil(t, err) length, err = 
r.GetPayloadLengthFromReader() assert.NoError(t, err) assert.Equal(t, length, 6) - int32s, err := r.GetInt32FromPayload() + int32s, valids, err := r.GetInt32FromPayload() assert.NoError(t, err) - assert.ElementsMatch(t, []int32{1, 2, 3, 1, 2, 3}, int32s) + assert.Equal(t, []int32{1, 2, 3, 1, 2, 3}, int32s) + assert.Nil(t, valids) - iint32s, _, err := r.GetDataFromPayload() + iint32s, valids, _, err := r.GetDataFromPayload() int32s = iint32s.([]int32) assert.NoError(t, err) - assert.ElementsMatch(t, []int32{1, 2, 3, 1, 2, 3}, int32s) + assert.Equal(t, []int32{1, 2, 3, 1, 2, 3}, int32s) + assert.Nil(t, valids) defer r.ReleasePayloadReader() }) t.Run("TestInt64", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int64) + w, err := NewPayloadWriter(schemapb.DataType_Int64, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddInt64ToPayload([]int64{1, 2, 3}) + err = w.AddInt64ToPayload([]int64{1, 2, 3}, nil) assert.NoError(t, err) - err = w.AddDataToPayload([]int64{1, 2, 3}) + err = w.AddDataToPayload([]int64{1, 2, 3}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) @@ -196,31 +205,33 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Int64, buffer) + r, err := NewPayloadReader(schemapb.DataType_Int64, buffer, false) require.Nil(t, err) length, err = r.GetPayloadLengthFromReader() assert.NoError(t, err) assert.Equal(t, length, 6) - int64s, err := r.GetInt64FromPayload() + int64s, valids, err := r.GetInt64FromPayload() assert.NoError(t, err) - assert.ElementsMatch(t, []int64{1, 2, 3, 1, 2, 3}, int64s) + assert.Equal(t, []int64{1, 2, 3, 1, 2, 3}, int64s) + assert.Nil(t, valids) - iint64s, _, err := r.GetDataFromPayload() + iint64s, valids, _, err := r.GetDataFromPayload() int64s = iint64s.([]int64) assert.NoError(t, err) - assert.ElementsMatch(t, []int64{1, 2, 3, 1, 2, 3}, int64s) + assert.Equal(t, 
[]int64{1, 2, 3, 1, 2, 3}, int64s) + assert.Nil(t, valids) defer r.ReleasePayloadReader() }) t.Run("TestFloat32", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Float) + w, err := NewPayloadWriter(schemapb.DataType_Float, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddFloatToPayload([]float32{1.0, 2.0, 3.0}) + err = w.AddFloatToPayload([]float32{1.0, 2.0, 3.0}, nil) assert.NoError(t, err) - err = w.AddDataToPayload([]float32{1.0, 2.0, 3.0}) + err = w.AddDataToPayload([]float32{1.0, 2.0, 3.0}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) @@ -233,31 +244,33 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Float, buffer) + r, err := NewPayloadReader(schemapb.DataType_Float, buffer, false) require.Nil(t, err) length, err = r.GetPayloadLengthFromReader() assert.NoError(t, err) assert.Equal(t, length, 6) - float32s, err := r.GetFloatFromPayload() + float32s, valids, err := r.GetFloatFromPayload() assert.NoError(t, err) - assert.ElementsMatch(t, []float32{1.0, 2.0, 3.0, 1.0, 2.0, 3.0}, float32s) + assert.Equal(t, []float32{1.0, 2.0, 3.0, 1.0, 2.0, 3.0}, float32s) + assert.Nil(t, valids) - ifloat32s, _, err := r.GetDataFromPayload() + ifloat32s, valids, _, err := r.GetDataFromPayload() float32s = ifloat32s.([]float32) assert.NoError(t, err) - assert.ElementsMatch(t, []float32{1.0, 2.0, 3.0, 1.0, 2.0, 3.0}, float32s) + assert.Equal(t, []float32{1.0, 2.0, 3.0, 1.0, 2.0, 3.0}, float32s) + assert.Nil(t, valids) defer r.ReleasePayloadReader() }) t.Run("TestDouble", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Double) + w, err := NewPayloadWriter(schemapb.DataType_Double, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddDoubleToPayload([]float64{1.0, 2.0, 3.0}) + err = w.AddDoubleToPayload([]float64{1.0, 2.0, 3.0}, nil) assert.NoError(t, err) - err = 
w.AddDataToPayload([]float64{1.0, 2.0, 3.0}) + err = w.AddDataToPayload([]float64{1.0, 2.0, 3.0}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) @@ -270,35 +283,37 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Double, buffer) + r, err := NewPayloadReader(schemapb.DataType_Double, buffer, false) require.Nil(t, err) length, err = r.GetPayloadLengthFromReader() assert.NoError(t, err) assert.Equal(t, length, 6) - float64s, err := r.GetDoubleFromPayload() + float64s, valids, err := r.GetDoubleFromPayload() assert.NoError(t, err) - assert.ElementsMatch(t, []float64{1.0, 2.0, 3.0, 1.0, 2.0, 3.0}, float64s) + assert.Equal(t, []float64{1.0, 2.0, 3.0, 1.0, 2.0, 3.0}, float64s) + assert.Nil(t, valids) - ifloat64s, _, err := r.GetDataFromPayload() + ifloat64s, valids, _, err := r.GetDataFromPayload() float64s = ifloat64s.([]float64) assert.NoError(t, err) - assert.ElementsMatch(t, []float64{1.0, 2.0, 3.0, 1.0, 2.0, 3.0}, float64s) + assert.Nil(t, valids) + assert.Equal(t, []float64{1.0, 2.0, 3.0, 1.0, 2.0, 3.0}, float64s) defer r.ReleasePayloadReader() }) t.Run("TestAddString", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_String) + w, err := NewPayloadWriter(schemapb.DataType_String, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddOneStringToPayload("hello0") + err = w.AddOneStringToPayload("hello0", true) assert.NoError(t, err) - err = w.AddOneStringToPayload("hello1") + err = w.AddOneStringToPayload("hello1", true) assert.NoError(t, err) - err = w.AddOneStringToPayload("hello2") + err = w.AddOneStringToPayload("hello2", true) assert.NoError(t, err) - err = w.AddDataToPayload("hello3") + err = w.AddDataToPayload("hello3", nil) assert.NoError(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) @@ -308,33 +323,35 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := 
w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_String, buffer) + r, err := NewPayloadReader(schemapb.DataType_String, buffer, false) assert.NoError(t, err) length, err = r.GetPayloadLengthFromReader() assert.NoError(t, err) assert.Equal(t, length, 4) - str, err := r.GetStringFromPayload() + str, valids, err := r.GetStringFromPayload() assert.NoError(t, err) + assert.Nil(t, valids) assert.Equal(t, str[0], "hello0") assert.Equal(t, str[1], "hello1") assert.Equal(t, str[2], "hello2") assert.Equal(t, str[3], "hello3") - istr, _, err := r.GetDataFromPayload() + istr, valids, _, err := r.GetDataFromPayload() strArray := istr.([]string) assert.NoError(t, err) assert.Equal(t, strArray[0], "hello0") assert.Equal(t, strArray[1], "hello1") assert.Equal(t, strArray[2], "hello2") assert.Equal(t, strArray[3], "hello3") + assert.Nil(t, valids) r.ReleasePayloadReader() w.ReleasePayloadWriter() }) t.Run("TestAddArray", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Array) + w, err := NewPayloadWriter(schemapb.DataType_Array, false) require.Nil(t, err) require.NotNil(t, w) @@ -344,7 +361,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { Data: []int32{1, 2}, }, }, - }) + }, true) assert.NoError(t, err) err = w.AddOneArrayToPayload(&schemapb.ScalarField{ Data: &schemapb.ScalarField_IntData{ @@ -352,7 +369,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { Data: []int32{3, 4}, }, }, - }) + }, true) assert.NoError(t, err) err = w.AddOneArrayToPayload(&schemapb.ScalarField{ Data: &schemapb.ScalarField_IntData{ @@ -360,7 +377,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { Data: []int32{5, 6}, }, }, - }) + }, true) assert.NoError(t, err) err = w.AddDataToPayload(&schemapb.ScalarField{ Data: &schemapb.ScalarField_IntData{ @@ -368,7 +385,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { Data: []int32{7, 8}, }, }, - }) + }, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() assert.NoError(t, 
err) @@ -378,23 +395,25 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Array, buffer) + r, err := NewPayloadReader(schemapb.DataType_Array, buffer, false) assert.NoError(t, err) length, err = r.GetPayloadLengthFromReader() assert.NoError(t, err) assert.Equal(t, length, 4) - arrayList, err := r.GetArrayFromPayload() + arrayList, valids, err := r.GetArrayFromPayload() assert.NoError(t, err) + assert.Nil(t, valids) assert.EqualValues(t, []int32{1, 2}, arrayList[0].GetIntData().GetData()) assert.EqualValues(t, []int32{3, 4}, arrayList[1].GetIntData().GetData()) assert.EqualValues(t, []int32{5, 6}, arrayList[2].GetIntData().GetData()) assert.EqualValues(t, []int32{7, 8}, arrayList[3].GetIntData().GetData()) - iArrayList, _, err := r.GetDataFromPayload() + iArrayList, valids, _, err := r.GetDataFromPayload() arrayList = iArrayList.([]*schemapb.ScalarField) assert.NoError(t, err) + assert.Nil(t, valids) assert.EqualValues(t, []int32{1, 2}, arrayList[0].GetIntData().GetData()) assert.EqualValues(t, []int32{3, 4}, arrayList[1].GetIntData().GetData()) assert.EqualValues(t, []int32{5, 6}, arrayList[2].GetIntData().GetData()) @@ -404,17 +423,17 @@ func TestPayload_ReaderAndWriter(t *testing.T) { }) t.Run("TestAddJSON", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_JSON) + w, err := NewPayloadWriter(schemapb.DataType_JSON, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddOneJSONToPayload([]byte(`{"1":"1"}`)) + err = w.AddOneJSONToPayload([]byte(`{"1":"1"}`), true) assert.NoError(t, err) - err = w.AddOneJSONToPayload([]byte(`{"2":"2"}`)) + err = w.AddOneJSONToPayload([]byte(`{"2":"2"}`), true) assert.NoError(t, err) - err = w.AddOneJSONToPayload([]byte(`{"3":"3"}`)) + err = w.AddOneJSONToPayload([]byte(`{"3":"3"}`), true) assert.NoError(t, err) - err = w.AddDataToPayload([]byte(`{"4":"4"}`)) + err = 
w.AddDataToPayload([]byte(`{"4":"4"}`), nil) assert.NoError(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) @@ -424,23 +443,25 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_JSON, buffer) + r, err := NewPayloadReader(schemapb.DataType_JSON, buffer, false) assert.NoError(t, err) length, err = r.GetPayloadLengthFromReader() assert.NoError(t, err) assert.Equal(t, length, 4) - json, err := r.GetJSONFromPayload() + json, valids, err := r.GetJSONFromPayload() assert.NoError(t, err) + assert.Nil(t, valids) assert.EqualValues(t, []byte(`{"1":"1"}`), json[0]) assert.EqualValues(t, []byte(`{"2":"2"}`), json[1]) assert.EqualValues(t, []byte(`{"3":"3"}`), json[2]) assert.EqualValues(t, []byte(`{"4":"4"}`), json[3]) - iJSON, _, err := r.GetDataFromPayload() + iJSON, valids, _, err := r.GetDataFromPayload() json = iJSON.([][]byte) assert.NoError(t, err) + assert.Nil(t, valids) assert.EqualValues(t, []byte(`{"1":"1"}`), json[0]) assert.EqualValues(t, []byte(`{"2":"2"}`), json[1]) assert.EqualValues(t, []byte(`{"3":"3"}`), json[2]) @@ -450,7 +471,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { }) t.Run("TestBinaryVector", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, 8) + w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, false, 8) require.Nil(t, err) require.NotNil(t, w) @@ -465,7 +486,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { err = w.AddBinaryVectorToPayload(in, 8) assert.NoError(t, err) - err = w.AddDataToPayload(in2, 8) + err = w.AddDataToPayload(in2, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) @@ -478,7 +499,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_BinaryVector, buffer) + r, err := 
NewPayloadReader(schemapb.DataType_BinaryVector, buffer, false) require.Nil(t, err) length, err = r.GetPayloadLengthFromReader() assert.NoError(t, err) @@ -489,7 +510,8 @@ func TestPayload_ReaderAndWriter(t *testing.T) { assert.Equal(t, 8, dim) assert.Equal(t, 24, len(binVecs)) - ibinVecs, dim, err := r.GetDataFromPayload() + ibinVecs, valids, dim, err := r.GetDataFromPayload() + assert.Nil(t, valids) assert.NoError(t, err) binVecs = ibinVecs.([]byte) assert.Equal(t, 8, dim) @@ -498,13 +520,13 @@ func TestPayload_ReaderAndWriter(t *testing.T) { }) t.Run("TestFloatVector", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_FloatVector, 1) + w, err := NewPayloadWriter(schemapb.DataType_FloatVector, false, 1) require.Nil(t, err) require.NotNil(t, w) err = w.AddFloatVectorToPayload([]float32{1.0, 2.0}, 1) assert.NoError(t, err) - err = w.AddDataToPayload([]float32{3.0, 4.0}, 1) + err = w.AddDataToPayload([]float32{3.0, 4.0}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) @@ -517,7 +539,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_FloatVector, buffer) + r, err := NewPayloadReader(schemapb.DataType_FloatVector, buffer, false) require.Nil(t, err) length, err = r.GetPayloadLengthFromReader() assert.NoError(t, err) @@ -527,25 +549,26 @@ func TestPayload_ReaderAndWriter(t *testing.T) { assert.NoError(t, err) assert.Equal(t, 1, dim) assert.Equal(t, 4, len(floatVecs)) - assert.ElementsMatch(t, []float32{1.0, 2.0, 3.0, 4.0}, floatVecs) + assert.Equal(t, []float32{1.0, 2.0, 3.0, 4.0}, floatVecs) - ifloatVecs, dim, err := r.GetDataFromPayload() + ifloatVecs, valids, dim, err := r.GetDataFromPayload() + assert.Nil(t, valids) assert.NoError(t, err) floatVecs = ifloatVecs.([]float32) assert.Equal(t, 1, dim) assert.Equal(t, 4, len(floatVecs)) - assert.ElementsMatch(t, []float32{1.0, 2.0, 3.0, 4.0}, floatVecs) + 
assert.Equal(t, []float32{1.0, 2.0, 3.0, 4.0}, floatVecs) defer r.ReleasePayloadReader() }) t.Run("TestFloat16Vector", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Float16Vector, 1) + w, err := NewPayloadWriter(schemapb.DataType_Float16Vector, false, 1) require.Nil(t, err) require.NotNil(t, w) err = w.AddFloat16VectorToPayload([]byte{1, 2}, 1) assert.NoError(t, err) - err = w.AddDataToPayload([]byte{3, 4}, 1) + err = w.AddDataToPayload([]byte{3, 4}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) @@ -558,7 +581,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Float16Vector, buffer) + r, err := NewPayloadReader(schemapb.DataType_Float16Vector, buffer, false) require.Nil(t, err) length, err = r.GetPayloadLengthFromReader() assert.NoError(t, err) @@ -570,8 +593,9 @@ func TestPayload_ReaderAndWriter(t *testing.T) { assert.Equal(t, 4, len(float16Vecs)) assert.ElementsMatch(t, []byte{1, 2, 3, 4}, float16Vecs) - ifloat16Vecs, dim, err := r.GetDataFromPayload() + ifloat16Vecs, valids, dim, err := r.GetDataFromPayload() assert.NoError(t, err) + assert.Nil(t, valids) float16Vecs = ifloat16Vecs.([]byte) assert.Equal(t, 1, dim) assert.Equal(t, 4, len(float16Vecs)) @@ -580,13 +604,13 @@ func TestPayload_ReaderAndWriter(t *testing.T) { }) t.Run("TestBFloat16Vector", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_BFloat16Vector, 1) + w, err := NewPayloadWriter(schemapb.DataType_BFloat16Vector, false, 1) require.Nil(t, err) require.NotNil(t, w) err = w.AddBFloat16VectorToPayload([]byte{1, 2}, 1) assert.NoError(t, err) - err = w.AddDataToPayload([]byte{3, 4}, 1) + err = w.AddDataToPayload([]byte{3, 4}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) @@ -599,7 +623,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := 
w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_BFloat16Vector, buffer) + r, err := NewPayloadReader(schemapb.DataType_BFloat16Vector, buffer, false) require.Nil(t, err) length, err = r.GetPayloadLengthFromReader() assert.NoError(t, err) @@ -611,8 +635,9 @@ func TestPayload_ReaderAndWriter(t *testing.T) { assert.Equal(t, 4, len(bfloat16Vecs)) assert.ElementsMatch(t, []byte{1, 2, 3, 4}, bfloat16Vecs) - ibfloat16Vecs, dim, err := r.GetDataFromPayload() + ibfloat16Vecs, valids, dim, err := r.GetDataFromPayload() assert.NoError(t, err) + assert.Nil(t, valids) bfloat16Vecs = ibfloat16Vecs.([]byte) assert.Equal(t, 1, dim) assert.Equal(t, 4, len(bfloat16Vecs)) @@ -621,7 +646,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { }) t.Run("TestSparseFloatVector", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_SparseFloatVector) + w, err := NewPayloadWriter(schemapb.DataType_SparseFloatVector, false) require.Nil(t, err) require.NotNil(t, w) @@ -658,7 +683,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_SparseFloatVector, buffer) + r, err := NewPayloadReader(schemapb.DataType_SparseFloatVector, buffer, false) require.Nil(t, err) length, err = r.GetPayloadLengthFromReader() assert.NoError(t, err) @@ -681,15 +706,16 @@ func TestPayload_ReaderAndWriter(t *testing.T) { }, }, floatVecs.SparseFloatArray) - ifloatVecs, dim, err := r.GetDataFromPayload() + ifloatVecs, valids, dim, err := r.GetDataFromPayload() assert.NoError(t, err) + assert.Nil(t, valids) assert.Equal(t, floatVecs, ifloatVecs.(*SparseFloatVectorFieldData)) assert.Equal(t, 600, dim) defer r.ReleasePayloadReader() }) testSparseOneBatch := func(t *testing.T, rows [][]byte, actualDim int) { - w, err := NewPayloadWriter(schemapb.DataType_SparseFloatVector) + w, err := NewPayloadWriter(schemapb.DataType_SparseFloatVector, false) 
require.Nil(t, err) require.NotNil(t, w) @@ -711,7 +737,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_SparseFloatVector, buffer) + r, err := NewPayloadReader(schemapb.DataType_SparseFloatVector, buffer, false) require.Nil(t, err) length, err = r.GetPayloadLengthFromReader() assert.NoError(t, err) @@ -726,7 +752,8 @@ func TestPayload_ReaderAndWriter(t *testing.T) { Contents: rows, }, floatVecs.SparseFloatArray) - ifloatVecs, dim, err := r.GetDataFromPayload() + ifloatVecs, valids, dim, err := r.GetDataFromPayload() + assert.Nil(t, valids) assert.NoError(t, err) assert.Equal(t, floatVecs, ifloatVecs.(*SparseFloatVectorFieldData)) assert.Equal(t, actualDim, dim) @@ -808,23 +835,23 @@ func TestPayload_ReaderAndWriter(t *testing.T) { // }) t.Run("TestAddBoolAfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) + w, err := NewPayloadWriter(schemapb.DataType_Bool, false) require.Nil(t, err) require.NotNil(t, w) _, err = w.GetPayloadBufferFromWriter() assert.Error(t, err) - err = w.AddBoolToPayload([]bool{}) + err = w.AddBoolToPayload([]bool{}, nil) assert.Error(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) - err = w.AddBoolToPayload([]bool{false}) + err = w.AddBoolToPayload([]bool{false}, nil) assert.Error(t, err) }) t.Run("TestAddInt8AfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int8) + w, err := NewPayloadWriter(schemapb.DataType_Int8, false) require.Nil(t, err) require.NotNil(t, w) defer w.Close() @@ -832,15 +859,15 @@ func TestPayload_ReaderAndWriter(t *testing.T) { _, err = w.GetPayloadBufferFromWriter() assert.Error(t, err) - err = w.AddInt8ToPayload([]int8{}) + err = w.AddInt8ToPayload([]int8{}, nil) assert.Error(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) - err = w.AddInt8ToPayload([]int8{0}) + err = w.AddInt8ToPayload([]int8{0}, nil) 
assert.Error(t, err) }) t.Run("TestAddInt16AfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int16) + w, err := NewPayloadWriter(schemapb.DataType_Int16, false) require.Nil(t, err) require.NotNil(t, w) defer w.Close() @@ -848,15 +875,15 @@ func TestPayload_ReaderAndWriter(t *testing.T) { _, err = w.GetPayloadBufferFromWriter() assert.Error(t, err) - err = w.AddInt16ToPayload([]int16{}) + err = w.AddInt16ToPayload([]int16{}, nil) assert.Error(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) - err = w.AddInt16ToPayload([]int16{0}) + err = w.AddInt16ToPayload([]int16{0}, nil) assert.Error(t, err) }) t.Run("TestAddInt32AfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int32) + w, err := NewPayloadWriter(schemapb.DataType_Int32, false) require.Nil(t, err) require.NotNil(t, w) defer w.Close() @@ -864,15 +891,15 @@ func TestPayload_ReaderAndWriter(t *testing.T) { _, err = w.GetPayloadBufferFromWriter() assert.Error(t, err) - err = w.AddInt32ToPayload([]int32{}) + err = w.AddInt32ToPayload([]int32{}, nil) assert.Error(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) - err = w.AddInt32ToPayload([]int32{0}) + err = w.AddInt32ToPayload([]int32{0}, nil) assert.Error(t, err) }) t.Run("TestAddInt64AfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int64) + w, err := NewPayloadWriter(schemapb.DataType_Int64, false) require.Nil(t, err) require.NotNil(t, w) defer w.Close() @@ -880,15 +907,15 @@ func TestPayload_ReaderAndWriter(t *testing.T) { _, err = w.GetPayloadBufferFromWriter() assert.Error(t, err) - err = w.AddInt64ToPayload([]int64{}) + err = w.AddInt64ToPayload([]int64{}, nil) assert.Error(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) - err = w.AddInt64ToPayload([]int64{0}) + err = w.AddInt64ToPayload([]int64{0}, nil) assert.Error(t, err) }) t.Run("TestAddFloatAfterFinish", func(t *testing.T) { - w, err := 
NewPayloadWriter(schemapb.DataType_Float) + w, err := NewPayloadWriter(schemapb.DataType_Float, false) require.Nil(t, err) require.NotNil(t, w) defer w.Close() @@ -896,15 +923,15 @@ func TestPayload_ReaderAndWriter(t *testing.T) { _, err = w.GetPayloadBufferFromWriter() assert.Error(t, err) - err = w.AddFloatToPayload([]float32{}) + err = w.AddFloatToPayload([]float32{}, nil) assert.Error(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) - err = w.AddFloatToPayload([]float32{0.0}) + err = w.AddFloatToPayload([]float32{0.0}, nil) assert.Error(t, err) }) t.Run("TestAddDoubleAfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Double) + w, err := NewPayloadWriter(schemapb.DataType_Double, false) require.Nil(t, err) require.NotNil(t, w) defer w.Close() @@ -912,15 +939,15 @@ func TestPayload_ReaderAndWriter(t *testing.T) { _, err = w.GetPayloadBufferFromWriter() assert.Error(t, err) - err = w.AddDoubleToPayload([]float64{}) + err = w.AddDoubleToPayload([]float64{}, nil) assert.Error(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) - err = w.AddDoubleToPayload([]float64{0.0}) + err = w.AddDoubleToPayload([]float64{0.0}, nil) assert.Error(t, err) }) t.Run("TestAddOneStringAfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_String) + w, err := NewPayloadWriter(schemapb.DataType_String, false) require.Nil(t, err) require.NotNil(t, w) defer w.Close() @@ -928,15 +955,15 @@ func TestPayload_ReaderAndWriter(t *testing.T) { _, err = w.GetPayloadBufferFromWriter() assert.Error(t, err) - err = w.AddOneStringToPayload("") + err = w.AddOneStringToPayload("", true) assert.NoError(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) - err = w.AddOneStringToPayload("c") + err = w.AddOneStringToPayload("c", true) assert.Error(t, err) }) t.Run("TestAddBinVectorAfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, 8) + w, err := 
NewPayloadWriter(schemapb.DataType_BinaryVector, false, 8) require.Nil(t, err) require.NotNil(t, w) defer w.Close() @@ -960,7 +987,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { assert.Error(t, err) }) t.Run("TestAddFloatVectorAfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_FloatVector, 8) + w, err := NewPayloadWriter(schemapb.DataType_FloatVector, false, 8) require.Nil(t, err) require.NotNil(t, w) defer w.Close() @@ -981,7 +1008,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { assert.Error(t, err) }) t.Run("TestAddFloat16VectorAfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Float16Vector, 8) + w, err := NewPayloadWriter(schemapb.DataType_Float16Vector, false, 8) require.Nil(t, err) require.NotNil(t, w) defer w.Close() @@ -1005,7 +1032,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { assert.Error(t, err) }) t.Run("TestAddBFloat16VectorAfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_BFloat16Vector, 8) + w, err := NewPayloadWriter(schemapb.DataType_BFloat16Vector, false, 8) require.Nil(t, err) require.NotNil(t, w) defer w.Close() @@ -1029,7 +1056,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { assert.Error(t, err) }) t.Run("TestAddSparseFloatVectorAfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_SparseFloatVector) + w, err := NewPayloadWriter(schemapb.DataType_SparseFloatVector, false) require.Nil(t, err) require.NotNil(t, w) defer w.Close() @@ -1061,7 +1088,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { }) t.Run("TestNewReadError", func(t *testing.T) { buffer := []byte{0} - r, err := NewPayloadReader(999, buffer) + r, err := NewPayloadReader(999, buffer, false) assert.Error(t, err) assert.Nil(t, r) }) @@ -1069,15 +1096,15 @@ func TestPayload_ReaderAndWriter(t *testing.T) { r := PayloadReader{} r.colType = 999 - _, _, err := r.GetDataFromPayload() + _, _, _, err := r.GetDataFromPayload() assert.Error(t, 
err) }) t.Run("TestGetBoolError", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int8) + w, err := NewPayloadWriter(schemapb.DataType_Int8, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddInt8ToPayload([]int8{1, 2, 3}) + err = w.AddInt8ToPayload([]int8{1, 2, 3}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() @@ -1086,22 +1113,22 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Bool, buffer) + r, err := NewPayloadReader(schemapb.DataType_Bool, buffer, false) assert.NoError(t, err) - _, err = r.GetBoolFromPayload() + _, _, err = r.GetBoolFromPayload() assert.Error(t, err) r.colType = 999 - _, err = r.GetBoolFromPayload() + _, _, err = r.GetBoolFromPayload() assert.Error(t, err) }) t.Run("TestGetBoolError2", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) + w, err := NewPayloadWriter(schemapb.DataType_Bool, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddBoolToPayload([]bool{true, false, true}) + err = w.AddBoolToPayload([]bool{true, false, true}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() @@ -1110,19 +1137,19 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Bool, buffer) + r, err := NewPayloadReader(schemapb.DataType_Bool, buffer, false) assert.NoError(t, err) r.numRows = 99 - _, err = r.GetBoolFromPayload() + _, _, err = r.GetBoolFromPayload() assert.Error(t, err) }) t.Run("TestGetInt8Error", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) + w, err := NewPayloadWriter(schemapb.DataType_Bool, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddBoolToPayload([]bool{false, true, true}) + err = w.AddBoolToPayload([]bool{false, true, true}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() @@ 
-1131,22 +1158,22 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Int8, buffer) + r, err := NewPayloadReader(schemapb.DataType_Int8, buffer, false) assert.NoError(t, err) - _, err = r.GetInt8FromPayload() + _, _, err = r.GetInt8FromPayload() assert.Error(t, err) r.colType = 999 - _, err = r.GetInt8FromPayload() + _, _, err = r.GetInt8FromPayload() assert.Error(t, err) }) t.Run("TestGetInt8Error2", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int8) + w, err := NewPayloadWriter(schemapb.DataType_Int8, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddInt8ToPayload([]int8{1, 2, 3}) + err = w.AddInt8ToPayload([]int8{1, 2, 3}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() @@ -1155,19 +1182,19 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Int8, buffer) + r, err := NewPayloadReader(schemapb.DataType_Int8, buffer, false) assert.NoError(t, err) r.numRows = 99 - _, err = r.GetInt8FromPayload() + _, _, err = r.GetInt8FromPayload() assert.Error(t, err) }) t.Run("TestGetInt16Error", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) + w, err := NewPayloadWriter(schemapb.DataType_Bool, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddBoolToPayload([]bool{false, true, true}) + err = w.AddBoolToPayload([]bool{false, true, true}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() @@ -1176,22 +1203,22 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Int16, buffer) + r, err := NewPayloadReader(schemapb.DataType_Int16, buffer, false) assert.NoError(t, err) - _, err = r.GetInt16FromPayload() + _, _, err = r.GetInt16FromPayload() assert.Error(t, 
err) r.colType = 999 - _, err = r.GetInt16FromPayload() + _, _, err = r.GetInt16FromPayload() assert.Error(t, err) }) t.Run("TestGetInt16Error2", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int16) + w, err := NewPayloadWriter(schemapb.DataType_Int16, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddInt16ToPayload([]int16{1, 2, 3}) + err = w.AddInt16ToPayload([]int16{1, 2, 3}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() @@ -1200,19 +1227,19 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Int16, buffer) + r, err := NewPayloadReader(schemapb.DataType_Int16, buffer, false) assert.NoError(t, err) r.numRows = 99 - _, err = r.GetInt16FromPayload() + _, _, err = r.GetInt16FromPayload() assert.Error(t, err) }) t.Run("TestGetInt32Error", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) + w, err := NewPayloadWriter(schemapb.DataType_Bool, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddBoolToPayload([]bool{false, true, true}) + err = w.AddBoolToPayload([]bool{false, true, true}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() @@ -1221,22 +1248,22 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Int32, buffer) + r, err := NewPayloadReader(schemapb.DataType_Int32, buffer, false) assert.NoError(t, err) - _, err = r.GetInt32FromPayload() + _, _, err = r.GetInt32FromPayload() assert.Error(t, err) r.colType = 999 - _, err = r.GetInt32FromPayload() + _, _, err = r.GetInt32FromPayload() assert.Error(t, err) }) t.Run("TestGetInt32Error2", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int32) + w, err := NewPayloadWriter(schemapb.DataType_Int32, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddInt32ToPayload([]int32{1, 2, 
3}) + err = w.AddInt32ToPayload([]int32{1, 2, 3}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() @@ -1245,19 +1272,19 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Int32, buffer) + r, err := NewPayloadReader(schemapb.DataType_Int32, buffer, false) assert.NoError(t, err) r.numRows = 99 - _, err = r.GetInt32FromPayload() + _, _, err = r.GetInt32FromPayload() assert.Error(t, err) }) t.Run("TestGetInt64Error", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) + w, err := NewPayloadWriter(schemapb.DataType_Bool, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddBoolToPayload([]bool{false, true, true}) + err = w.AddBoolToPayload([]bool{false, true, true}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() @@ -1266,22 +1293,22 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Int64, buffer) + r, err := NewPayloadReader(schemapb.DataType_Int64, buffer, false) assert.NoError(t, err) - _, err = r.GetInt64FromPayload() + _, _, err = r.GetInt64FromPayload() assert.Error(t, err) r.colType = 999 - _, err = r.GetInt64FromPayload() + _, _, err = r.GetInt64FromPayload() assert.Error(t, err) }) t.Run("TestGetInt64Error2", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int64) + w, err := NewPayloadWriter(schemapb.DataType_Int64, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddInt64ToPayload([]int64{1, 2, 3}) + err = w.AddInt64ToPayload([]int64{1, 2, 3}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() @@ -1290,19 +1317,19 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Int64, buffer) + r, err := NewPayloadReader(schemapb.DataType_Int64, 
buffer, false) assert.NoError(t, err) r.numRows = 99 - _, err = r.GetInt64FromPayload() + _, _, err = r.GetInt64FromPayload() assert.Error(t, err) }) t.Run("TestGetFloatError", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) + w, err := NewPayloadWriter(schemapb.DataType_Bool, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddBoolToPayload([]bool{false, true, true}) + err = w.AddBoolToPayload([]bool{false, true, true}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() @@ -1311,22 +1338,22 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Float, buffer) + r, err := NewPayloadReader(schemapb.DataType_Float, buffer, false) assert.NoError(t, err) - _, err = r.GetFloatFromPayload() + _, _, err = r.GetFloatFromPayload() assert.Error(t, err) r.colType = 999 - _, err = r.GetFloatFromPayload() + _, _, err = r.GetFloatFromPayload() assert.Error(t, err) }) t.Run("TestGetFloatError2", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Float) + w, err := NewPayloadWriter(schemapb.DataType_Float, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddFloatToPayload([]float32{1, 2, 3}) + err = w.AddFloatToPayload([]float32{1, 2, 3}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() @@ -1335,19 +1362,19 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Float, buffer) + r, err := NewPayloadReader(schemapb.DataType_Float, buffer, false) assert.NoError(t, err) r.numRows = 99 - _, err = r.GetFloatFromPayload() + _, _, err = r.GetFloatFromPayload() assert.Error(t, err) }) t.Run("TestGetDoubleError", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) + w, err := NewPayloadWriter(schemapb.DataType_Bool, false) require.Nil(t, err) require.NotNil(t, w) - err = 
w.AddBoolToPayload([]bool{false, true, true}) + err = w.AddBoolToPayload([]bool{false, true, true}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() @@ -1356,22 +1383,22 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Double, buffer) + r, err := NewPayloadReader(schemapb.DataType_Double, buffer, false) assert.NoError(t, err) - _, err = r.GetDoubleFromPayload() + _, _, err = r.GetDoubleFromPayload() assert.Error(t, err) r.colType = 999 - _, err = r.GetDoubleFromPayload() + _, _, err = r.GetDoubleFromPayload() assert.Error(t, err) }) t.Run("TestGetDoubleError2", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Double) + w, err := NewPayloadWriter(schemapb.DataType_Double, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddDoubleToPayload([]float64{1, 2, 3}) + err = w.AddDoubleToPayload([]float64{1, 2, 3}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() @@ -1380,19 +1407,19 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Double, buffer) + r, err := NewPayloadReader(schemapb.DataType_Double, buffer, false) assert.NoError(t, err) r.numRows = 99 - _, err = r.GetDoubleFromPayload() + _, _, err = r.GetDoubleFromPayload() assert.Error(t, err) }) t.Run("TestGetStringError", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) + w, err := NewPayloadWriter(schemapb.DataType_Bool, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddBoolToPayload([]bool{false, true, true}) + err = w.AddBoolToPayload([]bool{false, true, true}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() @@ -1401,26 +1428,26 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := 
NewPayloadReader(schemapb.DataType_String, buffer) + r, err := NewPayloadReader(schemapb.DataType_String, buffer, false) assert.NoError(t, err) - _, err = r.GetStringFromPayload() + _, _, err = r.GetStringFromPayload() assert.Error(t, err) r.colType = 999 - _, err = r.GetStringFromPayload() + _, _, err = r.GetStringFromPayload() assert.Error(t, err) }) t.Run("TestGetStringError2", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_String) + w, err := NewPayloadWriter(schemapb.DataType_String, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddOneStringToPayload("hello0") + err = w.AddOneStringToPayload("hello0", true) assert.NoError(t, err) - err = w.AddOneStringToPayload("hello1") + err = w.AddOneStringToPayload("hello1", true) assert.NoError(t, err) - err = w.AddOneStringToPayload("hello2") + err = w.AddOneStringToPayload("hello2", true) assert.NoError(t, err) err = w.FinishPayloadWriter() @@ -1429,19 +1456,19 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_String, buffer) + r, err := NewPayloadReader(schemapb.DataType_String, buffer, false) assert.NoError(t, err) r.numRows = 99 - _, err = r.GetStringFromPayload() + _, _, err = r.GetStringFromPayload() assert.Error(t, err) }) t.Run("TestGetArrayError", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) + w, err := NewPayloadWriter(schemapb.DataType_Bool, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddBoolToPayload([]bool{false, true, true}) + err = w.AddBoolToPayload([]bool{false, true, true}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() @@ -1450,22 +1477,22 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_Array, buffer) + r, err := NewPayloadReader(schemapb.DataType_Array, buffer, false) 
assert.NoError(t, err) - _, err = r.GetArrayFromPayload() + _, _, err = r.GetArrayFromPayload() assert.Error(t, err) r.colType = 999 - _, err = r.GetArrayFromPayload() + _, _, err = r.GetArrayFromPayload() assert.Error(t, err) }) t.Run("TestGetBinaryVectorError", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) + w, err := NewPayloadWriter(schemapb.DataType_Bool, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddBoolToPayload([]bool{false, true, true}) + err = w.AddBoolToPayload([]bool{false, true, true}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() @@ -1474,7 +1501,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_BinaryVector, buffer) + r, err := NewPayloadReader(schemapb.DataType_BinaryVector, buffer, false) assert.NoError(t, err) _, _, err = r.GetBinaryVectorFromPayload() @@ -1485,7 +1512,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { assert.Error(t, err) }) t.Run("TestGetBinaryVectorError2", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, 8) + w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, false, 8) require.Nil(t, err) require.NotNil(t, w) @@ -1498,7 +1525,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_BinaryVector, buffer) + r, err := NewPayloadReader(schemapb.DataType_BinaryVector, buffer, false) assert.NoError(t, err) r.numRows = 99 @@ -1506,11 +1533,11 @@ func TestPayload_ReaderAndWriter(t *testing.T) { assert.Error(t, err) }) t.Run("TestGetFloatVectorError", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) + w, err := NewPayloadWriter(schemapb.DataType_Bool, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddBoolToPayload([]bool{false, true, true}) + err = 
w.AddBoolToPayload([]bool{false, true, true}, nil) assert.NoError(t, err) err = w.FinishPayloadWriter() @@ -1519,7 +1546,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_FloatVector, buffer) + r, err := NewPayloadReader(schemapb.DataType_FloatVector, buffer, false) assert.NoError(t, err) _, _, err = r.GetFloatVectorFromPayload() @@ -1530,7 +1557,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { assert.Error(t, err) }) t.Run("TestGetFloatVectorError2", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_FloatVector, 8) + w, err := NewPayloadWriter(schemapb.DataType_FloatVector, false, 8) require.Nil(t, err) require.NotNil(t, w) @@ -1543,7 +1570,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_FloatVector, buffer) + r, err := NewPayloadReader(schemapb.DataType_FloatVector, buffer, false) assert.NoError(t, err) r.numRows = 99 @@ -1552,11 +1579,11 @@ func TestPayload_ReaderAndWriter(t *testing.T) { }) t.Run("TestByteArrayDatasetError", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_String) + w, err := NewPayloadWriter(schemapb.DataType_String, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddOneStringToPayload("hello0") + err = w.AddOneStringToPayload("hello0", true) assert.NoError(t, err) err = w.FinishPayloadWriter() @@ -1565,7 +1592,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_FloatVector, buffer) + r, err := NewPayloadReader(schemapb.DataType_FloatVector, buffer, false) assert.NoError(t, err) r.colType = 99 @@ -1584,60 +1611,6 @@ func TestPayload_ReaderAndWriter(t *testing.T) { assert.Error(t, err) }) - t.Run("TestGetSparseFloatVectorError", func(t 
*testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddBoolToPayload([]bool{false, true, true}) - assert.NoError(t, err) - - err = w.FinishPayloadWriter() - assert.NoError(t, err) - - buffer, err := w.GetPayloadBufferFromWriter() - assert.NoError(t, err) - - r, err := NewPayloadReader(schemapb.DataType_SparseFloatVector, buffer) - assert.NoError(t, err) - - _, _, err = r.GetSparseFloatVectorFromPayload() - assert.Error(t, err) - - r.colType = 999 - _, _, err = r.GetSparseFloatVectorFromPayload() - assert.Error(t, err) - }) - - t.Run("TestGetSparseFloatVectorError2", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_SparseFloatVector) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddSparseFloatVectorToPayload(&SparseFloatVectorFieldData{ - SparseFloatArray: schemapb.SparseFloatArray{ - Dim: 53, - Contents: [][]byte{ - typeutil.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}), - }, - }, - }) - assert.NoError(t, err) - - err = w.FinishPayloadWriter() - assert.NoError(t, err) - - buffer, err := w.GetPayloadBufferFromWriter() - assert.NoError(t, err) - - r, err := NewPayloadReader(schemapb.DataType_SparseFloatVector, buffer) - assert.NoError(t, err) - - r.numRows = 99 - _, _, err = r.GetSparseFloatVectorFromPayload() - assert.Error(t, err) - }) - t.Run("TestWriteLargeSizeData", func(t *testing.T) { t.Skip("Large data skip for online ut") size := 1 << 29 // 512M @@ -1646,7 +1619,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) { vec = append(vec, 1) } - w, err := NewPayloadWriter(schemapb.DataType_FloatVector) + w, err := NewPayloadWriter(schemapb.DataType_FloatVector, false) assert.NoError(t, err) err = w.AddFloatVectorToPayload(vec, 128) @@ -1660,19 +1633,737 @@ func TestPayload_ReaderAndWriter(t *testing.T) { w.ReleasePayloadWriter() }) + + t.Run("TestAddBool with wrong valids", func(t *testing.T) { + w, err := 
NewPayloadWriter(schemapb.DataType_Bool, false) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddBoolToPayload([]bool{false}, []bool{false}) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestAddInt8 with wrong valids", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Int8, false) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddInt8ToPayload([]int8{1}, []bool{false}) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestAddInt16 with wrong valids", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Int16, false) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddInt16ToPayload([]int16{1}, []bool{false}) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestAddInt32 with wrong valids", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Int32, false) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddInt32ToPayload([]int32{1}, []bool{false}) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestAddInt64 with wrong valids", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Int64, false) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddInt64ToPayload([]int64{1}, []bool{false}) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestAddFloat32 with wrong valids", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Float, false) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddFloatToPayload([]float32{1.0}, []bool{false}) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestAddDouble with wrong valids", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Double, false) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddDoubleToPayload([]float64{1.0}, []bool{false}) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestAddAddString with wrong valids", func(t *testing.T) { + w, err 
:= NewPayloadWriter(schemapb.DataType_String, false) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddOneStringToPayload("hello0", false) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestAddArray with wrong valids", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Array, false) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddOneArrayToPayload(&schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{1, 2}, + }, + }, + }, false) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestAddJSON with wrong valids", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_JSON, false) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddOneJSONToPayload([]byte(`{"1":"1"}`), false) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) +} + +func TestPayload_NullableReaderAndWriter(t *testing.T) { + t.Run("TestBool", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Bool, true) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddBoolToPayload([]bool{true, false, false, false}, []bool{true, false, true, false}) + assert.NoError(t, err) + err = w.AddDataToPayload([]bool{true, false, false, false}, []bool{true, false, true, false}) + assert.NoError(t, err) + err = w.FinishPayloadWriter() + assert.NoError(t, err) + + length, err := w.GetPayloadLengthFromWriter() + assert.NoError(t, err) + assert.Equal(t, 8, length) + defer w.ReleasePayloadWriter() + + buffer, err := w.GetPayloadBufferFromWriter() + assert.NoError(t, err) + + r, err := NewPayloadReader(schemapb.DataType_Bool, buffer, true) + require.Nil(t, err) + length, err = r.GetPayloadLengthFromReader() + assert.NoError(t, err) + assert.Equal(t, length, 8) + bools, valids, err := r.GetBoolFromPayload() + assert.NoError(t, err) + assert.Equal(t, []bool{true, false, false, false, true, false, false, false}, bools) + assert.Equal(t, []bool{true, false, 
true, false, true, false, true, false}, valids) + ibools, valids, _, err := r.GetDataFromPayload() + bools = ibools.([]bool) + assert.NoError(t, err) + assert.Equal(t, []bool{true, false, false, false, true, false, false, false}, bools) + assert.Equal(t, []bool{true, false, true, false, true, false, true, false}, valids) + defer r.ReleasePayloadReader() + }) + + t.Run("TestInt8", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Int8, true) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddInt8ToPayload([]int8{1, 2, 3}, []bool{true, false, true}) + assert.NoError(t, err) + err = w.AddDataToPayload([]int8{4, 5, 6}, []bool{true, false, true}) + assert.NoError(t, err) + err = w.FinishPayloadWriter() + assert.NoError(t, err) + + length, err := w.GetPayloadLengthFromWriter() + assert.NoError(t, err) + assert.Equal(t, 6, length) + defer w.ReleasePayloadWriter() + + buffer, err := w.GetPayloadBufferFromWriter() + assert.NoError(t, err) + + r, err := NewPayloadReader(schemapb.DataType_Int8, buffer, true) + require.Nil(t, err) + length, err = r.GetPayloadLengthFromReader() + assert.NoError(t, err) + assert.Equal(t, length, 6) + + int8s, valids, err := r.GetInt8FromPayload() + assert.NoError(t, err) + assert.Equal(t, []int8{1, 0, 3, 4, 0, 6}, int8s) + assert.Equal(t, []bool{true, false, true, true, false, true}, valids) + + iint8s, valids, _, err := r.GetDataFromPayload() + int8s = iint8s.([]int8) + assert.NoError(t, err) + + assert.Equal(t, []int8{1, 0, 3, 4, 0, 6}, int8s) + assert.Equal(t, []bool{true, false, true, true, false, true}, valids) + defer r.ReleasePayloadReader() + }) + + t.Run("TestInt16", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Int16, true) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddInt16ToPayload([]int16{1, 2, 3}, []bool{true, false, true}) + assert.NoError(t, err) + err = w.AddDataToPayload([]int16{1, 2, 3}, []bool{true, false, true}) + assert.NoError(t, err) + err = 
w.FinishPayloadWriter() + assert.NoError(t, err) + + length, err := w.GetPayloadLengthFromWriter() + assert.NoError(t, err) + assert.Equal(t, 6, length) + defer w.ReleasePayloadWriter() + + buffer, err := w.GetPayloadBufferFromWriter() + assert.NoError(t, err) + + r, err := NewPayloadReader(schemapb.DataType_Int16, buffer, true) + require.Nil(t, err) + length, err = r.GetPayloadLengthFromReader() + assert.NoError(t, err) + assert.Equal(t, length, 6) + int16s, valids, err := r.GetInt16FromPayload() + assert.NoError(t, err) + assert.Equal(t, []int16{1, 0, 3, 1, 0, 3}, int16s) + assert.Equal(t, []bool{true, false, true, true, false, true}, valids) + + iint16s, valids, _, err := r.GetDataFromPayload() + int16s = iint16s.([]int16) + assert.NoError(t, err) + assert.Equal(t, []int16{1, 0, 3, 1, 0, 3}, int16s) + assert.Equal(t, []bool{true, false, true, true, false, true}, valids) + defer r.ReleasePayloadReader() + }) + + t.Run("TestInt32", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Int32, true) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddInt32ToPayload([]int32{1, 2, 3}, []bool{true, false, true}) + assert.NoError(t, err) + err = w.AddDataToPayload([]int32{1, 2, 3}, []bool{true, false, true}) + assert.NoError(t, err) + err = w.FinishPayloadWriter() + assert.NoError(t, err) + + length, err := w.GetPayloadLengthFromWriter() + assert.NoError(t, err) + assert.Equal(t, 6, length) + defer w.ReleasePayloadWriter() + + buffer, err := w.GetPayloadBufferFromWriter() + assert.NoError(t, err) + + r, err := NewPayloadReader(schemapb.DataType_Int32, buffer, true) + require.Nil(t, err) + length, err = r.GetPayloadLengthFromReader() + assert.NoError(t, err) + assert.Equal(t, length, 6) + + int32s, valids, err := r.GetInt32FromPayload() + assert.NoError(t, err) + assert.Equal(t, []int32{1, 0, 3, 1, 0, 3}, int32s) + assert.Equal(t, []bool{true, false, true, true, false, true}, valids) + + iint32s, valids, _, err := r.GetDataFromPayload() + int32s = 
iint32s.([]int32) + assert.NoError(t, err) + assert.Equal(t, []int32{1, 0, 3, 1, 0, 3}, int32s) + assert.Equal(t, []bool{true, false, true, true, false, true}, valids) + defer r.ReleasePayloadReader() + }) + + t.Run("TestInt64", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Int64, true) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddInt64ToPayload([]int64{1, 2, 3}, []bool{true, false, true}) + assert.NoError(t, err) + err = w.AddDataToPayload([]int64{1, 2, 3}, []bool{true, false, true}) + assert.NoError(t, err) + err = w.FinishPayloadWriter() + assert.NoError(t, err) + + length, err := w.GetPayloadLengthFromWriter() + assert.NoError(t, err) + assert.Equal(t, 6, length) + defer w.ReleasePayloadWriter() + + buffer, err := w.GetPayloadBufferFromWriter() + assert.NoError(t, err) + + r, err := NewPayloadReader(schemapb.DataType_Int64, buffer, true) + require.Nil(t, err) + length, err = r.GetPayloadLengthFromReader() + assert.NoError(t, err) + assert.Equal(t, length, 6) + + int64s, valids, err := r.GetInt64FromPayload() + assert.NoError(t, err) + assert.Equal(t, []int64{1, 0, 3, 1, 0, 3}, int64s) + assert.Equal(t, []bool{true, false, true, true, false, true}, valids) + + iint64s, valids, _, err := r.GetDataFromPayload() + int64s = iint64s.([]int64) + assert.NoError(t, err) + assert.Equal(t, []int64{1, 0, 3, 1, 0, 3}, int64s) + assert.Equal(t, []bool{true, false, true, true, false, true}, valids) + defer r.ReleasePayloadReader() + }) + + t.Run("TestFloat32", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Float, true) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddFloatToPayload([]float32{1.0, 2.0, 3.0}, []bool{true, false, true}) + assert.NoError(t, err) + err = w.AddDataToPayload([]float32{1.0, 2.0, 3.0}, []bool{false, true, false}) + assert.NoError(t, err) + err = w.FinishPayloadWriter() + assert.NoError(t, err) + + length, err := w.GetPayloadLengthFromWriter() + assert.NoError(t, err) + 
assert.Equal(t, 6, length) + defer w.ReleasePayloadWriter() + + buffer, err := w.GetPayloadBufferFromWriter() + assert.NoError(t, err) + + r, err := NewPayloadReader(schemapb.DataType_Float, buffer, true) + require.Nil(t, err) + length, err = r.GetPayloadLengthFromReader() + assert.NoError(t, err) + assert.Equal(t, length, 6) + + float32s, valids, err := r.GetFloatFromPayload() + assert.NoError(t, err) + assert.Equal(t, []float32{1.0, 0, 3.0, 0, 2.0, 0}, float32s) + assert.Equal(t, []bool{true, false, true, false, true, false}, valids) + + ifloat32s, valids, _, err := r.GetDataFromPayload() + float32s = ifloat32s.([]float32) + assert.NoError(t, err) + assert.Equal(t, []float32{1.0, 0, 3.0, 0, 2.0, 0}, float32s) + assert.Equal(t, []bool{true, false, true, false, true, false}, valids) + defer r.ReleasePayloadReader() + }) + + t.Run("TestDouble", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Double, true) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddDoubleToPayload([]float64{1.0, 2.0, 3.0}, []bool{true, false, true}) + assert.NoError(t, err) + err = w.AddDataToPayload([]float64{1.0, 2.0, 3.0}, []bool{false, true, false}) + assert.NoError(t, err) + err = w.FinishPayloadWriter() + assert.NoError(t, err) + + length, err := w.GetPayloadLengthFromWriter() + assert.NoError(t, err) + assert.Equal(t, 6, length) + defer w.ReleasePayloadWriter() + + buffer, err := w.GetPayloadBufferFromWriter() + assert.NoError(t, err) + + r, err := NewPayloadReader(schemapb.DataType_Double, buffer, true) + require.Nil(t, err) + length, err = r.GetPayloadLengthFromReader() + assert.NoError(t, err) + assert.Equal(t, length, 6) + + float64s, valids, err := r.GetDoubleFromPayload() + assert.NoError(t, err) + assert.Equal(t, []float64{1.0, 0, 3.0, 0, 2.0, 0}, float64s) + assert.Equal(t, []bool{true, false, true, false, true, false}, valids) + + ifloat64s, valids, _, err := r.GetDataFromPayload() + float64s = ifloat64s.([]float64) + assert.NoError(t, err) + 
assert.Equal(t, []float64{1.0, 0, 3.0, 0, 2.0, 0}, float64s) + assert.Equal(t, []bool{true, false, true, false, true, false}, valids) + defer r.ReleasePayloadReader() + }) + + t.Run("TestAddString", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_String, true) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddOneStringToPayload("hello0", true) + assert.NoError(t, err) + err = w.AddOneStringToPayload("hello1", false) + assert.NoError(t, err) + err = w.AddOneStringToPayload("hello2", true) + assert.NoError(t, err) + err = w.AddDataToPayload("hello3", []bool{false}) + assert.NoError(t, err) + err = w.FinishPayloadWriter() + assert.NoError(t, err) + length, err := w.GetPayloadLengthFromWriter() + assert.NoError(t, err) + assert.Equal(t, length, 4) + buffer, err := w.GetPayloadBufferFromWriter() + assert.NoError(t, err) + + r, err := NewPayloadReader(schemapb.DataType_String, buffer, true) + assert.NoError(t, err) + length, err = r.GetPayloadLengthFromReader() + assert.NoError(t, err) + assert.Equal(t, length, 4) + + str, valids, err := r.GetStringFromPayload() + assert.NoError(t, err) + + assert.Equal(t, str[0], "hello0") + assert.Equal(t, str[1], "") + assert.Equal(t, str[2], "hello2") + assert.Equal(t, str[3], "") + assert.Equal(t, []bool{true, false, true, false}, valids) + + istr, valids, _, err := r.GetDataFromPayload() + strArray := istr.([]string) + assert.NoError(t, err) + assert.Equal(t, strArray[0], "hello0") + assert.Equal(t, strArray[1], "") + assert.Equal(t, strArray[2], "hello2") + assert.Equal(t, strArray[3], "") + assert.Equal(t, []bool{true, false, true, false}, valids) + r.ReleasePayloadReader() + w.ReleasePayloadWriter() + }) + + t.Run("TestAddArray", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Array, true) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddOneArrayToPayload(&schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{1, 2}, 
+ }, + }, + }, true) + assert.NoError(t, err) + err = w.AddOneArrayToPayload(&schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{3, 4}, + }, + }, + }, false) + assert.NoError(t, err) + err = w.AddOneArrayToPayload(&schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{5, 6}, + }, + }, + }, true) + assert.NoError(t, err) + err = w.AddDataToPayload(&schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{7, 8}, + }, + }, + }, []bool{false}) + assert.NoError(t, err) + err = w.FinishPayloadWriter() + assert.NoError(t, err) + length, err := w.GetPayloadLengthFromWriter() + assert.NoError(t, err) + assert.Equal(t, length, 4) + buffer, err := w.GetPayloadBufferFromWriter() + assert.NoError(t, err) + + r, err := NewPayloadReader(schemapb.DataType_Array, buffer, true) + assert.NoError(t, err) + length, err = r.GetPayloadLengthFromReader() + assert.NoError(t, err) + assert.Equal(t, length, 4) + + arrayList, valids, err := r.GetArrayFromPayload() + assert.NoError(t, err) + + assert.EqualValues(t, []int32{1, 2}, arrayList[0].GetIntData().GetData()) + assert.EqualValues(t, []int32(nil), arrayList[1].GetIntData().GetData()) + assert.EqualValues(t, []int32{5, 6}, arrayList[2].GetIntData().GetData()) + assert.EqualValues(t, []int32(nil), arrayList[3].GetIntData().GetData()) + assert.Equal(t, []bool{true, false, true, false}, valids) + + iArrayList, valids, _, err := r.GetDataFromPayload() + arrayList = iArrayList.([]*schemapb.ScalarField) + assert.NoError(t, err) + assert.EqualValues(t, []int32{1, 2}, arrayList[0].GetIntData().GetData()) + assert.EqualValues(t, []int32(nil), arrayList[1].GetIntData().GetData()) + assert.EqualValues(t, []int32{5, 6}, arrayList[2].GetIntData().GetData()) + assert.EqualValues(t, []int32(nil), arrayList[3].GetIntData().GetData()) + assert.Equal(t, []bool{true, false, true, false}, valids) + 
r.ReleasePayloadReader() + w.ReleasePayloadWriter() + }) + + t.Run("TestAddJSON", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_JSON, true) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddOneJSONToPayload([]byte(`{"1":"1"}`), true) + assert.NoError(t, err) + err = w.AddOneJSONToPayload([]byte(`{"2":"2"}`), false) + assert.NoError(t, err) + err = w.AddOneJSONToPayload([]byte(`{"3":"3"}`), true) + assert.NoError(t, err) + err = w.AddDataToPayload([]byte(`{"4":"4"}`), []bool{false}) + assert.NoError(t, err) + err = w.FinishPayloadWriter() + assert.NoError(t, err) + length, err := w.GetPayloadLengthFromWriter() + assert.NoError(t, err) + assert.Equal(t, length, 4) + buffer, err := w.GetPayloadBufferFromWriter() + assert.NoError(t, err) + + r, err := NewPayloadReader(schemapb.DataType_JSON, buffer, true) + assert.NoError(t, err) + length, err = r.GetPayloadLengthFromReader() + assert.NoError(t, err) + assert.Equal(t, length, 4) + + json, valids, err := r.GetJSONFromPayload() + assert.NoError(t, err) + + assert.EqualValues(t, []byte(`{"1":"1"}`), json[0]) + assert.EqualValues(t, []byte(``), json[1]) + assert.EqualValues(t, []byte(`{"3":"3"}`), json[2]) + assert.EqualValues(t, []byte(``), json[3]) + assert.Equal(t, []bool{true, false, true, false}, valids) + + iJSON, valids, _, err := r.GetDataFromPayload() + json = iJSON.([][]byte) + assert.NoError(t, err) + assert.EqualValues(t, []byte(`{"1":"1"}`), json[0]) + assert.EqualValues(t, []byte(``), json[1]) + assert.EqualValues(t, []byte(`{"3":"3"}`), json[2]) + assert.EqualValues(t, []byte(``), json[3]) + assert.Equal(t, []bool{true, false, true, false}, valids) + r.ReleasePayloadReader() + w.ReleasePayloadWriter() + }) + + t.Run("TestBinaryVector", func(t *testing.T) { + _, err := NewPayloadWriter(schemapb.DataType_BinaryVector, true, 8) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestFloatVector", func(t *testing.T) { + _, err := 
NewPayloadWriter(schemapb.DataType_FloatVector, true, 1) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestFloat16Vector", func(t *testing.T) { + _, err := NewPayloadWriter(schemapb.DataType_Float16Vector, true, 1) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestAddBool with wrong valids", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Bool, true) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddBoolToPayload([]bool{false}, nil) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestAddInt8 with wrong valids", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Int8, true) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddInt8ToPayload([]int8{1}, nil) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestAddInt16 with wrong valids", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Int16, true) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddInt16ToPayload([]int16{1}, nil) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestAddInt32 with wrong valids", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Int32, true) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddInt32ToPayload([]int32{1}, nil) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestAddInt64 with wrong valids", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Int64, true) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddInt64ToPayload([]int64{1}, nil) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestAddFloat32 with wrong valids", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Float, true) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddFloatToPayload([]float32{1.0}, nil) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestAddDouble with wrong valids", func(t *testing.T) { + 
w, err := NewPayloadWriter(schemapb.DataType_Double, true) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddDoubleToPayload([]float64{1.0}, nil) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestAddAddString with wrong valids", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_String, true) + require.Nil(t, err) + require.NotNil(t, w) + err = w.AddDataToPayload("hello0", nil) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + + w, err = NewPayloadWriter(schemapb.DataType_String, true) + require.Nil(t, err) + require.NotNil(t, w) + err = w.AddDataToPayload("hello0", []bool{false, false}) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + + w, err = NewPayloadWriter(schemapb.DataType_String, false) + require.Nil(t, err) + require.NotNil(t, w) + err = w.AddDataToPayload("hello0", []bool{false}) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + + w, err = NewPayloadWriter(schemapb.DataType_String, false) + require.Nil(t, err) + require.NotNil(t, w) + err = w.AddDataToPayload("hello0", []bool{true}) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestAddArray with wrong valids", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Array, true) + require.Nil(t, err) + require.NotNil(t, w) + err = w.AddDataToPayload(&schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{1, 2}, + }, + }, + }, nil) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + + w, err = NewPayloadWriter(schemapb.DataType_Array, true) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddDataToPayload(&schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{1, 2}, + }, + }, + }, []bool{false, false}) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + + w, err = NewPayloadWriter(schemapb.DataType_Array, false) + require.Nil(t, err) + require.NotNil(t, w) + err = 
w.AddDataToPayload(&schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{1, 2}, + }, + }, + }, []bool{false}) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + + w, err = NewPayloadWriter(schemapb.DataType_Array, false) + require.Nil(t, err) + require.NotNil(t, w) + err = w.AddDataToPayload(&schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{1, 2}, + }, + }, + }, []bool{true}) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) + + t.Run("TestAddJSON with wrong valids", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_JSON, true) + require.Nil(t, err) + require.NotNil(t, w) + err = w.AddDataToPayload([]byte(`{"1":"1"}`), nil) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + + w, err = NewPayloadWriter(schemapb.DataType_JSON, true) + require.Nil(t, err) + require.NotNil(t, w) + err = w.AddDataToPayload([]byte(`{"1":"1"}`), []bool{false, false}) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + + w, err = NewPayloadWriter(schemapb.DataType_JSON, false) + require.Nil(t, err) + require.NotNil(t, w) + err = w.AddDataToPayload([]byte(`{"1":"1"}`), []bool{false}) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + + w, err = NewPayloadWriter(schemapb.DataType_JSON, false) + require.Nil(t, err) + require.NotNil(t, w) + err = w.AddDataToPayload([]byte(`{"1":"1"}`), []bool{true}) + assert.ErrorIs(t, err, merr.ErrParameterInvalid) + }) } func TestArrowRecordReader(t *testing.T) { t.Run("TestArrowRecordReader", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_String) + w, err := NewPayloadWriter(schemapb.DataType_String, false) assert.NoError(t, err) defer w.Close() - err = w.AddOneStringToPayload("hello0") + err = w.AddOneStringToPayload("hello0", true) assert.NoError(t, err) - err = w.AddOneStringToPayload("hello1") + err = w.AddOneStringToPayload("hello1", true) assert.NoError(t, err) - err = 
w.AddOneStringToPayload("hello2") + err = w.AddOneStringToPayload("hello2", true) assert.NoError(t, err) err = w.FinishPayloadWriter() assert.NoError(t, err) @@ -1682,7 +2373,7 @@ func TestArrowRecordReader(t *testing.T) { buffer, err := w.GetPayloadBufferFromWriter() assert.NoError(t, err) - r, err := NewPayloadReader(schemapb.DataType_String, buffer) + r, err := NewPayloadReader(schemapb.DataType_String, buffer, false) assert.NoError(t, err) length, err = r.GetPayloadLengthFromReader() assert.NoError(t, err) @@ -1704,7 +2395,7 @@ func TestArrowRecordReader(t *testing.T) { } func dataGen(size int) ([]byte, error) { - w, err := NewPayloadWriter(schemapb.DataType_String) + w, err := NewPayloadWriter(schemapb.DataType_String, false) if err != nil { return nil, err } @@ -1717,7 +2408,7 @@ func dataGen(size int) ([]byte, error) { for i := range b { b[i] = letterRunes[rand.Intn(len(letterRunes))] } - w.AddOneStringToPayload(string(b)) + w.AddOneStringToPayload(string(b), true) } err = w.FinishPayloadWriter() if err != nil { @@ -1736,7 +2427,7 @@ func BenchmarkDefaultReader(b *testing.B) { assert.NoError(b, err) b.ResetTimer() - r, err := NewPayloadReader(schemapb.DataType_String, buffer) + r, err := NewPayloadReader(schemapb.DataType_String, buffer, false) require.Nil(b, err) defer r.ReleasePayloadReader() @@ -1744,8 +2435,9 @@ func BenchmarkDefaultReader(b *testing.B) { assert.NoError(b, err) assert.Equal(b, length, size) - d, err := r.GetStringFromPayload() + d, v, err := r.GetStringFromPayload() assert.NoError(b, err) + assert.Nil(b, v) for i := 0; i < 100; i++ { for _, de := range d { assert.Equal(b, 20, len(de)) @@ -1759,7 +2451,7 @@ func BenchmarkDataSetReader(b *testing.B) { assert.NoError(b, err) b.ResetTimer() - r, err := NewPayloadReader(schemapb.DataType_String, buffer) + r, err := NewPayloadReader(schemapb.DataType_String, buffer, false) require.Nil(b, err) defer r.ReleasePayloadReader() @@ -1787,7 +2479,7 @@ func BenchmarkArrowRecordReader(b *testing.B) { 
assert.NoError(b, err) b.ResetTimer() - r, err := NewPayloadReader(schemapb.DataType_String, buffer) + r, err := NewPayloadReader(schemapb.DataType_String, buffer, false) require.Nil(b, err) defer r.ReleasePayloadReader() diff --git a/internal/storage/payload_writer.go b/internal/storage/payload_writer.go index d9a8f1f11f..8b8b001005 100644 --- a/internal/storage/payload_writer.go +++ b/internal/storage/payload_writer.go @@ -33,6 +33,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/typeutil" ) @@ -46,18 +47,29 @@ type NativePayloadWriter struct { flushedRows int output *bytes.Buffer releaseOnce sync.Once + dim int + nullable bool } -func NewPayloadWriter(colType schemapb.DataType, dim ...int) (PayloadWriterInterface, error) { +func NewPayloadWriter(colType schemapb.DataType, nullable bool, dim ...int) (PayloadWriterInterface, error) { var arrowType arrow.DataType + var dimension int // writer for sparse float vector doesn't require dim if typeutil.IsVectorType(colType) && !typeutil.IsSparseFloatVectorType(colType) { if len(dim) != 1 { - return nil, fmt.Errorf("incorrect input numbers") + return nil, merr.WrapErrParameterInvalidMsg("incorrect input numbers") + } + if nullable { + return nil, merr.WrapErrParameterInvalidMsg("vector type not supprot nullable") } arrowType = milvusDataTypeToArrowType(colType, dim[0]) + dimension = dim[0] } else { + if len(dim) != 0 { + return nil, merr.WrapErrParameterInvalidMsg("incorrect input numbers") + } arrowType = milvusDataTypeToArrowType(colType, 1) + dimension = 1 } builder := array.NewBuilder(memory.DefaultAllocator, arrowType) @@ -69,117 +81,148 @@ func NewPayloadWriter(colType schemapb.DataType, dim ...int) (PayloadWriterInter finished: false, flushedRows: 0, output: new(bytes.Buffer), + dim: dimension, + nullable: nullable, }, nil } -func (w *NativePayloadWriter) 
AddDataToPayload(data interface{}, dim ...int) error { - switch len(dim) { - case 0: - switch w.dataType { - case schemapb.DataType_Bool: - val, ok := data.([]bool) - if !ok { - return errors.New("incorrect data type") - } - return w.AddBoolToPayload(val) - case schemapb.DataType_Int8: - val, ok := data.([]int8) - if !ok { - return errors.New("incorrect data type") - } - return w.AddInt8ToPayload(val) - case schemapb.DataType_Int16: - val, ok := data.([]int16) - if !ok { - return errors.New("incorrect data type") - } - return w.AddInt16ToPayload(val) - case schemapb.DataType_Int32: - val, ok := data.([]int32) - if !ok { - return errors.New("incorrect data type") - } - return w.AddInt32ToPayload(val) - case schemapb.DataType_Int64: - val, ok := data.([]int64) - if !ok { - return errors.New("incorrect data type") - } - return w.AddInt64ToPayload(val) - case schemapb.DataType_Float: - val, ok := data.([]float32) - if !ok { - return errors.New("incorrect data type") - } - return w.AddFloatToPayload(val) - case schemapb.DataType_Double: - val, ok := data.([]float64) - if !ok { - return errors.New("incorrect data type") - } - return w.AddDoubleToPayload(val) - case schemapb.DataType_String, schemapb.DataType_VarChar: - val, ok := data.(string) - if !ok { - return errors.New("incorrect data type") - } - return w.AddOneStringToPayload(val) - case schemapb.DataType_Array: - val, ok := data.(*schemapb.ScalarField) - if !ok { - return errors.New("incorrect data type") - } - return w.AddOneArrayToPayload(val) - case schemapb.DataType_JSON: - val, ok := data.([]byte) - if !ok { - return errors.New("incorrect data type") - } - return w.AddOneJSONToPayload(val) - default: - return errors.New("incorrect datatype") +func (w *NativePayloadWriter) AddDataToPayload(data interface{}, validData []bool) error { + switch w.dataType { + case schemapb.DataType_Bool: + val, ok := data.([]bool) + if !ok { + return merr.WrapErrParameterInvalidMsg("incorrect data type") } - case 1: - switch 
w.dataType { - case schemapb.DataType_BinaryVector: - val, ok := data.([]byte) - if !ok { - return errors.New("incorrect data type") - } - return w.AddBinaryVectorToPayload(val, dim[0]) - case schemapb.DataType_FloatVector: - val, ok := data.([]float32) - if !ok { - return errors.New("incorrect data type") - } - return w.AddFloatVectorToPayload(val, dim[0]) - case schemapb.DataType_Float16Vector: - val, ok := data.([]byte) - if !ok { - return errors.New("incorrect data type") - } - return w.AddFloat16VectorToPayload(val, dim[0]) - case schemapb.DataType_BFloat16Vector: - val, ok := data.([]byte) - if !ok { - return errors.New("incorrect data type") - } - return w.AddBFloat16VectorToPayload(val, dim[0]) - case schemapb.DataType_SparseFloatVector: - val, ok := data.(*SparseFloatVectorFieldData) - if !ok { - return errors.New("incorrect data type") - } - return w.AddSparseFloatVectorToPayload(val) - default: - return errors.New("incorrect datatype") + return w.AddBoolToPayload(val, validData) + case schemapb.DataType_Int8: + val, ok := data.([]int8) + if !ok { + return merr.WrapErrParameterInvalidMsg("incorrect data type") } + return w.AddInt8ToPayload(val, validData) + case schemapb.DataType_Int16: + val, ok := data.([]int16) + if !ok { + return merr.WrapErrParameterInvalidMsg("incorrect data type") + } + return w.AddInt16ToPayload(val, validData) + case schemapb.DataType_Int32: + val, ok := data.([]int32) + if !ok { + return merr.WrapErrParameterInvalidMsg("incorrect data type") + } + return w.AddInt32ToPayload(val, validData) + case schemapb.DataType_Int64: + val, ok := data.([]int64) + if !ok { + return merr.WrapErrParameterInvalidMsg("incorrect data type") + } + return w.AddInt64ToPayload(val, validData) + case schemapb.DataType_Float: + val, ok := data.([]float32) + if !ok { + return merr.WrapErrParameterInvalidMsg("incorrect data type") + } + return w.AddFloatToPayload(val, validData) + case schemapb.DataType_Double: + val, ok := data.([]float64) + if !ok { + 
return merr.WrapErrParameterInvalidMsg("incorrect data type") + } + return w.AddDoubleToPayload(val, validData) + case schemapb.DataType_String, schemapb.DataType_VarChar: + val, ok := data.(string) + if !ok { + return merr.WrapErrParameterInvalidMsg("incorrect data type") + } + isValid := true + if len(validData) > 1 { + return merr.WrapErrParameterInvalidMsg("wrong input length when add data to payload") + } + if len(validData) == 0 && w.nullable { + return merr.WrapErrParameterInvalidMsg("need pass valid_data when nullable==true") + } + if len(validData) == 1 { + if !w.nullable { + return merr.WrapErrParameterInvalidMsg("no need pass valid_data when nullable==false") + } + isValid = validData[0] + } + return w.AddOneStringToPayload(val, isValid) + case schemapb.DataType_Array: + val, ok := data.(*schemapb.ScalarField) + if !ok { + return merr.WrapErrParameterInvalidMsg("incorrect data type") + } + isValid := true + if len(validData) > 1 { + return merr.WrapErrParameterInvalidMsg("wrong input length when add data to payload") + } + if len(validData) == 0 && w.nullable { + return merr.WrapErrParameterInvalidMsg("need pass valid_data when nullable==true") + } + if len(validData) == 1 { + if !w.nullable { + return merr.WrapErrParameterInvalidMsg("no need pass valid_data when nullable==false") + } + isValid = validData[0] + } + return w.AddOneArrayToPayload(val, isValid) + case schemapb.DataType_JSON: + val, ok := data.([]byte) + if !ok { + return merr.WrapErrParameterInvalidMsg("incorrect data type") + } + isValid := true + if len(validData) > 1 { + return merr.WrapErrParameterInvalidMsg("wrong input length when add data to payload") + } + if len(validData) == 0 && w.nullable { + return merr.WrapErrParameterInvalidMsg("need pass valid_data when nullable==true") + } + if len(validData) == 1 { + if !w.nullable { + return merr.WrapErrParameterInvalidMsg("no need pass valid_data when nullable==false") + } + isValid = validData[0] + } + return w.AddOneJSONToPayload(val, 
isValid) + case schemapb.DataType_BinaryVector: + val, ok := data.([]byte) + if !ok { + return merr.WrapErrParameterInvalidMsg("incorrect data type") + } + return w.AddBinaryVectorToPayload(val, w.dim) + case schemapb.DataType_FloatVector: + val, ok := data.([]float32) + if !ok { + return merr.WrapErrParameterInvalidMsg("incorrect data type") + } + return w.AddFloatVectorToPayload(val, w.dim) + case schemapb.DataType_Float16Vector: + val, ok := data.([]byte) + if !ok { + return merr.WrapErrParameterInvalidMsg("incorrect data type") + } + return w.AddFloat16VectorToPayload(val, w.dim) + case schemapb.DataType_BFloat16Vector: + val, ok := data.([]byte) + if !ok { + return merr.WrapErrParameterInvalidMsg("incorrect data type") + } + return w.AddBFloat16VectorToPayload(val, w.dim) + case schemapb.DataType_SparseFloatVector: + val, ok := data.(*SparseFloatVectorFieldData) + if !ok { + return merr.WrapErrParameterInvalidMsg("incorrect data type") + } + return w.AddSparseFloatVectorToPayload(val) default: - return errors.New("incorrect input numbers") + return errors.New("unsupported datatype") } } -func (w *NativePayloadWriter) AddBoolToPayload(data []bool) error { +func (w *NativePayloadWriter) AddBoolToPayload(data []bool, validData []bool) error { if w.finished { return errors.New("can't append data to finished bool payload") } @@ -188,16 +231,26 @@ func (w *NativePayloadWriter) AddBoolToPayload(data []bool) error { return errors.New("can't add empty msgs into bool payload") } + if !w.nullable && len(validData) != 0 { + msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData)) + return merr.WrapErrParameterInvalidMsg(msg) + } + + if w.nullable && len(data) != len(validData) { + msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data)) + return merr.WrapErrParameterInvalidMsg(msg) + } + builder, ok := w.builder.(*array.BooleanBuilder) if !ok { return errors.New("failed to cast 
ArrayBuilder") } - builder.AppendValues(data, nil) + builder.AppendValues(data, validData) return nil } -func (w *NativePayloadWriter) AddByteToPayload(data []byte) error { +func (w *NativePayloadWriter) AddByteToPayload(data []byte, validData []bool) error { if w.finished { return errors.New("can't append data to finished byte payload") } @@ -206,6 +259,16 @@ func (w *NativePayloadWriter) AddByteToPayload(data []byte) error { return errors.New("can't add empty msgs into byte payload") } + if !w.nullable && len(validData) != 0 { + msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData)) + return merr.WrapErrParameterInvalidMsg(msg) + } + + if w.nullable && len(data) != len(validData) { + msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data)) + return merr.WrapErrParameterInvalidMsg(msg) + } + builder, ok := w.builder.(*array.Int8Builder) if !ok { return errors.New("failed to cast ByteBuilder") @@ -214,12 +277,15 @@ func (w *NativePayloadWriter) AddByteToPayload(data []byte) error { builder.Reserve(len(data)) for i := range data { builder.Append(int8(data[i])) + if w.nullable && !validData[i] { + builder.AppendNull() + } } return nil } -func (w *NativePayloadWriter) AddInt8ToPayload(data []int8) error { +func (w *NativePayloadWriter) AddInt8ToPayload(data []int8, validData []bool) error { if w.finished { return errors.New("can't append data to finished int8 payload") } @@ -228,16 +294,26 @@ func (w *NativePayloadWriter) AddInt8ToPayload(data []int8) error { return errors.New("can't add empty msgs into int8 payload") } + if !w.nullable && len(validData) != 0 { + msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData)) + return merr.WrapErrParameterInvalidMsg(msg) + } + + if w.nullable && len(data) != len(validData) { + msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data)) + return 
merr.WrapErrParameterInvalidMsg(msg) + } + builder, ok := w.builder.(*array.Int8Builder) if !ok { return errors.New("failed to cast Int8Builder") } - builder.AppendValues(data, nil) + builder.AppendValues(data, validData) return nil } -func (w *NativePayloadWriter) AddInt16ToPayload(data []int16) error { +func (w *NativePayloadWriter) AddInt16ToPayload(data []int16, validData []bool) error { if w.finished { return errors.New("can't append data to finished int16 payload") } @@ -246,16 +322,26 @@ func (w *NativePayloadWriter) AddInt16ToPayload(data []int16) error { return errors.New("can't add empty msgs into int16 payload") } + if !w.nullable && len(validData) != 0 { + msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData)) + return merr.WrapErrParameterInvalidMsg(msg) + } + + if w.nullable && len(data) != len(validData) { + msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data)) + return merr.WrapErrParameterInvalidMsg(msg) + } + builder, ok := w.builder.(*array.Int16Builder) if !ok { return errors.New("failed to cast Int16Builder") } - builder.AppendValues(data, nil) + builder.AppendValues(data, validData) return nil } -func (w *NativePayloadWriter) AddInt32ToPayload(data []int32) error { +func (w *NativePayloadWriter) AddInt32ToPayload(data []int32, validData []bool) error { if w.finished { return errors.New("can't append data to finished int32 payload") } @@ -264,16 +350,26 @@ func (w *NativePayloadWriter) AddInt32ToPayload(data []int32) error { return errors.New("can't add empty msgs into int32 payload") } + if !w.nullable && len(validData) != 0 { + msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData)) + return merr.WrapErrParameterInvalidMsg(msg) + } + + if w.nullable && len(data) != len(validData) { + msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data)) + return 
merr.WrapErrParameterInvalidMsg(msg) + } + builder, ok := w.builder.(*array.Int32Builder) if !ok { return errors.New("failed to cast Int32Builder") } - builder.AppendValues(data, nil) + builder.AppendValues(data, validData) return nil } -func (w *NativePayloadWriter) AddInt64ToPayload(data []int64) error { +func (w *NativePayloadWriter) AddInt64ToPayload(data []int64, validData []bool) error { if w.finished { return errors.New("can't append data to finished int64 payload") } @@ -282,16 +378,26 @@ func (w *NativePayloadWriter) AddInt64ToPayload(data []int64) error { return errors.New("can't add empty msgs into int64 payload") } + if !w.nullable && len(validData) != 0 { + msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData)) + return merr.WrapErrParameterInvalidMsg(msg) + } + + if w.nullable && len(data) != len(validData) { + msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data)) + return merr.WrapErrParameterInvalidMsg(msg) + } + builder, ok := w.builder.(*array.Int64Builder) if !ok { return errors.New("failed to cast Int64Builder") } - builder.AppendValues(data, nil) + builder.AppendValues(data, validData) return nil } -func (w *NativePayloadWriter) AddFloatToPayload(data []float32) error { +func (w *NativePayloadWriter) AddFloatToPayload(data []float32, validData []bool) error { if w.finished { return errors.New("can't append data to finished float payload") } @@ -300,16 +406,26 @@ func (w *NativePayloadWriter) AddFloatToPayload(data []float32) error { return errors.New("can't add empty msgs into float payload") } + if !w.nullable && len(validData) != 0 { + msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData)) + return merr.WrapErrParameterInvalidMsg(msg) + } + + if w.nullable && len(data) != len(validData) { + msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data)) + return 
merr.WrapErrParameterInvalidMsg(msg) + } + builder, ok := w.builder.(*array.Float32Builder) if !ok { return errors.New("failed to cast FloatBuilder") } - builder.AppendValues(data, nil) + builder.AppendValues(data, validData) return nil } -func (w *NativePayloadWriter) AddDoubleToPayload(data []float64) error { +func (w *NativePayloadWriter) AddDoubleToPayload(data []float64, validData []bool) error { if w.finished { return errors.New("can't append data to finished double payload") } @@ -318,35 +434,57 @@ func (w *NativePayloadWriter) AddDoubleToPayload(data []float64) error { return errors.New("can't add empty msgs into double payload") } + if !w.nullable && len(validData) != 0 { + msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData)) + return merr.WrapErrParameterInvalidMsg(msg) + } + + if w.nullable && len(data) != len(validData) { + msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data)) + return merr.WrapErrParameterInvalidMsg(msg) + } + builder, ok := w.builder.(*array.Float64Builder) if !ok { return errors.New("failed to cast DoubleBuilder") } - builder.AppendValues(data, nil) + builder.AppendValues(data, validData) return nil } -func (w *NativePayloadWriter) AddOneStringToPayload(data string) error { +func (w *NativePayloadWriter) AddOneStringToPayload(data string, isValid bool) error { if w.finished { return errors.New("can't append data to finished string payload") } + if !w.nullable && !isValid { + return merr.WrapErrParameterInvalidMsg("not support null when nullable is false") + } + builder, ok := w.builder.(*array.StringBuilder) if !ok { return errors.New("failed to cast StringBuilder") } - builder.Append(data) + if !isValid { + builder.AppendNull() + } else { + builder.Append(data) + } return nil } -func (w *NativePayloadWriter) AddOneArrayToPayload(data *schemapb.ScalarField) error { +func (w *NativePayloadWriter) AddOneArrayToPayload(data 
*schemapb.ScalarField, isValid bool) error { if w.finished { return errors.New("can't append data to finished array payload") } + if !w.nullable && !isValid { + return merr.WrapErrParameterInvalidMsg("not support null when nullable is false") + } + bytes, err := proto.Marshal(data) if err != nil { return errors.New("Marshal ListValue failed") @@ -357,22 +495,34 @@ func (w *NativePayloadWriter) AddOneArrayToPayload(data *schemapb.ScalarField) e return errors.New("failed to cast BinaryBuilder") } - builder.Append(bytes) + if !isValid { + builder.AppendNull() + } else { + builder.Append(bytes) + } return nil } -func (w *NativePayloadWriter) AddOneJSONToPayload(data []byte) error { +func (w *NativePayloadWriter) AddOneJSONToPayload(data []byte, isValid bool) error { if w.finished { return errors.New("can't append data to finished json payload") } + if !w.nullable && !isValid { + return merr.WrapErrParameterInvalidMsg("not support null when nullable is false") + } + builder, ok := w.builder.(*array.BinaryBuilder) if !ok { return errors.New("failed to cast JsonBuilder") } - builder.Append(data) + if !isValid { + builder.AppendNull() + } else { + builder.Append(data) + } return nil } @@ -507,8 +657,9 @@ func (w *NativePayloadWriter) FinishPayloadWriter() error { w.finished = true field := arrow.Field{ - Name: "val", - Type: w.arrowType, + Name: "val", + Type: w.arrowType, + Nullable: w.nullable, } schema := arrow.NewSchema([]arrow.Field{ field, diff --git a/internal/storage/payload_writer_test.go b/internal/storage/payload_writer_test.go index e18c6b2637..0a8e5abfb4 100644 --- a/internal/storage/payload_writer_test.go +++ b/internal/storage/payload_writer_test.go @@ -9,241 +9,248 @@ import ( ) func TestPayloadWriter_Failed(t *testing.T) { + t.Run("wrong input", func(t *testing.T) { + _, err := NewPayloadWriter(schemapb.DataType_FloatVector, false) + require.Error(t, err) + + _, err = NewPayloadWriter(schemapb.DataType_Bool, false, 1) + require.Error(t, err) + }) 
t.Run("Test Bool", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) + w, err := NewPayloadWriter(schemapb.DataType_Bool, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddBoolToPayload([]bool{}) + err = w.AddBoolToPayload([]bool{}, nil) require.Error(t, err) err = w.FinishPayloadWriter() require.NoError(t, err) - err = w.AddBoolToPayload([]bool{false}) + err = w.AddBoolToPayload([]bool{false}, nil) require.Error(t, err) - w, err = NewPayloadWriter(schemapb.DataType_Float) + w, err = NewPayloadWriter(schemapb.DataType_Float, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddBoolToPayload([]bool{false}) + err = w.AddBoolToPayload([]bool{false}, nil) require.Error(t, err) }) t.Run("Test Byte", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int8) + w, err := NewPayloadWriter(schemapb.DataType_Int8, Params.CommonCfg.MaxBloomFalsePositive.PanicIfEmpty) require.Nil(t, err) require.NotNil(t, w) - err = w.AddByteToPayload([]byte{}) + err = w.AddByteToPayload([]byte{}, nil) require.Error(t, err) err = w.FinishPayloadWriter() require.NoError(t, err) - err = w.AddByteToPayload([]byte{0}) + err = w.AddByteToPayload([]byte{0}, nil) require.Error(t, err) - w, err = NewPayloadWriter(schemapb.DataType_Float) + w, err = NewPayloadWriter(schemapb.DataType_Float, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddByteToPayload([]byte{0}) + err = w.AddByteToPayload([]byte{0}, nil) require.Error(t, err) }) t.Run("Test Int8", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int8) + w, err := NewPayloadWriter(schemapb.DataType_Int8, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddInt8ToPayload([]int8{}) + err = w.AddInt8ToPayload([]int8{}, nil) require.Error(t, err) err = w.FinishPayloadWriter() require.NoError(t, err) - err = w.AddInt8ToPayload([]int8{0}) + err = w.AddInt8ToPayload([]int8{0}, nil) require.Error(t, err) - w, err = NewPayloadWriter(schemapb.DataType_Float) + w, err 
= NewPayloadWriter(schemapb.DataType_Float, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddInt8ToPayload([]int8{0}) + err = w.AddInt8ToPayload([]int8{0}, nil) require.Error(t, err) }) t.Run("Test Int16", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int16) + w, err := NewPayloadWriter(schemapb.DataType_Int16, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddInt16ToPayload([]int16{}) + err = w.AddInt16ToPayload([]int16{}, nil) require.Error(t, err) err = w.FinishPayloadWriter() require.NoError(t, err) - err = w.AddInt16ToPayload([]int16{0}) + err = w.AddInt16ToPayload([]int16{0}, nil) require.Error(t, err) - w, err = NewPayloadWriter(schemapb.DataType_Float) + w, err = NewPayloadWriter(schemapb.DataType_Float, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddInt16ToPayload([]int16{0}) + err = w.AddInt16ToPayload([]int16{0}, nil) require.Error(t, err) }) t.Run("Test Int32", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int32) + w, err := NewPayloadWriter(schemapb.DataType_Int32, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddInt32ToPayload([]int32{}) + err = w.AddInt32ToPayload([]int32{}, nil) require.Error(t, err) err = w.FinishPayloadWriter() require.NoError(t, err) - err = w.AddInt32ToPayload([]int32{0}) + err = w.AddInt32ToPayload([]int32{0}, nil) require.Error(t, err) - w, err = NewPayloadWriter(schemapb.DataType_Float) + w, err = NewPayloadWriter(schemapb.DataType_Float, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddInt32ToPayload([]int32{0}) + err = w.AddInt32ToPayload([]int32{0}, nil) require.Error(t, err) }) t.Run("Test Int64", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int64) + w, err := NewPayloadWriter(schemapb.DataType_Int64, Params.CommonCfg.MaxBloomFalsePositive.PanicIfEmpty) require.Nil(t, err) require.NotNil(t, w) - err = w.AddInt64ToPayload([]int64{}) + err = w.AddInt64ToPayload([]int64{}, nil) require.Error(t, err) 
err = w.FinishPayloadWriter() require.NoError(t, err) - err = w.AddInt64ToPayload([]int64{0}) + err = w.AddInt64ToPayload([]int64{0}, nil) require.Error(t, err) - w, err = NewPayloadWriter(schemapb.DataType_Float) + w, err = NewPayloadWriter(schemapb.DataType_Float, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddInt64ToPayload([]int64{0}) + err = w.AddInt64ToPayload([]int64{0}, nil) require.Error(t, err) }) t.Run("Test Float", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Float) + w, err := NewPayloadWriter(schemapb.DataType_Float, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddFloatToPayload([]float32{}) + err = w.AddFloatToPayload([]float32{}, nil) require.Error(t, err) err = w.FinishPayloadWriter() require.NoError(t, err) - err = w.AddFloatToPayload([]float32{0}) + err = w.AddFloatToPayload([]float32{0}, nil) require.Error(t, err) - w, err = NewPayloadWriter(schemapb.DataType_Int64) + w, err = NewPayloadWriter(schemapb.DataType_Int64, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddFloatToPayload([]float32{0}) + err = w.AddFloatToPayload([]float32{0}, nil) require.Error(t, err) }) t.Run("Test Double", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Double) + w, err := NewPayloadWriter(schemapb.DataType_Double, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddDoubleToPayload([]float64{}) + err = w.AddDoubleToPayload([]float64{}, nil) require.Error(t, err) err = w.FinishPayloadWriter() require.NoError(t, err) - err = w.AddDoubleToPayload([]float64{0}) + err = w.AddDoubleToPayload([]float64{0}, nil) require.Error(t, err) - w, err = NewPayloadWriter(schemapb.DataType_Int64) + w, err = NewPayloadWriter(schemapb.DataType_Int64, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddDoubleToPayload([]float64{0}) + err = w.AddDoubleToPayload([]float64{0}, nil) require.Error(t, err) }) t.Run("Test String", func(t *testing.T) { - w, err := 
NewPayloadWriter(schemapb.DataType_String) + w, err := NewPayloadWriter(schemapb.DataType_String, false) require.Nil(t, err) require.NotNil(t, w) err = w.FinishPayloadWriter() require.NoError(t, err) - err = w.AddOneStringToPayload("test") + err = w.AddOneStringToPayload("test", false) require.Error(t, err) - w, err = NewPayloadWriter(schemapb.DataType_Int64) + w, err = NewPayloadWriter(schemapb.DataType_Int64, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddOneStringToPayload("test") + err = w.AddOneStringToPayload("test", false) require.Error(t, err) }) t.Run("Test Array", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Array) + w, err := NewPayloadWriter(schemapb.DataType_Array, false) require.Nil(t, err) require.NotNil(t, w) err = w.FinishPayloadWriter() require.NoError(t, err) - err = w.AddOneArrayToPayload(&schemapb.ScalarField{}) + err = w.AddOneArrayToPayload(&schemapb.ScalarField{}, false) require.Error(t, err) - w, err = NewPayloadWriter(schemapb.DataType_Int64) + w, err = NewPayloadWriter(schemapb.DataType_Int64, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddOneArrayToPayload(&schemapb.ScalarField{}) + err = w.AddOneArrayToPayload(&schemapb.ScalarField{}, false) require.Error(t, err) }) t.Run("Test Json", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_JSON) + w, err := NewPayloadWriter(schemapb.DataType_JSON, false) require.Nil(t, err) require.NotNil(t, w) err = w.FinishPayloadWriter() require.NoError(t, err) - err = w.AddOneJSONToPayload([]byte{0, 1}) + err = w.AddOneJSONToPayload([]byte{0, 1}, false) require.Error(t, err) - w, err = NewPayloadWriter(schemapb.DataType_Int64) + w, err = NewPayloadWriter(schemapb.DataType_Int64, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddOneJSONToPayload([]byte{0, 1}) + err = w.AddOneJSONToPayload([]byte{0, 1}, false) require.Error(t, err) }) t.Run("Test BinaryVector", func(t *testing.T) { - w, err := 
NewPayloadWriter(schemapb.DataType_BinaryVector, 8) + w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, false, 8) require.Nil(t, err) require.NotNil(t, w) @@ -258,7 +265,7 @@ func TestPayloadWriter_Failed(t *testing.T) { err = w.AddBinaryVectorToPayload(data, 8) require.Error(t, err) - w, err = NewPayloadWriter(schemapb.DataType_Int64) + w, err = NewPayloadWriter(schemapb.DataType_Int64, false) require.Nil(t, err) require.NotNil(t, w) @@ -267,7 +274,7 @@ func TestPayloadWriter_Failed(t *testing.T) { }) t.Run("Test FloatVector", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_FloatVector, 8) + w, err := NewPayloadWriter(schemapb.DataType_FloatVector, false, 8) require.Nil(t, err) require.NotNil(t, w) @@ -276,20 +283,20 @@ func TestPayloadWriter_Failed(t *testing.T) { data[i] = 1 } - err = w.AddFloatToPayload([]float32{}) + err = w.AddFloatToPayload([]float32{}, nil) require.Error(t, err) err = w.FinishPayloadWriter() require.NoError(t, err) - err = w.AddFloatToPayload(data) + err = w.AddFloatToPayload(data, nil) require.Error(t, err) - w, err = NewPayloadWriter(schemapb.DataType_Int64) + w, err = NewPayloadWriter(schemapb.DataType_Int64, false) require.Nil(t, err) require.NotNil(t, w) - err = w.AddFloatToPayload(data) + err = w.AddFloatToPayload(data, nil) require.Error(t, err) }) } diff --git a/internal/storage/print_binlog.go b/internal/storage/print_binlog.go index 61cb03cb6d..01dfe72bc2 100644 --- a/internal/storage/print_binlog.go +++ b/internal/storage/print_binlog.go @@ -224,7 +224,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface fmt.Println("\tpayload values:") switch colType { case schemapb.DataType_Bool: - val, err := reader.GetBoolFromPayload() + val, _, err := reader.GetBoolFromPayload() if err != nil { return err } @@ -232,7 +232,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface fmt.Printf("\t\t%d : %v\n", i, v) } case schemapb.DataType_Int8: - val, err 
:= reader.GetInt8FromPayload() + val, _, err := reader.GetInt8FromPayload() if err != nil { return err } @@ -240,7 +240,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface fmt.Printf("\t\t%d : %d\n", i, v) } case schemapb.DataType_Int16: - val, err := reader.GetInt16FromPayload() + val, _, err := reader.GetInt16FromPayload() if err != nil { return err } @@ -248,7 +248,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface fmt.Printf("\t\t%d : %d\n", i, v) } case schemapb.DataType_Int32: - val, err := reader.GetInt32FromPayload() + val, _, err := reader.GetInt32FromPayload() if err != nil { return err } @@ -256,7 +256,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface fmt.Printf("\t\t%d : %d\n", i, v) } case schemapb.DataType_Int64: - val, err := reader.GetInt64FromPayload() + val, _, err := reader.GetInt64FromPayload() if err != nil { return err } @@ -264,7 +264,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface fmt.Printf("\t\t%d : %d\n", i, v) } case schemapb.DataType_Float: - val, err := reader.GetFloatFromPayload() + val, _, err := reader.GetFloatFromPayload() if err != nil { return err } @@ -272,7 +272,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface fmt.Printf("\t\t%d : %f\n", i, v) } case schemapb.DataType_Double: - val, err := reader.GetDoubleFromPayload() + val, _, err := reader.GetDoubleFromPayload() if err != nil { return err } @@ -285,7 +285,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface return err } - val, err := reader.GetStringFromPayload() + val, _, err := reader.GetStringFromPayload() if err != nil { return err } @@ -358,13 +358,16 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface if err != nil { return err } - val, err := reader.GetJSONFromPayload() + val, valids, err := reader.GetJSONFromPayload() if err 
!= nil { return err } for i := 0; i < rows; i++ { fmt.Printf("\t\t%d : %s\n", i, val[i]) } + for i, v := range valids { + fmt.Printf("\t\t%d : %v\n", i, v) + } case schemapb.DataType_SparseFloatVector: sparseData, _, err := reader.GetSparseFloatVectorFromPayload() if err != nil { @@ -388,7 +391,7 @@ func printDDLPayloadValues(eventType EventTypeCode, colType schemapb.DataType, r fmt.Println("\tpayload values:") switch colType { case schemapb.DataType_Int64: - val, err := reader.GetInt64FromPayload() + val, _, err := reader.GetInt64FromPayload() if err != nil { return err } @@ -402,7 +405,7 @@ func printDDLPayloadValues(eventType EventTypeCode, colType schemapb.DataType, r return err } - val, err := reader.GetStringFromPayload() + val, _, err := reader.GetStringFromPayload() if err != nil { return err } @@ -448,7 +451,7 @@ func printDDLPayloadValues(eventType EventTypeCode, colType schemapb.DataType, r func printIndexFilePayloadValues(reader PayloadReaderInterface, key string, dataType schemapb.DataType) error { if dataType == schemapb.DataType_Int8 { if key == IndexParamsKey { - content, err := reader.GetByteFromPayload() + content, _, err := reader.GetByteFromPayload() if err != nil { return err } @@ -459,7 +462,7 @@ func printIndexFilePayloadValues(reader PayloadReaderInterface, key string, data } if key == "SLICE_META" { - content, err := reader.GetByteFromPayload() + content, _, err := reader.GetByteFromPayload() if err != nil { return err } @@ -473,7 +476,7 @@ func printIndexFilePayloadValues(reader PayloadReaderInterface, key string, data } } else { if key == IndexParamsKey { - content, err := reader.GetStringFromPayload() + content, _, err := reader.GetStringFromPayload() if err != nil { return err } @@ -484,7 +487,7 @@ func printIndexFilePayloadValues(reader PayloadReaderInterface, key string, data } if key == "SLICE_META" { - content, err := reader.GetStringFromPayload() + content, _, err := reader.GetStringFromPayload() if err != nil { return err } diff 
--git a/internal/storage/print_binlog_test.go b/internal/storage/print_binlog_test.go index 89cefe1c4e..9d3b3dfb21 100644 --- a/internal/storage/print_binlog_test.go +++ b/internal/storage/print_binlog_test.go @@ -36,27 +36,27 @@ import ( ) func TestPrintBinlogFilesInt64(t *testing.T) { - w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40) + w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false) curTS := time.Now().UnixNano() / int64(time.Millisecond) - e1, err := w.NextInsertEventWriter() + e1, err := w.NextInsertEventWriter(false) assert.NoError(t, err) - err = e1.AddDataToPayload([]int64{1, 2, 3}) + err = e1.AddDataToPayload([]int64{1, 2, 3}, nil) assert.NoError(t, err) - err = e1.AddDataToPayload([]int32{4, 5, 6}) + err = e1.AddDataToPayload([]int32{4, 5, 6}, nil) assert.Error(t, err) - err = e1.AddDataToPayload([]int64{4, 5, 6}) + err = e1.AddDataToPayload([]int64{4, 5, 6}, nil) assert.NoError(t, err) e1.SetEventTimestamp(tsoutil.ComposeTS(curTS+10*60*1000, 0), tsoutil.ComposeTS(curTS+20*60*1000, 0)) - e2, err := w.NextInsertEventWriter() + e2, err := w.NextInsertEventWriter(false) assert.NoError(t, err) - err = e2.AddDataToPayload([]int64{7, 8, 9}) + err = e2.AddDataToPayload([]int64{7, 8, 9}, nil) assert.NoError(t, err) - err = e2.AddDataToPayload([]bool{true, false, true}) + err = e2.AddDataToPayload([]bool{true, false, true}, nil) assert.Error(t, err) - err = e2.AddDataToPayload([]int64{10, 11, 12}) + err = e2.AddDataToPayload([]int64{10, 11, 12}, nil) assert.NoError(t, err) e2.SetEventTimestamp(tsoutil.ComposeTS(curTS+30*60*1000, 0), tsoutil.ComposeTS(curTS+40*60*1000, 0)) diff --git a/internal/storage/unsafe.go b/internal/storage/unsafe.go index 33056788ae..bd565f3f9c 100644 --- a/internal/storage/unsafe.go +++ b/internal/storage/unsafe.go @@ -59,3 +59,9 @@ func UnsafeReadFloat64(buf []byte, idx int) float64 { ptr := unsafe.Pointer(&(buf[idx])) return *((*float64)(ptr)) } + +/* #nosec G103 */ +func UnsafeReadBool(buf 
[]byte, idx int) bool { + ptr := unsafe.Pointer(&(buf[idx])) + return *((*bool)(ptr)) +} diff --git a/internal/storage/utils.go b/internal/storage/utils.go index 06e8d1ca7c..c8b16328f9 100644 --- a/internal/storage/utils.go +++ b/internal/storage/utils.go @@ -567,30 +567,38 @@ func ColumnBasedInsertMsgToInsertData(msg *msgstream.InsertMsg, collSchema *sche case schemapb.DataType_Bool: srcData := srcField.GetScalars().GetBoolData().GetData() + validData := srcField.GetValidData() fieldData = &BoolFieldData{ - Data: lo.Map(srcData, func(v bool, _ int) bool { return v }), + Data: lo.Map(srcData, func(v bool, _ int) bool { return v }), + ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }), } case schemapb.DataType_Int8: srcData := srcField.GetScalars().GetIntData().GetData() + validData := srcField.GetValidData() fieldData = &Int8FieldData{ - Data: lo.Map(srcData, func(v int32, _ int) int8 { return int8(v) }), + Data: lo.Map(srcData, func(v int32, _ int) int8 { return int8(v) }), + ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }), } case schemapb.DataType_Int16: srcData := srcField.GetScalars().GetIntData().GetData() + validData := srcField.GetValidData() fieldData = &Int16FieldData{ - Data: lo.Map(srcData, func(v int32, _ int) int16 { return int16(v) }), + Data: lo.Map(srcData, func(v int32, _ int) int16 { return int16(v) }), + ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }), } case schemapb.DataType_Int32: srcData := srcField.GetScalars().GetIntData().GetData() + validData := srcField.GetValidData() fieldData = &Int32FieldData{ - Data: lo.Map(srcData, func(v int32, _ int) int32 { return v }), + Data: lo.Map(srcData, func(v int32, _ int) int32 { return v }), + ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }), } case schemapb.DataType_Int64: @@ -605,45 +613,57 @@ func ColumnBasedInsertMsgToInsertData(msg *msgstream.InsertMsg, collSchema *sche } default: srcData := 
srcField.GetScalars().GetLongData().GetData() + validData := srcField.GetValidData() fieldData = &Int64FieldData{ - Data: lo.Map(srcData, func(v int64, _ int) int64 { return v }), + Data: lo.Map(srcData, func(v int64, _ int) int64 { return v }), + ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }), } } case schemapb.DataType_Float: srcData := srcField.GetScalars().GetFloatData().GetData() + validData := srcField.GetValidData() fieldData = &FloatFieldData{ - Data: lo.Map(srcData, func(v float32, _ int) float32 { return v }), + Data: lo.Map(srcData, func(v float32, _ int) float32 { return v }), + ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }), } case schemapb.DataType_Double: srcData := srcField.GetScalars().GetDoubleData().GetData() + validData := srcField.GetValidData() fieldData = &DoubleFieldData{ - Data: lo.Map(srcData, func(v float64, _ int) float64 { return v }), + Data: lo.Map(srcData, func(v float64, _ int) float64 { return v }), + ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }), } case schemapb.DataType_String, schemapb.DataType_VarChar: srcData := srcField.GetScalars().GetStringData().GetData() + validData := srcField.GetValidData() fieldData = &StringFieldData{ - Data: lo.Map(srcData, func(v string, _ int) string { return v }), + Data: lo.Map(srcData, func(v string, _ int) string { return v }), + ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }), } case schemapb.DataType_Array: srcData := srcField.GetScalars().GetArrayData().GetData() + validData := srcField.GetValidData() fieldData = &ArrayFieldData{ ElementType: field.GetElementType(), Data: lo.Map(srcData, func(v *schemapb.ScalarField, _ int) *schemapb.ScalarField { return v }), + ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }), } case schemapb.DataType_JSON: srcData := srcField.GetScalars().GetJsonData().GetData() + validData := srcField.GetValidData() fieldData = &JSONFieldData{ - Data: lo.Map(srcData, 
func(v []byte, _ int) []byte { return v }), + Data: lo.Map(srcData, func(v []byte, _ int) []byte { return v }), + ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }), } default: @@ -676,89 +696,105 @@ func InsertMsgToInsertData(msg *msgstream.InsertMsg, schema *schemapb.Collection func mergeBoolField(data *InsertData, fid FieldID, field *BoolFieldData) { if _, ok := data.Data[fid]; !ok { fieldData := &BoolFieldData{ - Data: nil, + Data: nil, + ValidData: nil, } data.Data[fid] = fieldData } fieldData := data.Data[fid].(*BoolFieldData) fieldData.Data = append(fieldData.Data, field.Data...) + fieldData.ValidData = append(fieldData.ValidData, field.ValidData...) } func mergeInt8Field(data *InsertData, fid FieldID, field *Int8FieldData) { if _, ok := data.Data[fid]; !ok { fieldData := &Int8FieldData{ - Data: nil, + Data: nil, + ValidData: nil, } data.Data[fid] = fieldData } fieldData := data.Data[fid].(*Int8FieldData) fieldData.Data = append(fieldData.Data, field.Data...) + fieldData.ValidData = append(fieldData.ValidData, field.ValidData...) } func mergeInt16Field(data *InsertData, fid FieldID, field *Int16FieldData) { if _, ok := data.Data[fid]; !ok { fieldData := &Int16FieldData{ - Data: nil, + Data: nil, + ValidData: nil, } data.Data[fid] = fieldData } fieldData := data.Data[fid].(*Int16FieldData) fieldData.Data = append(fieldData.Data, field.Data...) + fieldData.ValidData = append(fieldData.ValidData, field.ValidData...) } func mergeInt32Field(data *InsertData, fid FieldID, field *Int32FieldData) { if _, ok := data.Data[fid]; !ok { fieldData := &Int32FieldData{ - Data: nil, + Data: nil, + ValidData: nil, } data.Data[fid] = fieldData } fieldData := data.Data[fid].(*Int32FieldData) fieldData.Data = append(fieldData.Data, field.Data...) + fieldData.ValidData = append(fieldData.ValidData, field.ValidData...) 
} func mergeInt64Field(data *InsertData, fid FieldID, field *Int64FieldData) { if _, ok := data.Data[fid]; !ok { fieldData := &Int64FieldData{ - Data: nil, + Data: nil, + ValidData: nil, } data.Data[fid] = fieldData } fieldData := data.Data[fid].(*Int64FieldData) fieldData.Data = append(fieldData.Data, field.Data...) + fieldData.ValidData = append(fieldData.ValidData, field.ValidData...) } func mergeFloatField(data *InsertData, fid FieldID, field *FloatFieldData) { if _, ok := data.Data[fid]; !ok { fieldData := &FloatFieldData{ - Data: nil, + Data: nil, + ValidData: nil, } data.Data[fid] = fieldData } fieldData := data.Data[fid].(*FloatFieldData) fieldData.Data = append(fieldData.Data, field.Data...) + fieldData.ValidData = append(fieldData.ValidData, field.ValidData...) } func mergeDoubleField(data *InsertData, fid FieldID, field *DoubleFieldData) { if _, ok := data.Data[fid]; !ok { fieldData := &DoubleFieldData{ - Data: nil, + Data: nil, + ValidData: nil, } data.Data[fid] = fieldData } fieldData := data.Data[fid].(*DoubleFieldData) fieldData.Data = append(fieldData.Data, field.Data...) + fieldData.ValidData = append(fieldData.ValidData, field.ValidData...) } func mergeStringField(data *InsertData, fid FieldID, field *StringFieldData) { if _, ok := data.Data[fid]; !ok { fieldData := &StringFieldData{ - Data: nil, + Data: nil, + ValidData: nil, } data.Data[fid] = fieldData } fieldData := data.Data[fid].(*StringFieldData) fieldData.Data = append(fieldData.Data, field.Data...) + fieldData.ValidData = append(fieldData.ValidData, field.ValidData...) } func mergeArrayField(data *InsertData, fid FieldID, field *ArrayFieldData) { @@ -766,22 +802,26 @@ func mergeArrayField(data *InsertData, fid FieldID, field *ArrayFieldData) { fieldData := &ArrayFieldData{ ElementType: field.ElementType, Data: nil, + ValidData: nil, } data.Data[fid] = fieldData } fieldData := data.Data[fid].(*ArrayFieldData) fieldData.Data = append(fieldData.Data, field.Data...) 
+ fieldData.ValidData = append(fieldData.ValidData, field.ValidData...) } func mergeJSONField(data *InsertData, fid FieldID, field *JSONFieldData) { if _, ok := data.Data[fid]; !ok { fieldData := &JSONFieldData{ - Data: nil, + Data: nil, + ValidData: nil, } data.Data[fid] = fieldData } fieldData := data.Data[fid].(*JSONFieldData) fieldData.Data = append(fieldData.Data, field.Data...) + fieldData.ValidData = append(fieldData.ValidData, field.ValidData...) } func mergeBinaryVectorField(data *InsertData, fid FieldID, field *BinaryVectorFieldData) { diff --git a/internal/storage/utils_test.go b/internal/storage/utils_test.go index 25eb19cf06..ca906f6670 100644 --- a/internal/storage/utils_test.go +++ b/internal/storage/utils_test.go @@ -434,6 +434,121 @@ func genAllFieldsSchema(fVecDim, bVecDim, f16VecDim, bf16VecDim int, withSparse return schema, pkFieldID, fieldIDs } +func genAllFieldsSchemaNullable(fVecDim, bVecDim, f16VecDim, bf16VecDim int, withSparse bool) (schema *schemapb.CollectionSchema, pkFieldID UniqueID, fieldIDs []UniqueID) { + schema = &schemapb.CollectionSchema{ + Name: "all_fields_schema_nullable", + Description: "all_fields_schema_nullable", + AutoID: false, + Fields: []*schemapb.FieldSchema{ + { + DataType: schemapb.DataType_Int64, + IsPrimaryKey: true, + }, + { + DataType: schemapb.DataType_Bool, + Nullable: true, + }, + { + DataType: schemapb.DataType_Int8, + Nullable: true, + }, + { + DataType: schemapb.DataType_Int16, + Nullable: true, + }, + { + DataType: schemapb.DataType_Int32, + Nullable: true, + }, + { + DataType: schemapb.DataType_Int64, + Nullable: true, + }, + { + DataType: schemapb.DataType_Float, + Nullable: true, + }, + { + DataType: schemapb.DataType_Double, + Nullable: true, + }, + { + DataType: schemapb.DataType_FloatVector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.DimKey, + Value: strconv.Itoa(fVecDim), + }, + }, + }, + { + DataType: schemapb.DataType_BinaryVector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: 
common.DimKey, + Value: strconv.Itoa(bVecDim), + }, + }, + }, + { + DataType: schemapb.DataType_Float16Vector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.DimKey, + Value: strconv.Itoa(f16VecDim), + }, + }, + }, + { + DataType: schemapb.DataType_BFloat16Vector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.DimKey, + Value: strconv.Itoa(bf16VecDim), + }, + }, + }, + { + DataType: schemapb.DataType_Array, + Nullable: true, + }, + { + DataType: schemapb.DataType_JSON, + Nullable: true, + }, + }, + } + if withSparse { + schema.Fields = append(schema.Fields, &schemapb.FieldSchema{ + DataType: schemapb.DataType_SparseFloatVector, + }) + } + fieldIDs = make([]UniqueID, 0) + for idx := range schema.Fields { + fID := int64(common.StartOfUserFieldID + idx) + schema.Fields[idx].FieldID = fID + if schema.Fields[idx].IsPrimaryKey { + pkFieldID = fID + } + fieldIDs = append(fieldIDs, fID) + } + schema.Fields = append(schema.Fields, &schemapb.FieldSchema{ + FieldID: common.RowIDField, + Name: common.RowIDFieldName, + IsPrimaryKey: false, + Description: "", + DataType: schemapb.DataType_Int64, + }) + schema.Fields = append(schema.Fields, &schemapb.FieldSchema{ + FieldID: common.TimeStampField, + Name: common.TimeStampFieldName, + IsPrimaryKey: false, + Description: "", + DataType: schemapb.DataType_Int64, + }) + return schema, pkFieldID, fieldIDs +} + func generateInt32ArrayList(numRows int) []*schemapb.ScalarField { ret := make([]*schemapb.ScalarField, 0, numRows) for i := 0; i < numRows; i++ { @@ -616,6 +731,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim }, FieldId: field.FieldID, } + if field.GetNullable() { + f.ValidData = testutils.GenerateBoolArray(numRows) + } msg.FieldsData = append(msg.FieldsData, f) for _, d := range data { columns[idx] = append(columns[idx], d) @@ -636,6 +754,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim }, FieldId: field.FieldID, } + if 
field.GetNullable() { + f.ValidData = testutils.GenerateBoolArray(numRows) + } msg.FieldsData = append(msg.FieldsData, f) for _, d := range data { columns[idx] = append(columns[idx], int8(d)) @@ -656,6 +777,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim }, FieldId: field.FieldID, } + if field.GetNullable() { + f.ValidData = testutils.GenerateBoolArray(numRows) + } msg.FieldsData = append(msg.FieldsData, f) for _, d := range data { columns[idx] = append(columns[idx], int16(d)) @@ -676,6 +800,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim }, FieldId: field.FieldID, } + if field.GetNullable() { + f.ValidData = testutils.GenerateBoolArray(numRows) + } msg.FieldsData = append(msg.FieldsData, f) for _, d := range data { columns[idx] = append(columns[idx], d) @@ -696,6 +823,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim }, FieldId: field.FieldID, } + if field.GetNullable() { + f.ValidData = testutils.GenerateBoolArray(numRows) + } msg.FieldsData = append(msg.FieldsData, f) for _, d := range data { columns[idx] = append(columns[idx], d) @@ -717,6 +847,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim }, FieldId: field.FieldID, } + if field.GetNullable() { + f.ValidData = testutils.GenerateBoolArray(numRows) + } msg.FieldsData = append(msg.FieldsData, f) for _, d := range data { columns[idx] = append(columns[idx], d) @@ -737,6 +870,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim }, FieldId: field.FieldID, } + if field.GetNullable() { + f.ValidData = testutils.GenerateBoolArray(numRows) + } msg.FieldsData = append(msg.FieldsData, f) for _, d := range data { columns[idx] = append(columns[idx], d) @@ -856,6 +992,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim }, FieldId: field.FieldID, } + if field.GetNullable() { + f.ValidData = 
testutils.GenerateBoolArray(numRows) + } msg.FieldsData = append(msg.FieldsData, f) for _, d := range data { columns[idx] = append(columns[idx], d) @@ -877,6 +1016,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim }, FieldId: field.FieldID, } + if field.GetNullable() { + f.ValidData = testutils.GenerateBoolArray(numRows) + } msg.FieldsData = append(msg.FieldsData, f) for _, d := range data { columns[idx] = append(columns[idx], d) @@ -1019,6 +1161,24 @@ func TestColumnBasedInsertMsgToInsertData(t *testing.T) { } } +func TestColumnBasedInsertMsgToInsertDataNullable(t *testing.T) { + numRows, fVecDim, bVecDim, f16VecDim, bf16VecDim := 2, 2, 8, 2, 2 + schema, _, fieldIDs := genAllFieldsSchemaNullable(fVecDim, bVecDim, f16VecDim, bf16VecDim, true) + msg, _, columns := genColumnBasedInsertMsg(schema, numRows, fVecDim, bVecDim, f16VecDim, bf16VecDim) + + idata, err := ColumnBasedInsertMsgToInsertData(msg, schema) + assert.NoError(t, err) + for idx, fID := range fieldIDs { + column := columns[idx] + fData, ok := idata.Data[fID] + assert.True(t, ok) + assert.Equal(t, len(column), fData.RowNum()) + for j := range column { + assert.Equal(t, fData.GetRow(j), column[j]) + } + } +} + func TestColumnBasedInsertMsgToInsertFloat16VectorDataError(t *testing.T) { msg := &msgstream.InsertMsg{ BaseMsg: msgstream.BaseMsg{ @@ -1145,233 +1305,391 @@ func TestInsertMsgToInsertData2(t *testing.T) { } func TestMergeInsertData(t *testing.T) { - d1 := &InsertData{ - Data: map[int64]FieldData{ - common.RowIDField: &Int64FieldData{ - Data: []int64{1}, - }, - common.TimeStampField: &Int64FieldData{ - Data: []int64{1}, - }, - BoolField: &BoolFieldData{ - Data: []bool{true}, - }, - Int8Field: &Int8FieldData{ - Data: []int8{1}, - }, - Int16Field: &Int16FieldData{ - Data: []int16{1}, - }, - Int32Field: &Int32FieldData{ - Data: []int32{1}, - }, - Int64Field: &Int64FieldData{ - Data: []int64{1}, - }, - FloatField: &FloatFieldData{ - Data: []float32{0}, - }, - 
DoubleField: &DoubleFieldData{ - Data: []float64{0}, - }, - StringField: &StringFieldData{ - Data: []string{"1"}, - }, - BinaryVectorField: &BinaryVectorFieldData{ - Data: []byte{0}, - Dim: 8, - }, - FloatVectorField: &FloatVectorFieldData{ - Data: []float32{0}, - Dim: 1, - }, - Float16VectorField: &Float16VectorFieldData{ - Data: []byte{0, 1}, - Dim: 1, - }, - BFloat16VectorField: &BFloat16VectorFieldData{ - Data: []byte{0, 1}, - Dim: 1, - }, - SparseFloatVectorField: &SparseFloatVectorFieldData{ - SparseFloatArray: schemapb.SparseFloatArray{ - Dim: 600, - Contents: [][]byte{ - typeutil.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}), - typeutil.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}), + t.Run("empty data in buffer", func(t *testing.T) { + d1 := &InsertData{ + Data: make(map[FieldID]FieldData), + Infos: []BlobInfo{}, + } + d2 := &InsertData{ + Data: map[int64]FieldData{ + common.RowIDField: &Int64FieldData{ + Data: []int64{2}, + }, + common.TimeStampField: &Int64FieldData{ + Data: []int64{2}, + }, + BoolField: &BoolFieldData{ + Data: []bool{false}, + }, + Int8Field: &Int8FieldData{ + Data: []int8{2}, + }, + Int16Field: &Int16FieldData{ + Data: []int16{2}, + }, + Int32Field: &Int32FieldData{ + Data: []int32{2}, + }, + Int64Field: &Int64FieldData{ + Data: []int64{2}, + }, + FloatField: &FloatFieldData{ + Data: []float32{0}, + }, + DoubleField: &DoubleFieldData{ + Data: []float64{0}, + }, + StringField: &StringFieldData{ + Data: []string{"2"}, + }, + BinaryVectorField: &BinaryVectorFieldData{ + Data: []byte{0}, + Dim: 8, + }, + FloatVectorField: &FloatVectorFieldData{ + Data: []float32{0}, + Dim: 1, + }, + Float16VectorField: &Float16VectorFieldData{ + Data: []byte{2, 3}, + Dim: 1, + }, + BFloat16VectorField: &BFloat16VectorFieldData{ + Data: []byte{2, 3}, + Dim: 1, + }, + SparseFloatVectorField: &SparseFloatVectorFieldData{ + SparseFloatArray: schemapb.SparseFloatArray{ + Dim: 600, + Contents: [][]byte{ + 
typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}), + }, }, }, - }, - ArrayField: &ArrayFieldData{ - Data: []*schemapb.ScalarField{ - { - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: []int32{1, 2, 3}, + ArrayField: &ArrayFieldData{ + Data: []*schemapb.ScalarField{ + { + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{4, 5, 6}, + }, }, }, }, }, - }, - JSONField: &JSONFieldData{ - Data: [][]byte{[]byte(`{"key":"value"}`)}, - }, - }, - Infos: nil, - } - d2 := &InsertData{ - Data: map[int64]FieldData{ - common.RowIDField: &Int64FieldData{ - Data: []int64{2}, - }, - common.TimeStampField: &Int64FieldData{ - Data: []int64{2}, - }, - BoolField: &BoolFieldData{ - Data: []bool{false}, - }, - Int8Field: &Int8FieldData{ - Data: []int8{2}, - }, - Int16Field: &Int16FieldData{ - Data: []int16{2}, - }, - Int32Field: &Int32FieldData{ - Data: []int32{2}, - }, - Int64Field: &Int64FieldData{ - Data: []int64{2}, - }, - FloatField: &FloatFieldData{ - Data: []float32{0}, - }, - DoubleField: &DoubleFieldData{ - Data: []float64{0}, - }, - StringField: &StringFieldData{ - Data: []string{"2"}, - }, - BinaryVectorField: &BinaryVectorFieldData{ - Data: []byte{0}, - Dim: 8, - }, - FloatVectorField: &FloatVectorFieldData{ - Data: []float32{0}, - Dim: 1, - }, - Float16VectorField: &Float16VectorFieldData{ - Data: []byte{2, 3}, - Dim: 1, - }, - BFloat16VectorField: &BFloat16VectorFieldData{ - Data: []byte{2, 3}, - Dim: 1, - }, - SparseFloatVectorField: &SparseFloatVectorFieldData{ - SparseFloatArray: schemapb.SparseFloatArray{ - Dim: 600, - Contents: [][]byte{ - typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}), - }, + JSONField: &JSONFieldData{ + Data: [][]byte{[]byte(`{"hello":"world"}`)}, }, }, - ArrayField: &ArrayFieldData{ - Data: []*schemapb.ScalarField{ - { - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: []int32{4, 5, 6}, + Infos: 
nil, + } + + MergeInsertData(d1, d2) + + f, ok := d1.Data[common.RowIDField] + assert.True(t, ok) + assert.Equal(t, []int64{2}, f.(*Int64FieldData).Data) + + f, ok = d1.Data[common.TimeStampField] + assert.True(t, ok) + assert.Equal(t, []int64{2}, f.(*Int64FieldData).Data) + + f, ok = d1.Data[BoolField] + assert.True(t, ok) + assert.Equal(t, []bool{false}, f.(*BoolFieldData).Data) + + f, ok = d1.Data[Int8Field] + assert.True(t, ok) + assert.Equal(t, []int8{2}, f.(*Int8FieldData).Data) + + f, ok = d1.Data[Int16Field] + assert.True(t, ok) + assert.Equal(t, []int16{2}, f.(*Int16FieldData).Data) + + f, ok = d1.Data[Int32Field] + assert.True(t, ok) + assert.Equal(t, []int32{2}, f.(*Int32FieldData).Data) + + f, ok = d1.Data[Int64Field] + assert.True(t, ok) + assert.Equal(t, []int64{2}, f.(*Int64FieldData).Data) + + f, ok = d1.Data[FloatField] + assert.True(t, ok) + assert.Equal(t, []float32{0}, f.(*FloatFieldData).Data) + + f, ok = d1.Data[DoubleField] + assert.True(t, ok) + assert.Equal(t, []float64{0}, f.(*DoubleFieldData).Data) + + f, ok = d1.Data[StringField] + assert.True(t, ok) + assert.Equal(t, []string{"2"}, f.(*StringFieldData).Data) + + f, ok = d1.Data[BinaryVectorField] + assert.True(t, ok) + assert.Equal(t, []byte{0}, f.(*BinaryVectorFieldData).Data) + + f, ok = d1.Data[FloatVectorField] + assert.True(t, ok) + assert.Equal(t, []float32{0}, f.(*FloatVectorFieldData).Data) + + f, ok = d1.Data[Float16VectorField] + assert.True(t, ok) + assert.Equal(t, []byte{2, 3}, f.(*Float16VectorFieldData).Data) + + f, ok = d1.Data[BFloat16VectorField] + assert.True(t, ok) + assert.Equal(t, []byte{2, 3}, f.(*BFloat16VectorFieldData).Data) + + f, ok = d1.Data[SparseFloatVectorField] + assert.True(t, ok) + assert.Equal(t, &SparseFloatVectorFieldData{ + SparseFloatArray: schemapb.SparseFloatArray{ + Dim: 600, + Contents: [][]byte{ + typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}), + }, + }, + }, f.(*SparseFloatVectorFieldData)) + + f, ok = 
d1.Data[ArrayField] + assert.True(t, ok) + assert.Equal(t, []int32{4, 5, 6}, f.(*ArrayFieldData).Data[0].GetIntData().GetData()) + + f, ok = d1.Data[JSONField] + assert.True(t, ok) + assert.EqualValues(t, [][]byte{[]byte(`{"hello":"world"}`)}, f.(*JSONFieldData).Data) + }) + t.Run("normal case", func(t *testing.T) { + d1 := &InsertData{ + Data: map[int64]FieldData{ + common.RowIDField: &Int64FieldData{ + Data: []int64{1}, + }, + common.TimeStampField: &Int64FieldData{ + Data: []int64{1}, + }, + BoolField: &BoolFieldData{ + Data: []bool{true}, + }, + Int8Field: &Int8FieldData{ + Data: []int8{1}, + }, + Int16Field: &Int16FieldData{ + Data: []int16{1}, + }, + Int32Field: &Int32FieldData{ + Data: []int32{1}, + }, + Int64Field: &Int64FieldData{ + Data: []int64{1}, + }, + FloatField: &FloatFieldData{ + Data: []float32{0}, + }, + DoubleField: &DoubleFieldData{ + Data: []float64{0}, + }, + StringField: &StringFieldData{ + Data: []string{"1"}, + }, + BinaryVectorField: &BinaryVectorFieldData{ + Data: []byte{0}, + Dim: 8, + }, + FloatVectorField: &FloatVectorFieldData{ + Data: []float32{0}, + Dim: 1, + }, + Float16VectorField: &Float16VectorFieldData{ + Data: []byte{0, 1}, + Dim: 1, + }, + BFloat16VectorField: &BFloat16VectorFieldData{ + Data: []byte{0, 1}, + Dim: 1, + }, + SparseFloatVectorField: &SparseFloatVectorFieldData{ + SparseFloatArray: schemapb.SparseFloatArray{ + Dim: 600, + Contents: [][]byte{ + typeutil.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}), + typeutil.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}), + }, + }, + }, + ArrayField: &ArrayFieldData{ + Data: []*schemapb.ScalarField{ + { + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{1, 2, 3}, + }, }, }, }, }, + JSONField: &JSONFieldData{ + Data: [][]byte{[]byte(`{"key":"value"}`)}, + }, }, - JSONField: &JSONFieldData{ - Data: [][]byte{[]byte(`{"hello":"world"}`)}, + Infos: nil, + } + d2 := &InsertData{ + Data: 
map[int64]FieldData{ + common.RowIDField: &Int64FieldData{ + Data: []int64{2}, + }, + common.TimeStampField: &Int64FieldData{ + Data: []int64{2}, + }, + BoolField: &BoolFieldData{ + Data: []bool{false}, + }, + Int8Field: &Int8FieldData{ + Data: []int8{2}, + }, + Int16Field: &Int16FieldData{ + Data: []int16{2}, + }, + Int32Field: &Int32FieldData{ + Data: []int32{2}, + }, + Int64Field: &Int64FieldData{ + Data: []int64{2}, + }, + FloatField: &FloatFieldData{ + Data: []float32{0}, + }, + DoubleField: &DoubleFieldData{ + Data: []float64{0}, + }, + StringField: &StringFieldData{ + Data: []string{"2"}, + }, + BinaryVectorField: &BinaryVectorFieldData{ + Data: []byte{0}, + Dim: 8, + }, + FloatVectorField: &FloatVectorFieldData{ + Data: []float32{0}, + Dim: 1, + }, + Float16VectorField: &Float16VectorFieldData{ + Data: []byte{2, 3}, + Dim: 1, + }, + BFloat16VectorField: &BFloat16VectorFieldData{ + Data: []byte{2, 3}, + Dim: 1, + }, + SparseFloatVectorField: &SparseFloatVectorFieldData{ + SparseFloatArray: schemapb.SparseFloatArray{ + Dim: 600, + Contents: [][]byte{ + typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}), + }, + }, + }, + ArrayField: &ArrayFieldData{ + Data: []*schemapb.ScalarField{ + { + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{4, 5, 6}, + }, + }, + }, + }, + }, + JSONField: &JSONFieldData{ + Data: [][]byte{[]byte(`{"hello":"world"}`)}, + }, }, - }, - Infos: nil, - } + Infos: nil, + } - MergeInsertData(d1, d2) + MergeInsertData(d1, d2) - f, ok := d1.Data[common.RowIDField] - assert.True(t, ok) - assert.Equal(t, []int64{1, 2}, f.(*Int64FieldData).Data) + f, ok := d1.Data[common.RowIDField] + assert.True(t, ok) + assert.Equal(t, []int64{1, 2}, f.(*Int64FieldData).Data) - f, ok = d1.Data[common.TimeStampField] - assert.True(t, ok) - assert.Equal(t, []int64{1, 2}, f.(*Int64FieldData).Data) + f, ok = d1.Data[common.TimeStampField] + assert.True(t, ok) + assert.Equal(t, []int64{1, 2}, 
f.(*Int64FieldData).Data) - f, ok = d1.Data[BoolField] - assert.True(t, ok) - assert.Equal(t, []bool{true, false}, f.(*BoolFieldData).Data) + f, ok = d1.Data[BoolField] + assert.True(t, ok) + assert.Equal(t, []bool{true, false}, f.(*BoolFieldData).Data) - f, ok = d1.Data[Int8Field] - assert.True(t, ok) - assert.Equal(t, []int8{1, 2}, f.(*Int8FieldData).Data) + f, ok = d1.Data[Int8Field] + assert.True(t, ok) + assert.Equal(t, []int8{1, 2}, f.(*Int8FieldData).Data) - f, ok = d1.Data[Int16Field] - assert.True(t, ok) - assert.Equal(t, []int16{1, 2}, f.(*Int16FieldData).Data) + f, ok = d1.Data[Int16Field] + assert.True(t, ok) + assert.Equal(t, []int16{1, 2}, f.(*Int16FieldData).Data) - f, ok = d1.Data[Int32Field] - assert.True(t, ok) - assert.Equal(t, []int32{1, 2}, f.(*Int32FieldData).Data) + f, ok = d1.Data[Int32Field] + assert.True(t, ok) + assert.Equal(t, []int32{1, 2}, f.(*Int32FieldData).Data) - f, ok = d1.Data[Int64Field] - assert.True(t, ok) - assert.Equal(t, []int64{1, 2}, f.(*Int64FieldData).Data) + f, ok = d1.Data[Int64Field] + assert.True(t, ok) + assert.Equal(t, []int64{1, 2}, f.(*Int64FieldData).Data) - f, ok = d1.Data[FloatField] - assert.True(t, ok) - assert.Equal(t, []float32{0, 0}, f.(*FloatFieldData).Data) + f, ok = d1.Data[FloatField] + assert.True(t, ok) + assert.Equal(t, []float32{0, 0}, f.(*FloatFieldData).Data) - f, ok = d1.Data[DoubleField] - assert.True(t, ok) - assert.Equal(t, []float64{0, 0}, f.(*DoubleFieldData).Data) + f, ok = d1.Data[DoubleField] + assert.True(t, ok) + assert.Equal(t, []float64{0, 0}, f.(*DoubleFieldData).Data) - f, ok = d1.Data[StringField] - assert.True(t, ok) - assert.Equal(t, []string{"1", "2"}, f.(*StringFieldData).Data) + f, ok = d1.Data[StringField] + assert.True(t, ok) + assert.Equal(t, []string{"1", "2"}, f.(*StringFieldData).Data) - f, ok = d1.Data[BinaryVectorField] - assert.True(t, ok) - assert.Equal(t, []byte{0, 0}, f.(*BinaryVectorFieldData).Data) + f, ok = d1.Data[BinaryVectorField] + assert.True(t, ok) + 
assert.Equal(t, []byte{0, 0}, f.(*BinaryVectorFieldData).Data) - f, ok = d1.Data[FloatVectorField] - assert.True(t, ok) - assert.Equal(t, []float32{0, 0}, f.(*FloatVectorFieldData).Data) + f, ok = d1.Data[FloatVectorField] + assert.True(t, ok) + assert.Equal(t, []float32{0, 0}, f.(*FloatVectorFieldData).Data) - f, ok = d1.Data[Float16VectorField] - assert.True(t, ok) - assert.Equal(t, []byte{0, 1, 2, 3}, f.(*Float16VectorFieldData).Data) + f, ok = d1.Data[Float16VectorField] + assert.True(t, ok) + assert.Equal(t, []byte{0, 1, 2, 3}, f.(*Float16VectorFieldData).Data) - f, ok = d1.Data[BFloat16VectorField] - assert.True(t, ok) - assert.Equal(t, []byte{0, 1, 2, 3}, f.(*BFloat16VectorFieldData).Data) + f, ok = d1.Data[BFloat16VectorField] + assert.True(t, ok) + assert.Equal(t, []byte{0, 1, 2, 3}, f.(*BFloat16VectorFieldData).Data) - f, ok = d1.Data[SparseFloatVectorField] - assert.True(t, ok) - assert.Equal(t, &SparseFloatVectorFieldData{ - SparseFloatArray: schemapb.SparseFloatArray{ - Dim: 600, - Contents: [][]byte{ - typeutil.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}), - typeutil.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}), - typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}), + f, ok = d1.Data[SparseFloatVectorField] + assert.True(t, ok) + assert.Equal(t, &SparseFloatVectorFieldData{ + SparseFloatArray: schemapb.SparseFloatArray{ + Dim: 600, + Contents: [][]byte{ + typeutil.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}), + typeutil.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}), + typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}), + }, }, - }, - }, f.(*SparseFloatVectorFieldData)) + }, f.(*SparseFloatVectorFieldData)) - f, ok = d1.Data[ArrayField] - assert.True(t, ok) - assert.Equal(t, []int32{1, 2, 3}, f.(*ArrayFieldData).Data[0].GetIntData().GetData()) - assert.Equal(t, []int32{4, 5, 6}, 
f.(*ArrayFieldData).Data[1].GetIntData().GetData()) + f, ok = d1.Data[ArrayField] + assert.True(t, ok) + assert.Equal(t, []int32{1, 2, 3}, f.(*ArrayFieldData).Data[0].GetIntData().GetData()) + assert.Equal(t, []int32{4, 5, 6}, f.(*ArrayFieldData).Data[1].GetIntData().GetData()) - f, ok = d1.Data[JSONField] - assert.True(t, ok) - assert.EqualValues(t, [][]byte{[]byte(`{"key":"value"}`), []byte(`{"hello":"world"}`)}, f.(*JSONFieldData).Data) + f, ok = d1.Data[JSONField] + assert.True(t, ok) + assert.EqualValues(t, [][]byte{[]byte(`{"key":"value"}`), []byte(`{"hello":"world"}`)}, f.(*JSONFieldData).Data) + }) } func TestMergeFloat16VectorField(t *testing.T) { diff --git a/internal/util/importutilv2/binlog/reader_test.go b/internal/util/importutilv2/binlog/reader_test.go index 0ddedff5ac..a179374723 100644 --- a/internal/util/importutilv2/binlog/reader_test.go +++ b/internal/util/importutilv2/binlog/reader_test.go @@ -70,7 +70,7 @@ func (suite *ReaderSuite) SetupTest() { func createBinlogBuf(t *testing.T, field *schemapb.FieldSchema, data storage.FieldData) []byte { dataType := field.GetDataType() - w := storage.NewInsertBinlogWriter(dataType, 1, 1, 1, field.GetFieldID()) + w := storage.NewInsertBinlogWriter(dataType, 1, 1, 1, field.GetFieldID(), false) assert.NotNil(t, w) defer w.Close() @@ -81,7 +81,7 @@ func createBinlogBuf(t *testing.T, field *schemapb.FieldSchema, data storage.Fie dim = 1 } - evt, err := w.NextInsertEventWriter(int(dim)) + evt, err := w.NextInsertEventWriter(false, int(dim)) assert.NoError(t, err) evt.SetEventTimestamp(1, math.MaxInt64) @@ -94,42 +94,42 @@ func createBinlogBuf(t *testing.T, field *schemapb.FieldSchema, data storage.Fie switch dataType { case schemapb.DataType_Bool: - err = evt.AddBoolToPayload(data.(*storage.BoolFieldData).Data) + err = evt.AddBoolToPayload(data.(*storage.BoolFieldData).Data, nil) assert.NoError(t, err) case schemapb.DataType_Int8: - err = evt.AddInt8ToPayload(data.(*storage.Int8FieldData).Data) + err = 
evt.AddInt8ToPayload(data.(*storage.Int8FieldData).Data, nil) assert.NoError(t, err) case schemapb.DataType_Int16: - err = evt.AddInt16ToPayload(data.(*storage.Int16FieldData).Data) + err = evt.AddInt16ToPayload(data.(*storage.Int16FieldData).Data, nil) assert.NoError(t, err) case schemapb.DataType_Int32: - err = evt.AddInt32ToPayload(data.(*storage.Int32FieldData).Data) + err = evt.AddInt32ToPayload(data.(*storage.Int32FieldData).Data, nil) assert.NoError(t, err) case schemapb.DataType_Int64: - err = evt.AddInt64ToPayload(data.(*storage.Int64FieldData).Data) + err = evt.AddInt64ToPayload(data.(*storage.Int64FieldData).Data, nil) assert.NoError(t, err) case schemapb.DataType_Float: - err = evt.AddFloatToPayload(data.(*storage.FloatFieldData).Data) + err = evt.AddFloatToPayload(data.(*storage.FloatFieldData).Data, nil) assert.NoError(t, err) case schemapb.DataType_Double: - err = evt.AddDoubleToPayload(data.(*storage.DoubleFieldData).Data) + err = evt.AddDoubleToPayload(data.(*storage.DoubleFieldData).Data, nil) assert.NoError(t, err) case schemapb.DataType_VarChar: values := data.(*storage.StringFieldData).Data for _, val := range values { - err = evt.AddOneStringToPayload(val) + err = evt.AddOneStringToPayload(val, true) assert.NoError(t, err) } case schemapb.DataType_JSON: rows := data.(*storage.JSONFieldData).Data for i := 0; i < len(rows); i++ { - err = evt.AddOneJSONToPayload(rows[i]) + err = evt.AddOneJSONToPayload(rows[i], true) assert.NoError(t, err) } case schemapb.DataType_Array: rows := data.(*storage.ArrayFieldData).Data for i := 0; i < len(rows); i++ { - err = evt.AddOneArrayToPayload(rows[i]) + err = evt.AddOneArrayToPayload(rows[i], true) assert.NoError(t, err) } case schemapb.DataType_BinaryVector: diff --git a/internal/util/importutilv2/binlog/util.go b/internal/util/importutilv2/binlog/util.go index 7cee661b5b..6d10556755 100644 --- a/internal/util/importutilv2/binlog/util.go +++ b/internal/util/importutilv2/binlog/util.go @@ -43,7 +43,7 @@ func 
readData(reader *storage.BinlogReader, et storage.EventTypeCode) ([]any, er return nil, merr.WrapErrImportFailed(fmt.Sprintf("wrong binlog type, expect:%s, actual:%s", et.String(), event.TypeCode.String())) } - rows, _, err := event.PayloadReaderInterface.GetDataFromPayload() + rows, _, _, err := event.PayloadReaderInterface.GetDataFromPayload() if err != nil { return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read data, error: %v", err)) } diff --git a/pkg/util/typeutil/schema.go b/pkg/util/typeutil/schema.go index 0c840a6489..e55b643379 100644 --- a/pkg/util/typeutil/schema.go +++ b/pkg/util/typeutil/schema.go @@ -590,6 +590,7 @@ func AppendFieldData(dst, src []*schemapb.FieldData, idx int64) (appendSize int6 Field: &schemapb.FieldData_Scalars{ Scalars: &schemapb.ScalarField{}, }, + ValidData: fieldData.GetValidData(), } } dstScalar := dst[i].GetScalars()