enhance: binlog primary key turn off dict encoding (#34358)

issue: #34357 

Go Parquet uses dictionary encoding by default, and it falls back to
plain encoding if the dictionary size exceeds the dictionary page size
limit. Users can specify a custom fallback encoding by using
`parquet.WithEncoding(ENCODING_METHOD)` in writer properties. However,
Go Parquet [falls back to plain
encoding](e65c1e295d/go/parquet/file/column_writer_types.gen.go.tmpl (L238))
rather than the custom encoding method users provide. Therefore, this patch
only turns off dictionary encoding for the primary key.

In a benchmark with 5 million auto-ID primary keys, the parquet file size
shrinks from 13.93 MB to 8.36 MB when dictionary encoding is turned
off, reducing primary key storage space by 40%.

Signed-off-by: shaoting-huang <shaoting.huang@zilliz.com>
This commit is contained in:
shaoting-huang 2024-07-17 17:47:44 +08:00 committed by GitHub
parent 67324eb809
commit 88b373b024
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 332 additions and 289 deletions

View File

@ -39,7 +39,7 @@ func generateTestSchema() *schemapb.CollectionSchema {
{FieldID: 13, Name: "int64", DataType: schemapb.DataType_Int64}, {FieldID: 13, Name: "int64", DataType: schemapb.DataType_Int64},
{FieldID: 14, Name: "float", DataType: schemapb.DataType_Float}, {FieldID: 14, Name: "float", DataType: schemapb.DataType_Float},
{FieldID: 15, Name: "double", DataType: schemapb.DataType_Double}, {FieldID: 15, Name: "double", DataType: schemapb.DataType_Double},
{FieldID: 16, Name: "varchar", DataType: schemapb.DataType_VarChar}, {FieldID: 16, Name: "varchar", DataType: schemapb.DataType_VarChar, IsPrimaryKey: true},
{FieldID: 17, Name: "string", DataType: schemapb.DataType_String}, {FieldID: 17, Name: "string", DataType: schemapb.DataType_String},
{FieldID: 18, Name: "array", DataType: schemapb.DataType_Array}, {FieldID: 18, Name: "array", DataType: schemapb.DataType_Array},
{FieldID: 19, Name: "string", DataType: schemapb.DataType_JSON}, {FieldID: 19, Name: "string", DataType: schemapb.DataType_JSON},

View File

@ -39,7 +39,7 @@ import (
func TestInsertBinlog(t *testing.T) { func TestInsertBinlog(t *testing.T) {
w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false) w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false)
e1, err := w.NextInsertEventWriter(false) e1, err := w.NextInsertEventWriter()
assert.NoError(t, err) assert.NoError(t, err)
err = e1.AddDataToPayload([]int64{1, 2, 3}, nil) err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err) assert.NoError(t, err)
@ -49,7 +49,7 @@ func TestInsertBinlog(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
e1.SetEventTimestamp(100, 200) e1.SetEventTimestamp(100, 200)
e2, err := w.NextInsertEventWriter(false) e2, err := w.NextInsertEventWriter()
assert.NoError(t, err) assert.NoError(t, err)
err = e2.AddDataToPayload([]int64{7, 8, 9}, nil) err = e2.AddDataToPayload([]int64{7, 8, 9}, nil)
assert.NoError(t, err) assert.NoError(t, err)
@ -1329,7 +1329,7 @@ func TestNewBinlogReaderError(t *testing.T) {
w.SetEventTimeStamp(1000, 2000) w.SetEventTimeStamp(1000, 2000)
e1, err := w.NextInsertEventWriter(false) e1, err := w.NextInsertEventWriter()
assert.NoError(t, err) assert.NoError(t, err)
err = e1.AddDataToPayload([]int64{1, 2, 3}, nil) err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err) assert.NoError(t, err)
@ -1393,7 +1393,7 @@ func TestNewBinlogWriterTsError(t *testing.T) {
func TestInsertBinlogWriterCloseError(t *testing.T) { func TestInsertBinlogWriterCloseError(t *testing.T) {
insertWriter := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false) insertWriter := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false)
e1, err := insertWriter.NextInsertEventWriter(false) e1, err := insertWriter.NextInsertEventWriter()
assert.NoError(t, err) assert.NoError(t, err)
sizeTotal := 2000000 sizeTotal := 2000000
@ -1406,7 +1406,7 @@ func TestInsertBinlogWriterCloseError(t *testing.T) {
err = insertWriter.Finish() err = insertWriter.Finish()
assert.NoError(t, err) assert.NoError(t, err)
assert.NotNil(t, insertWriter.buffer) assert.NotNil(t, insertWriter.buffer)
insertEventWriter, err := insertWriter.NextInsertEventWriter(false) insertEventWriter, err := insertWriter.NextInsertEventWriter()
assert.Nil(t, insertEventWriter) assert.Nil(t, insertEventWriter)
assert.Error(t, err) assert.Error(t, err)
insertWriter.Close() insertWriter.Close()

View File

@ -23,7 +23,6 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/typeutil"
) )
// BinlogType is to distinguish different files saving different data. // BinlogType is to distinguish different files saving different data.
@ -150,21 +149,12 @@ type InsertBinlogWriter struct {
} }
// NextInsertEventWriter returns an event writer to write insert data to an event. // NextInsertEventWriter returns an event writer to write insert data to an event.
func (writer *InsertBinlogWriter) NextInsertEventWriter(nullable bool, dim ...int) (*insertEventWriter, error) { func (writer *InsertBinlogWriter) NextInsertEventWriter(opts ...PayloadWriterOptions) (*insertEventWriter, error) {
if writer.isClosed() { if writer.isClosed() {
return nil, fmt.Errorf("binlog has closed") return nil, fmt.Errorf("binlog has closed")
} }
var event *insertEventWriter event, err := newInsertEventWriter(writer.PayloadDataType, opts...)
var err error
if typeutil.IsVectorType(writer.PayloadDataType) && !typeutil.IsSparseFloatVectorType(writer.PayloadDataType) {
if len(dim) != 1 {
return nil, fmt.Errorf("incorrect input numbers")
}
event, err = newInsertEventWriter(writer.PayloadDataType, nullable, dim[0])
} else {
event, err = newInsertEventWriter(writer.PayloadDataType, nullable)
}
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -179,11 +169,11 @@ type DeleteBinlogWriter struct {
} }
// NextDeleteEventWriter returns an event writer to write delete data to an event. // NextDeleteEventWriter returns an event writer to write delete data to an event.
func (writer *DeleteBinlogWriter) NextDeleteEventWriter() (*deleteEventWriter, error) { func (writer *DeleteBinlogWriter) NextDeleteEventWriter(opts ...PayloadWriterOptions) (*deleteEventWriter, error) {
if writer.isClosed() { if writer.isClosed() {
return nil, fmt.Errorf("binlog has closed") return nil, fmt.Errorf("binlog has closed")
} }
event, err := newDeleteEventWriter(writer.PayloadDataType) event, err := newDeleteEventWriter(writer.PayloadDataType, opts...)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -32,7 +32,7 @@ func TestBinlogWriterReader(t *testing.T) {
binlogWriter.SetEventTimeStamp(1000, 2000) binlogWriter.SetEventTimeStamp(1000, 2000)
defer binlogWriter.Close() defer binlogWriter.Close()
eventWriter, err := binlogWriter.NextInsertEventWriter(false) eventWriter, err := binlogWriter.NextInsertEventWriter()
assert.NoError(t, err) assert.NoError(t, err)
err = eventWriter.AddInt32ToPayload([]int32{1, 2, 3}, nil) err = eventWriter.AddInt32ToPayload([]int32{1, 2, 3}, nil)
assert.NoError(t, err) assert.NoError(t, err)

View File

@ -243,31 +243,18 @@ func (insertCodec *InsertCodec) Serialize(partitionID UniqueID, segmentID Unique
for _, field := range insertCodec.Schema.Schema.Fields { for _, field := range insertCodec.Schema.Schema.Fields {
// encode fields // encode fields
writer = NewInsertBinlogWriter(field.DataType, insertCodec.Schema.ID, partitionID, segmentID, field.FieldID, field.GetNullable()) writer = NewInsertBinlogWriter(field.DataType, insertCodec.Schema.ID, partitionID, segmentID, field.FieldID, field.GetNullable())
var eventWriter *insertEventWriter
var err error // get payload writing configs, including nullable and fallback encoding method
var dim int64 opts := []PayloadWriterOptions{WithNullable(field.GetNullable()), WithWriterProps(getFieldWriterProps(field))}
if typeutil.IsVectorType(field.DataType) {
if field.GetNullable() { if typeutil.IsVectorType(field.DataType) && !typeutil.IsSparseFloatVectorType(field.DataType) {
return nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("vectorType not support null, fieldName: %s", field.GetName())) dim, err := typeutil.GetDim(field)
if err != nil {
return nil, err
} }
switch field.DataType { opts = append(opts, WithDim(int(dim)))
case schemapb.DataType_FloatVector,
schemapb.DataType_BinaryVector,
schemapb.DataType_Float16Vector,
schemapb.DataType_BFloat16Vector:
dim, err = typeutil.GetDim(field)
if err != nil {
return nil, err
}
eventWriter, err = writer.NextInsertEventWriter(field.GetNullable(), int(dim))
case schemapb.DataType_SparseFloatVector:
eventWriter, err = writer.NextInsertEventWriter(field.GetNullable())
default:
return nil, fmt.Errorf("undefined data type %d", field.DataType)
}
} else {
eventWriter, err = writer.NextInsertEventWriter(field.GetNullable())
} }
eventWriter, err := writer.NextInsertEventWriter(opts...)
if err != nil { if err != nil {
writer.Close() writer.Close()
return nil, err return nil, err
@ -711,7 +698,9 @@ func NewDeleteCodec() *DeleteCodec {
// For each delete message, it will save "pk,ts" string to binlog. // For each delete message, it will save "pk,ts" string to binlog.
func (deleteCodec *DeleteCodec) Serialize(collectionID UniqueID, partitionID UniqueID, segmentID UniqueID, data *DeleteData) (*Blob, error) { func (deleteCodec *DeleteCodec) Serialize(collectionID UniqueID, partitionID UniqueID, segmentID UniqueID, data *DeleteData) (*Blob, error) {
binlogWriter := NewDeleteBinlogWriter(schemapb.DataType_String, collectionID, partitionID, segmentID) binlogWriter := NewDeleteBinlogWriter(schemapb.DataType_String, collectionID, partitionID, segmentID)
eventWriter, err := binlogWriter.NextDeleteEventWriter() field := &schemapb.FieldSchema{IsPrimaryKey: true, DataType: schemapb.DataType_String}
opts := []PayloadWriterOptions{WithWriterProps(getFieldWriterProps(field))}
eventWriter, err := binlogWriter.NextDeleteEventWriter(opts...)
if err != nil { if err != nil {
binlogWriter.Close() binlogWriter.Close()
return nil, err return nil, err

View File

@ -977,7 +977,7 @@ func TestDeleteData(t *testing.T) {
func TestAddFieldDataToPayload(t *testing.T) { func TestAddFieldDataToPayload(t *testing.T) {
w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false) w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false)
e, _ := w.NextInsertEventWriter(false) e, _ := w.NextInsertEventWriter()
var err error var err error
err = AddFieldDataToPayload(e, schemapb.DataType_Bool, &BoolFieldData{[]bool{}, nil}) err = AddFieldDataToPayload(e, schemapb.DataType_Bool, &BoolFieldData{[]bool{}, nil})
assert.Error(t, err) assert.Error(t, err)

View File

@ -195,7 +195,7 @@ func TestInsertEvent(t *testing.T) {
} }
t.Run("insert_bool", func(t *testing.T) { t.Run("insert_bool", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Bool, false) w, err := newInsertEventWriter(schemapb.DataType_Bool)
assert.NoError(t, err) assert.NoError(t, err)
insertT(t, schemapb.DataType_Bool, w, insertT(t, schemapb.DataType_Bool, w,
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
@ -211,7 +211,7 @@ func TestInsertEvent(t *testing.T) {
}) })
t.Run("insert_int8", func(t *testing.T) { t.Run("insert_int8", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Int8, false) w, err := newInsertEventWriter(schemapb.DataType_Int8)
assert.NoError(t, err) assert.NoError(t, err)
insertT(t, schemapb.DataType_Int8, w, insertT(t, schemapb.DataType_Int8, w,
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
@ -227,7 +227,7 @@ func TestInsertEvent(t *testing.T) {
}) })
t.Run("insert_int16", func(t *testing.T) { t.Run("insert_int16", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Int16, false) w, err := newInsertEventWriter(schemapb.DataType_Int16)
assert.NoError(t, err) assert.NoError(t, err)
insertT(t, schemapb.DataType_Int16, w, insertT(t, schemapb.DataType_Int16, w,
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
@ -243,7 +243,7 @@ func TestInsertEvent(t *testing.T) {
}) })
t.Run("insert_int32", func(t *testing.T) { t.Run("insert_int32", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Int32, false) w, err := newInsertEventWriter(schemapb.DataType_Int32)
assert.NoError(t, err) assert.NoError(t, err)
insertT(t, schemapb.DataType_Int32, w, insertT(t, schemapb.DataType_Int32, w,
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
@ -259,7 +259,7 @@ func TestInsertEvent(t *testing.T) {
}) })
t.Run("insert_int64", func(t *testing.T) { t.Run("insert_int64", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Int64, false) w, err := newInsertEventWriter(schemapb.DataType_Int64)
assert.NoError(t, err) assert.NoError(t, err)
insertT(t, schemapb.DataType_Int64, w, insertT(t, schemapb.DataType_Int64, w,
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
@ -275,7 +275,7 @@ func TestInsertEvent(t *testing.T) {
}) })
t.Run("insert_float32", func(t *testing.T) { t.Run("insert_float32", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Float, false) w, err := newInsertEventWriter(schemapb.DataType_Float)
assert.NoError(t, err) assert.NoError(t, err)
insertT(t, schemapb.DataType_Float, w, insertT(t, schemapb.DataType_Float, w,
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
@ -291,7 +291,7 @@ func TestInsertEvent(t *testing.T) {
}) })
t.Run("insert_float64", func(t *testing.T) { t.Run("insert_float64", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Double, false) w, err := newInsertEventWriter(schemapb.DataType_Double)
assert.NoError(t, err) assert.NoError(t, err)
insertT(t, schemapb.DataType_Double, w, insertT(t, schemapb.DataType_Double, w,
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
@ -307,7 +307,7 @@ func TestInsertEvent(t *testing.T) {
}) })
t.Run("insert_binary_vector", func(t *testing.T) { t.Run("insert_binary_vector", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_BinaryVector, false, 16) w, err := newInsertEventWriter(schemapb.DataType_BinaryVector, WithDim(16))
assert.NoError(t, err) assert.NoError(t, err)
insertT(t, schemapb.DataType_BinaryVector, w, insertT(t, schemapb.DataType_BinaryVector, w,
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
@ -323,7 +323,7 @@ func TestInsertEvent(t *testing.T) {
}) })
t.Run("insert_float_vector", func(t *testing.T) { t.Run("insert_float_vector", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_FloatVector, false, 2) w, err := newInsertEventWriter(schemapb.DataType_FloatVector, WithDim(2))
assert.NoError(t, err) assert.NoError(t, err)
insertT(t, schemapb.DataType_FloatVector, w, insertT(t, schemapb.DataType_FloatVector, w,
func(w *insertEventWriter) error { func(w *insertEventWriter) error {
@ -339,7 +339,7 @@ func TestInsertEvent(t *testing.T) {
}) })
t.Run("insert_string", func(t *testing.T) { t.Run("insert_string", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_String, false) w, err := newInsertEventWriter(schemapb.DataType_String)
assert.NoError(t, err) assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload("1234", nil) err = w.AddDataToPayload("1234", nil)
@ -1101,7 +1101,7 @@ func TestEventReaderError(t *testing.T) {
} }
func TestEventClose(t *testing.T) { func TestEventClose(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_String, false) w, err := newInsertEventWriter(schemapb.DataType_String)
assert.NoError(t, err) assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0)) w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload("1234", nil) err = w.AddDataToPayload("1234", nil)

View File

@ -19,14 +19,12 @@ package storage
import ( import (
"bytes" "bytes"
"encoding/binary" "encoding/binary"
"fmt"
"io" "io"
"github.com/cockroachdb/errors" "github.com/cockroachdb/errors"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/typeutil"
) )
// EventTypeCode represents event type by code // EventTypeCode represents event type by code
@ -222,17 +220,8 @@ func NewBaseDescriptorEvent(collectionID int64, partitionID int64, segmentID int
return de return de
} }
func newInsertEventWriter(dataType schemapb.DataType, nullable bool, dim ...int) (*insertEventWriter, error) { func newInsertEventWriter(dataType schemapb.DataType, opts ...PayloadWriterOptions) (*insertEventWriter, error) {
var payloadWriter PayloadWriterInterface payloadWriter, err := NewPayloadWriter(dataType, opts...)
var err error
if typeutil.IsVectorType(dataType) && !typeutil.IsSparseFloatVectorType(dataType) {
if len(dim) != 1 {
return nil, fmt.Errorf("incorrect input numbers")
}
payloadWriter, err = NewPayloadWriter(dataType, nullable, dim[0])
} else {
payloadWriter, err = NewPayloadWriter(dataType, nullable)
}
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -253,8 +242,8 @@ func newInsertEventWriter(dataType schemapb.DataType, nullable bool, dim ...int)
return writer, nil return writer, nil
} }
func newDeleteEventWriter(dataType schemapb.DataType) (*deleteEventWriter, error) { func newDeleteEventWriter(dataType schemapb.DataType, opts ...PayloadWriterOptions) (*deleteEventWriter, error) {
payloadWriter, err := NewPayloadWriter(dataType, false) payloadWriter, err := NewPayloadWriter(dataType, opts...)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -280,7 +269,7 @@ func newCreateCollectionEventWriter(dataType schemapb.DataType) (*createCollecti
return nil, errors.New("incorrect data type") return nil, errors.New("incorrect data type")
} }
payloadWriter, err := NewPayloadWriter(dataType, false) payloadWriter, err := NewPayloadWriter(dataType)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -306,7 +295,7 @@ func newDropCollectionEventWriter(dataType schemapb.DataType) (*dropCollectionEv
return nil, errors.New("incorrect data type") return nil, errors.New("incorrect data type")
} }
payloadWriter, err := NewPayloadWriter(dataType, false) payloadWriter, err := NewPayloadWriter(dataType)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -332,7 +321,7 @@ func newCreatePartitionEventWriter(dataType schemapb.DataType) (*createPartition
return nil, errors.New("incorrect data type") return nil, errors.New("incorrect data type")
} }
payloadWriter, err := NewPayloadWriter(dataType, false) payloadWriter, err := NewPayloadWriter(dataType)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -358,7 +347,7 @@ func newDropPartitionEventWriter(dataType schemapb.DataType) (*dropPartitionEven
return nil, errors.New("incorrect data type") return nil, errors.New("incorrect data type")
} }
payloadWriter, err := NewPayloadWriter(dataType, false) payloadWriter, err := NewPayloadWriter(dataType)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -380,7 +369,7 @@ func newDropPartitionEventWriter(dataType schemapb.DataType) (*dropPartitionEven
} }
func newIndexFileEventWriter(dataType schemapb.DataType) (*indexFileEventWriter, error) { func newIndexFileEventWriter(dataType schemapb.DataType) (*indexFileEventWriter, error) {
payloadWriter, err := NewPayloadWriter(dataType, false) payloadWriter, err := NewPayloadWriter(dataType)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -59,11 +59,11 @@ func TestSizeofStruct(t *testing.T) {
} }
func TestEventWriter(t *testing.T) { func TestEventWriter(t *testing.T) {
insertEvent, err := newInsertEventWriter(schemapb.DataType_Int32, false) insertEvent, err := newInsertEventWriter(schemapb.DataType_Int32)
assert.NoError(t, err) assert.NoError(t, err)
insertEvent.Close() insertEvent.Close()
insertEvent, err = newInsertEventWriter(schemapb.DataType_Int32, false) insertEvent, err = newInsertEventWriter(schemapb.DataType_Int32)
assert.NoError(t, err) assert.NoError(t, err)
defer insertEvent.Close() defer insertEvent.Close()

View File

@ -32,7 +32,7 @@ import (
func TestPayload_ReaderAndWriter(t *testing.T) { func TestPayload_ReaderAndWriter(t *testing.T) {
t.Run("TestBool", func(t *testing.T) { t.Run("TestBool", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Bool, false) w, err := NewPayloadWriter(schemapb.DataType_Bool)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -69,7 +69,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestInt8", func(t *testing.T) { t.Run("TestInt8", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int8, false) w, err := NewPayloadWriter(schemapb.DataType_Int8)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -109,7 +109,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestInt16", func(t *testing.T) { t.Run("TestInt16", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int16, false) w, err := NewPayloadWriter(schemapb.DataType_Int16)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -147,7 +147,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestInt32", func(t *testing.T) { t.Run("TestInt32", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int32, false) w, err := NewPayloadWriter(schemapb.DataType_Int32)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -186,7 +186,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestInt64", func(t *testing.T) { t.Run("TestInt64", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int64, false) w, err := NewPayloadWriter(schemapb.DataType_Int64)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -225,7 +225,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestFloat32", func(t *testing.T) { t.Run("TestFloat32", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Float, false) w, err := NewPayloadWriter(schemapb.DataType_Float)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -264,7 +264,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestDouble", func(t *testing.T) { t.Run("TestDouble", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Double, false) w, err := NewPayloadWriter(schemapb.DataType_Double)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -303,7 +303,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddString", func(t *testing.T) { t.Run("TestAddString", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_String, false) w, err := NewPayloadWriter(schemapb.DataType_String)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -351,7 +351,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddArray", func(t *testing.T) { t.Run("TestAddArray", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Array, false) w, err := NewPayloadWriter(schemapb.DataType_Array)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -423,7 +423,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddJSON", func(t *testing.T) { t.Run("TestAddJSON", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_JSON, false) w, err := NewPayloadWriter(schemapb.DataType_JSON)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -471,7 +471,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestBinaryVector", func(t *testing.T) { t.Run("TestBinaryVector", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, false, 8) w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, WithDim(8))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -520,7 +520,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestFloatVector", func(t *testing.T) { t.Run("TestFloatVector", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_FloatVector, false, 1) w, err := NewPayloadWriter(schemapb.DataType_FloatVector, WithDim(1))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -562,7 +562,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestFloat16Vector", func(t *testing.T) { t.Run("TestFloat16Vector", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Float16Vector, false, 1) w, err := NewPayloadWriter(schemapb.DataType_Float16Vector, WithDim(1))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -604,7 +604,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestBFloat16Vector", func(t *testing.T) { t.Run("TestBFloat16Vector", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_BFloat16Vector, false, 1) w, err := NewPayloadWriter(schemapb.DataType_BFloat16Vector, WithDim(1))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -646,7 +646,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestSparseFloatVector", func(t *testing.T) { t.Run("TestSparseFloatVector", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_SparseFloatVector, false) w, err := NewPayloadWriter(schemapb.DataType_SparseFloatVector)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -715,7 +715,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
testSparseOneBatch := func(t *testing.T, rows [][]byte, actualDim int) { testSparseOneBatch := func(t *testing.T, rows [][]byte, actualDim int) {
w, err := NewPayloadWriter(schemapb.DataType_SparseFloatVector, false) w, err := NewPayloadWriter(schemapb.DataType_SparseFloatVector)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -811,31 +811,8 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}, int(int32Max)) }, int(int32Max))
}) })
// t.Run("TestAddDataToPayload", func(t *testing.T) {
// w, err := NewPayloadWriter(schemapb.DataType_Bool)
// w.colType = 999
// require.Nil(t, err)
// require.NotNil(t, w)
// err = w.AddDataToPayload([]bool{false, false, false, false})
// assert.NotNil(t, err)
// err = w.AddDataToPayload([]bool{false, false, false, false}, 0)
// assert.NotNil(t, err)
// err = w.AddDataToPayload([]bool{false, false, false, false}, 0, 0)
// assert.NotNil(t, err)
// err = w.AddBoolToPayload([]bool{})
// assert.NotNil(t, err)
// err = w.FinishPayloadWriter()
// assert.Nil(t, err)
// err = w.AddBoolToPayload([]bool{false})
// assert.NotNil(t, err)
// })
t.Run("TestAddBoolAfterFinish", func(t *testing.T) { t.Run("TestAddBoolAfterFinish", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Bool, false) w, err := NewPayloadWriter(schemapb.DataType_Bool)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -851,7 +828,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddInt8AfterFinish", func(t *testing.T) { t.Run("TestAddInt8AfterFinish", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int8, false) w, err := NewPayloadWriter(schemapb.DataType_Int8)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
defer w.Close() defer w.Close()
@ -867,7 +844,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestAddInt16AfterFinish", func(t *testing.T) { t.Run("TestAddInt16AfterFinish", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int16, false) w, err := NewPayloadWriter(schemapb.DataType_Int16)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
defer w.Close() defer w.Close()
@ -883,7 +860,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestAddInt32AfterFinish", func(t *testing.T) { t.Run("TestAddInt32AfterFinish", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int32, false) w, err := NewPayloadWriter(schemapb.DataType_Int32)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
defer w.Close() defer w.Close()
@ -899,7 +876,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestAddInt64AfterFinish", func(t *testing.T) { t.Run("TestAddInt64AfterFinish", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int64, false) w, err := NewPayloadWriter(schemapb.DataType_Int64)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
defer w.Close() defer w.Close()
@ -915,7 +892,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestAddFloatAfterFinish", func(t *testing.T) { t.Run("TestAddFloatAfterFinish", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Float, false) w, err := NewPayloadWriter(schemapb.DataType_Float)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
defer w.Close() defer w.Close()
@ -931,7 +908,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestAddDoubleAfterFinish", func(t *testing.T) { t.Run("TestAddDoubleAfterFinish", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Double, false) w, err := NewPayloadWriter(schemapb.DataType_Double)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
defer w.Close() defer w.Close()
@ -947,7 +924,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestAddOneStringAfterFinish", func(t *testing.T) { t.Run("TestAddOneStringAfterFinish", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_String, false) w, err := NewPayloadWriter(schemapb.DataType_String)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
defer w.Close() defer w.Close()
@ -963,7 +940,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestAddBinVectorAfterFinish", func(t *testing.T) { t.Run("TestAddBinVectorAfterFinish", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, false, 8) w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, WithDim(8))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
defer w.Close() defer w.Close()
@ -987,7 +964,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestAddFloatVectorAfterFinish", func(t *testing.T) { t.Run("TestAddFloatVectorAfterFinish", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_FloatVector, false, 8) w, err := NewPayloadWriter(schemapb.DataType_FloatVector, WithDim(8))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
defer w.Close() defer w.Close()
@ -1008,7 +985,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestAddFloat16VectorAfterFinish", func(t *testing.T) { t.Run("TestAddFloat16VectorAfterFinish", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Float16Vector, false, 8) w, err := NewPayloadWriter(schemapb.DataType_Float16Vector, WithDim(8))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
defer w.Close() defer w.Close()
@ -1032,7 +1009,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestAddBFloat16VectorAfterFinish", func(t *testing.T) { t.Run("TestAddBFloat16VectorAfterFinish", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_BFloat16Vector, false, 8) w, err := NewPayloadWriter(schemapb.DataType_BFloat16Vector, WithDim(8))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
defer w.Close() defer w.Close()
@ -1056,7 +1033,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestAddSparseFloatVectorAfterFinish", func(t *testing.T) { t.Run("TestAddSparseFloatVectorAfterFinish", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_SparseFloatVector, false) w, err := NewPayloadWriter(schemapb.DataType_SparseFloatVector)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
defer w.Close() defer w.Close()
@ -1100,7 +1077,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetBoolError", func(t *testing.T) { t.Run("TestGetBoolError", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int8, false) w, err := NewPayloadWriter(schemapb.DataType_Int8)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1124,7 +1101,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetBoolError2", func(t *testing.T) { t.Run("TestGetBoolError2", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Bool, false) w, err := NewPayloadWriter(schemapb.DataType_Bool)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1145,7 +1122,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetInt8Error", func(t *testing.T) { t.Run("TestGetInt8Error", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Bool, false) w, err := NewPayloadWriter(schemapb.DataType_Bool)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1169,7 +1146,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetInt8Error2", func(t *testing.T) { t.Run("TestGetInt8Error2", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int8, false) w, err := NewPayloadWriter(schemapb.DataType_Int8)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1190,7 +1167,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetInt16Error", func(t *testing.T) { t.Run("TestGetInt16Error", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Bool, false) w, err := NewPayloadWriter(schemapb.DataType_Bool)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1214,7 +1191,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetInt16Error2", func(t *testing.T) { t.Run("TestGetInt16Error2", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int16, false) w, err := NewPayloadWriter(schemapb.DataType_Int16)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1235,7 +1212,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetInt32Error", func(t *testing.T) { t.Run("TestGetInt32Error", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Bool, false) w, err := NewPayloadWriter(schemapb.DataType_Bool)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1259,7 +1236,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetInt32Error2", func(t *testing.T) { t.Run("TestGetInt32Error2", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int32, false) w, err := NewPayloadWriter(schemapb.DataType_Int32)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1280,7 +1257,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetInt64Error", func(t *testing.T) { t.Run("TestGetInt64Error", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Bool, false) w, err := NewPayloadWriter(schemapb.DataType_Bool)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1304,7 +1281,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetInt64Error2", func(t *testing.T) { t.Run("TestGetInt64Error2", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int64, false) w, err := NewPayloadWriter(schemapb.DataType_Int64)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1325,7 +1302,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetFloatError", func(t *testing.T) { t.Run("TestGetFloatError", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Bool, false) w, err := NewPayloadWriter(schemapb.DataType_Bool)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1349,7 +1326,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetFloatError2", func(t *testing.T) { t.Run("TestGetFloatError2", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Float, false) w, err := NewPayloadWriter(schemapb.DataType_Float)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1370,7 +1347,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetDoubleError", func(t *testing.T) { t.Run("TestGetDoubleError", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Bool, false) w, err := NewPayloadWriter(schemapb.DataType_Bool)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1394,7 +1371,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetDoubleError2", func(t *testing.T) { t.Run("TestGetDoubleError2", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Double, false) w, err := NewPayloadWriter(schemapb.DataType_Double)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1415,7 +1392,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetStringError", func(t *testing.T) { t.Run("TestGetStringError", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Bool, false) w, err := NewPayloadWriter(schemapb.DataType_Bool)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1439,7 +1416,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetStringError2", func(t *testing.T) { t.Run("TestGetStringError2", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_String, false) w, err := NewPayloadWriter(schemapb.DataType_String)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1464,7 +1441,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetArrayError", func(t *testing.T) { t.Run("TestGetArrayError", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Bool, false) w, err := NewPayloadWriter(schemapb.DataType_Bool)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1488,7 +1465,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetBinaryVectorError", func(t *testing.T) { t.Run("TestGetBinaryVectorError", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Bool, false) w, err := NewPayloadWriter(schemapb.DataType_Bool)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1512,7 +1489,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetBinaryVectorError2", func(t *testing.T) { t.Run("TestGetBinaryVectorError2", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, false, 8) w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, WithDim(8))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1533,7 +1510,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetFloatVectorError", func(t *testing.T) { t.Run("TestGetFloatVectorError", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Bool, false) w, err := NewPayloadWriter(schemapb.DataType_Bool)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1557,7 +1534,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
}) })
t.Run("TestGetFloatVectorError2", func(t *testing.T) { t.Run("TestGetFloatVectorError2", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_FloatVector, false, 8) w, err := NewPayloadWriter(schemapb.DataType_FloatVector, WithDim(8))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1579,7 +1556,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestByteArrayDatasetError", func(t *testing.T) { t.Run("TestByteArrayDatasetError", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_String, false) w, err := NewPayloadWriter(schemapb.DataType_String)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1619,7 +1596,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
vec = append(vec, 1) vec = append(vec, 1)
} }
w, err := NewPayloadWriter(schemapb.DataType_FloatVector, false) w, err := NewPayloadWriter(schemapb.DataType_FloatVector)
assert.NoError(t, err) assert.NoError(t, err)
err = w.AddFloatVectorToPayload(vec, 128) err = w.AddFloatVectorToPayload(vec, 128)
@ -1635,7 +1612,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddBool with wrong valids", func(t *testing.T) { t.Run("TestAddBool with wrong valids", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Bool, false) w, err := NewPayloadWriter(schemapb.DataType_Bool)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1644,7 +1621,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddInt8 with wrong valids", func(t *testing.T) { t.Run("TestAddInt8 with wrong valids", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int8, false) w, err := NewPayloadWriter(schemapb.DataType_Int8)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1653,7 +1630,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddInt16 with wrong valids", func(t *testing.T) { t.Run("TestAddInt16 with wrong valids", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int16, false) w, err := NewPayloadWriter(schemapb.DataType_Int16)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1662,7 +1639,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddInt32 with wrong valids", func(t *testing.T) { t.Run("TestAddInt32 with wrong valids", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int32, false) w, err := NewPayloadWriter(schemapb.DataType_Int32)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1671,7 +1648,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddInt64 with wrong valids", func(t *testing.T) { t.Run("TestAddInt64 with wrong valids", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int64, false) w, err := NewPayloadWriter(schemapb.DataType_Int64)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1680,7 +1657,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddFloat32 with wrong valids", func(t *testing.T) { t.Run("TestAddFloat32 with wrong valids", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Float, false) w, err := NewPayloadWriter(schemapb.DataType_Float)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1689,7 +1666,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddDouble with wrong valids", func(t *testing.T) { t.Run("TestAddDouble with wrong valids", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Double, false) w, err := NewPayloadWriter(schemapb.DataType_Double)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1698,7 +1675,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddAddString with wrong valids", func(t *testing.T) { t.Run("TestAddAddString with wrong valids", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_String, false) w, err := NewPayloadWriter(schemapb.DataType_String)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1707,7 +1684,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddArray with wrong valids", func(t *testing.T) { t.Run("TestAddArray with wrong valids", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Array, false) w, err := NewPayloadWriter(schemapb.DataType_Array)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1722,7 +1699,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddJSON with wrong valids", func(t *testing.T) { t.Run("TestAddJSON with wrong valids", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_JSON, false) w, err := NewPayloadWriter(schemapb.DataType_JSON)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1733,7 +1710,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
func TestPayload_NullableReaderAndWriter(t *testing.T) { func TestPayload_NullableReaderAndWriter(t *testing.T) {
t.Run("TestBool", func(t *testing.T) { t.Run("TestBool", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Bool, true) w, err := NewPayloadWriter(schemapb.DataType_Bool, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1770,7 +1747,7 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}) })
t.Run("TestInt8", func(t *testing.T) { t.Run("TestInt8", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int8, true) w, err := NewPayloadWriter(schemapb.DataType_Int8, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1810,7 +1787,7 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}) })
t.Run("TestInt16", func(t *testing.T) { t.Run("TestInt16", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int16, true) w, err := NewPayloadWriter(schemapb.DataType_Int16, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1848,7 +1825,7 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}) })
t.Run("TestInt32", func(t *testing.T) { t.Run("TestInt32", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int32, true) w, err := NewPayloadWriter(schemapb.DataType_Int32, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1887,7 +1864,7 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}) })
t.Run("TestInt64", func(t *testing.T) { t.Run("TestInt64", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int64, true) w, err := NewPayloadWriter(schemapb.DataType_Int64, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1926,7 +1903,7 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}) })
t.Run("TestFloat32", func(t *testing.T) { t.Run("TestFloat32", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Float, true) w, err := NewPayloadWriter(schemapb.DataType_Float, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -1965,7 +1942,7 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}) })
t.Run("TestDouble", func(t *testing.T) { t.Run("TestDouble", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Double, true) w, err := NewPayloadWriter(schemapb.DataType_Double, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -2004,7 +1981,7 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddString", func(t *testing.T) { t.Run("TestAddString", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_String, true) w, err := NewPayloadWriter(schemapb.DataType_String, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -2052,7 +2029,7 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddArray", func(t *testing.T) { t.Run("TestAddArray", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Array, true) w, err := NewPayloadWriter(schemapb.DataType_Array, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -2124,7 +2101,7 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddJSON", func(t *testing.T) { t.Run("TestAddJSON", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_JSON, true) w, err := NewPayloadWriter(schemapb.DataType_JSON, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -2172,22 +2149,22 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}) })
t.Run("TestBinaryVector", func(t *testing.T) { t.Run("TestBinaryVector", func(t *testing.T) {
_, err := NewPayloadWriter(schemapb.DataType_BinaryVector, true, 8) _, err := NewPayloadWriter(schemapb.DataType_BinaryVector, WithNullable(true), WithDim(8))
assert.ErrorIs(t, err, merr.ErrParameterInvalid) assert.ErrorIs(t, err, merr.ErrParameterInvalid)
}) })
t.Run("TestFloatVector", func(t *testing.T) { t.Run("TestFloatVector", func(t *testing.T) {
_, err := NewPayloadWriter(schemapb.DataType_FloatVector, true, 1) _, err := NewPayloadWriter(schemapb.DataType_FloatVector, WithNullable(true), WithDim(1))
assert.ErrorIs(t, err, merr.ErrParameterInvalid) assert.ErrorIs(t, err, merr.ErrParameterInvalid)
}) })
t.Run("TestFloat16Vector", func(t *testing.T) { t.Run("TestFloat16Vector", func(t *testing.T) {
_, err := NewPayloadWriter(schemapb.DataType_Float16Vector, true, 1) _, err := NewPayloadWriter(schemapb.DataType_Float16Vector, WithNullable(true), WithDim(1))
assert.ErrorIs(t, err, merr.ErrParameterInvalid) assert.ErrorIs(t, err, merr.ErrParameterInvalid)
}) })
t.Run("TestAddBool with wrong valids", func(t *testing.T) { t.Run("TestAddBool with wrong valids", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Bool, true) w, err := NewPayloadWriter(schemapb.DataType_Bool, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -2196,7 +2173,7 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddInt8 with wrong valids", func(t *testing.T) { t.Run("TestAddInt8 with wrong valids", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int8, true) w, err := NewPayloadWriter(schemapb.DataType_Int8, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -2205,7 +2182,7 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddInt16 with wrong valids", func(t *testing.T) { t.Run("TestAddInt16 with wrong valids", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int16, true) w, err := NewPayloadWriter(schemapb.DataType_Int16, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -2214,7 +2191,7 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddInt32 with wrong valids", func(t *testing.T) { t.Run("TestAddInt32 with wrong valids", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int32, true) w, err := NewPayloadWriter(schemapb.DataType_Int32, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -2223,7 +2200,7 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddInt64 with wrong valids", func(t *testing.T) { t.Run("TestAddInt64 with wrong valids", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int64, true) w, err := NewPayloadWriter(schemapb.DataType_Int64, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -2232,7 +2209,7 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddFloat32 with wrong valids", func(t *testing.T) { t.Run("TestAddFloat32 with wrong valids", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Float, true) w, err := NewPayloadWriter(schemapb.DataType_Float, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -2241,7 +2218,7 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddDouble with wrong valids", func(t *testing.T) { t.Run("TestAddDouble with wrong valids", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Double, true) w, err := NewPayloadWriter(schemapb.DataType_Double, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -2250,25 +2227,25 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddAddString with wrong valids", func(t *testing.T) { t.Run("TestAddAddString with wrong valids", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_String, true) w, err := NewPayloadWriter(schemapb.DataType_String, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddDataToPayload("hello0", nil) err = w.AddDataToPayload("hello0", nil)
assert.ErrorIs(t, err, merr.ErrParameterInvalid) assert.ErrorIs(t, err, merr.ErrParameterInvalid)
w, err = NewPayloadWriter(schemapb.DataType_String, true) w, err = NewPayloadWriter(schemapb.DataType_String, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddDataToPayload("hello0", []bool{false, false}) err = w.AddDataToPayload("hello0", []bool{false, false})
assert.ErrorIs(t, err, merr.ErrParameterInvalid) assert.ErrorIs(t, err, merr.ErrParameterInvalid)
w, err = NewPayloadWriter(schemapb.DataType_String, false) w, err = NewPayloadWriter(schemapb.DataType_String)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddDataToPayload("hello0", []bool{false}) err = w.AddDataToPayload("hello0", []bool{false})
assert.ErrorIs(t, err, merr.ErrParameterInvalid) assert.ErrorIs(t, err, merr.ErrParameterInvalid)
w, err = NewPayloadWriter(schemapb.DataType_String, false) w, err = NewPayloadWriter(schemapb.DataType_String)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddDataToPayload("hello0", []bool{true}) err = w.AddDataToPayload("hello0", []bool{true})
@ -2276,7 +2253,7 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddArray with wrong valids", func(t *testing.T) { t.Run("TestAddArray with wrong valids", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Array, true) w, err := NewPayloadWriter(schemapb.DataType_Array, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddDataToPayload(&schemapb.ScalarField{ err = w.AddDataToPayload(&schemapb.ScalarField{
@ -2288,7 +2265,7 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}, nil) }, nil)
assert.ErrorIs(t, err, merr.ErrParameterInvalid) assert.ErrorIs(t, err, merr.ErrParameterInvalid)
w, err = NewPayloadWriter(schemapb.DataType_Array, true) w, err = NewPayloadWriter(schemapb.DataType_Array, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -2301,7 +2278,7 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}, []bool{false, false}) }, []bool{false, false})
assert.ErrorIs(t, err, merr.ErrParameterInvalid) assert.ErrorIs(t, err, merr.ErrParameterInvalid)
w, err = NewPayloadWriter(schemapb.DataType_Array, false) w, err = NewPayloadWriter(schemapb.DataType_Array)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddDataToPayload(&schemapb.ScalarField{ err = w.AddDataToPayload(&schemapb.ScalarField{
@ -2313,7 +2290,7 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}, []bool{false}) }, []bool{false})
assert.ErrorIs(t, err, merr.ErrParameterInvalid) assert.ErrorIs(t, err, merr.ErrParameterInvalid)
w, err = NewPayloadWriter(schemapb.DataType_Array, false) w, err = NewPayloadWriter(schemapb.DataType_Array)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddDataToPayload(&schemapb.ScalarField{ err = w.AddDataToPayload(&schemapb.ScalarField{
@ -2327,25 +2304,25 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
}) })
t.Run("TestAddJSON with wrong valids", func(t *testing.T) { t.Run("TestAddJSON with wrong valids", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_JSON, true) w, err := NewPayloadWriter(schemapb.DataType_JSON, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddDataToPayload([]byte(`{"1":"1"}`), nil) err = w.AddDataToPayload([]byte(`{"1":"1"}`), nil)
assert.ErrorIs(t, err, merr.ErrParameterInvalid) assert.ErrorIs(t, err, merr.ErrParameterInvalid)
w, err = NewPayloadWriter(schemapb.DataType_JSON, true) w, err = NewPayloadWriter(schemapb.DataType_JSON, WithNullable(true))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddDataToPayload([]byte(`{"1":"1"}`), []bool{false, false}) err = w.AddDataToPayload([]byte(`{"1":"1"}`), []bool{false, false})
assert.ErrorIs(t, err, merr.ErrParameterInvalid) assert.ErrorIs(t, err, merr.ErrParameterInvalid)
w, err = NewPayloadWriter(schemapb.DataType_JSON, false) w, err = NewPayloadWriter(schemapb.DataType_JSON)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddDataToPayload([]byte(`{"1":"1"}`), []bool{false}) err = w.AddDataToPayload([]byte(`{"1":"1"}`), []bool{false})
assert.ErrorIs(t, err, merr.ErrParameterInvalid) assert.ErrorIs(t, err, merr.ErrParameterInvalid)
w, err = NewPayloadWriter(schemapb.DataType_JSON, false) w, err = NewPayloadWriter(schemapb.DataType_JSON)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
err = w.AddDataToPayload([]byte(`{"1":"1"}`), []bool{true}) err = w.AddDataToPayload([]byte(`{"1":"1"}`), []bool{true})
@ -2355,7 +2332,7 @@ func TestPayload_NullableReaderAndWriter(t *testing.T) {
func TestArrowRecordReader(t *testing.T) { func TestArrowRecordReader(t *testing.T) {
t.Run("TestArrowRecordReader", func(t *testing.T) { t.Run("TestArrowRecordReader", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_String, false) w, err := NewPayloadWriter(schemapb.DataType_String)
assert.NoError(t, err) assert.NoError(t, err)
defer w.Close() defer w.Close()
@ -2395,7 +2372,7 @@ func TestArrowRecordReader(t *testing.T) {
} }
func dataGen(size int) ([]byte, error) { func dataGen(size int) ([]byte, error) {
w, err := NewPayloadWriter(schemapb.DataType_String, false) w, err := NewPayloadWriter(schemapb.DataType_String)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -2422,7 +2399,7 @@ func dataGen(size int) ([]byte, error) {
} }
func BenchmarkDefaultReader(b *testing.B) { func BenchmarkDefaultReader(b *testing.B) {
size := 1000000 size := 10
buffer, err := dataGen(size) buffer, err := dataGen(size)
assert.NoError(b, err) assert.NoError(b, err)
@ -2446,7 +2423,7 @@ func BenchmarkDefaultReader(b *testing.B) {
} }
func BenchmarkDataSetReader(b *testing.B) { func BenchmarkDataSetReader(b *testing.B) {
size := 1000000 size := 10
buffer, err := dataGen(size) buffer, err := dataGen(size)
assert.NoError(b, err) assert.NoError(b, err)
@ -2474,7 +2451,7 @@ func BenchmarkDataSetReader(b *testing.B) {
} }
func BenchmarkArrowRecordReader(b *testing.B) { func BenchmarkArrowRecordReader(b *testing.B) {
size := 1000000 size := 10
buffer, err := dataGen(size) buffer, err := dataGen(size)
assert.NoError(b, err) assert.NoError(b, err)

View File

@ -39,6 +39,26 @@ import (
var _ PayloadWriterInterface = (*NativePayloadWriter)(nil) var _ PayloadWriterInterface = (*NativePayloadWriter)(nil)
// PayloadWriterOptions is a functional option: a function that receives the
// NativePayloadWriter under construction and adjusts one of its fields.
// NewPayloadWriter invokes every supplied option, in order, on the writer it builds.
type PayloadWriterOptions func(*NativePayloadWriter)
// WithNullable returns an option that stores the given flag in the writer's
// nullable field.
func WithNullable(nullable bool) PayloadWriterOptions {
	apply := func(pw *NativePayloadWriter) {
		pw.nullable = nullable
	}
	return apply
}
// WithWriterProps returns an option that replaces the writer's parquet writer
// properties with writerProps. The pointer is stored as given; it is not copied.
func WithWriterProps(writerProps *parquet.WriterProperties) PayloadWriterOptions {
	apply := func(pw *NativePayloadWriter) {
		pw.writerProps = writerProps
	}
	return apply
}
// WithDim returns an option that sets the writer's dimension to dim, wrapped
// via NewNullableInt so that an explicitly supplied dimension can be told
// apart from one that was never provided.
func WithDim(dim int) PayloadWriterOptions {
	apply := func(pw *NativePayloadWriter) {
		pw.dim = NewNullableInt(dim)
	}
	return apply
}
type NativePayloadWriter struct { type NativePayloadWriter struct {
dataType schemapb.DataType dataType schemapb.DataType
arrowType arrow.DataType arrowType arrow.DataType
@ -47,43 +67,42 @@ type NativePayloadWriter struct {
flushedRows int flushedRows int
output *bytes.Buffer output *bytes.Buffer
releaseOnce sync.Once releaseOnce sync.Once
dim int dim *NullableInt
nullable bool nullable bool
writerProps *parquet.WriterProperties
} }
func NewPayloadWriter(colType schemapb.DataType, nullable bool, dim ...int) (PayloadWriterInterface, error) { func NewPayloadWriter(colType schemapb.DataType, options ...PayloadWriterOptions) (PayloadWriterInterface, error) {
var arrowType arrow.DataType w := &NativePayloadWriter{
var dimension int
// writer for sparse float vector doesn't require dim
if typeutil.IsVectorType(colType) && !typeutil.IsSparseFloatVectorType(colType) {
if len(dim) != 1 {
return nil, merr.WrapErrParameterInvalidMsg("incorrect input numbers")
}
if nullable {
return nil, merr.WrapErrParameterInvalidMsg("vector type not supprot nullable")
}
arrowType = milvusDataTypeToArrowType(colType, dim[0])
dimension = dim[0]
} else {
if len(dim) != 0 {
return nil, merr.WrapErrParameterInvalidMsg("incorrect input numbers")
}
arrowType = milvusDataTypeToArrowType(colType, 1)
dimension = 1
}
builder := array.NewBuilder(memory.DefaultAllocator, arrowType)
return &NativePayloadWriter{
dataType: colType, dataType: colType,
arrowType: arrowType,
builder: builder,
finished: false, finished: false,
flushedRows: 0, flushedRows: 0,
output: new(bytes.Buffer), output: new(bytes.Buffer),
dim: dimension, nullable: false,
nullable: nullable, writerProps: parquet.NewWriterProperties(
}, nil parquet.WithCompression(compress.Codecs.Zstd),
parquet.WithCompressionLevel(3),
),
dim: &NullableInt{},
}
for _, o := range options {
o(w)
}
// writer for sparse float vector doesn't require dim
if typeutil.IsVectorType(colType) && !typeutil.IsSparseFloatVectorType(colType) {
if w.dim.IsNull() {
return nil, merr.WrapErrParameterInvalidMsg("incorrect input numbers")
}
if w.nullable {
return nil, merr.WrapErrParameterInvalidMsg("vector type does not support nullable")
}
} else {
w.dim = NewNullableInt(1)
}
w.arrowType = milvusDataTypeToArrowType(colType, *w.dim.Value)
w.builder = array.NewBuilder(memory.DefaultAllocator, w.arrowType)
return w, nil
} }
func (w *NativePayloadWriter) AddDataToPayload(data interface{}, validData []bool) error { func (w *NativePayloadWriter) AddDataToPayload(data interface{}, validData []bool) error {
@ -192,25 +211,25 @@ func (w *NativePayloadWriter) AddDataToPayload(data interface{}, validData []boo
if !ok { if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type") return merr.WrapErrParameterInvalidMsg("incorrect data type")
} }
return w.AddBinaryVectorToPayload(val, w.dim) return w.AddBinaryVectorToPayload(val, w.dim.GetValue())
case schemapb.DataType_FloatVector: case schemapb.DataType_FloatVector:
val, ok := data.([]float32) val, ok := data.([]float32)
if !ok { if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type") return merr.WrapErrParameterInvalidMsg("incorrect data type")
} }
return w.AddFloatVectorToPayload(val, w.dim) return w.AddFloatVectorToPayload(val, w.dim.GetValue())
case schemapb.DataType_Float16Vector: case schemapb.DataType_Float16Vector:
val, ok := data.([]byte) val, ok := data.([]byte)
if !ok { if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type") return merr.WrapErrParameterInvalidMsg("incorrect data type")
} }
return w.AddFloat16VectorToPayload(val, w.dim) return w.AddFloat16VectorToPayload(val, w.dim.GetValue())
case schemapb.DataType_BFloat16Vector: case schemapb.DataType_BFloat16Vector:
val, ok := data.([]byte) val, ok := data.([]byte)
if !ok { if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type") return merr.WrapErrParameterInvalidMsg("incorrect data type")
} }
return w.AddBFloat16VectorToPayload(val, w.dim) return w.AddBFloat16VectorToPayload(val, w.dim.GetValue())
case schemapb.DataType_SparseFloatVector: case schemapb.DataType_SparseFloatVector:
val, ok := data.(*SparseFloatVectorFieldData) val, ok := data.(*SparseFloatVectorFieldData)
if !ok { if !ok {
@ -674,14 +693,10 @@ func (w *NativePayloadWriter) FinishPayloadWriter() error {
table := array.NewTable(schema, []arrow.Column{column}, int64(column.Len())) table := array.NewTable(schema, []arrow.Column{column}, int64(column.Len()))
defer table.Release() defer table.Release()
props := parquet.NewWriterProperties(
parquet.WithCompression(compress.Codecs.Zstd),
parquet.WithCompressionLevel(3),
)
return pqarrow.WriteTable(table, return pqarrow.WriteTable(table,
w.output, w.output,
1024*1024*1024, 1024*1024*1024,
props, w.writerProps,
pqarrow.DefaultWriterProps(), pqarrow.DefaultWriterProps(),
) )
} }

View File

@ -3,6 +3,7 @@ package storage
import ( import (
"testing" "testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
@ -10,14 +11,11 @@ import (
func TestPayloadWriter_Failed(t *testing.T) { func TestPayloadWriter_Failed(t *testing.T) {
t.Run("wrong input", func(t *testing.T) { t.Run("wrong input", func(t *testing.T) {
_, err := NewPayloadWriter(schemapb.DataType_FloatVector, false) _, err := NewPayloadWriter(schemapb.DataType_FloatVector)
require.Error(t, err)
_, err = NewPayloadWriter(schemapb.DataType_Bool, false, 1)
require.Error(t, err) require.Error(t, err)
}) })
t.Run("Test Bool", func(t *testing.T) { t.Run("Test Bool", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Bool, false) w, err := NewPayloadWriter(schemapb.DataType_Bool)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -30,7 +28,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
err = w.AddBoolToPayload([]bool{false}, nil) err = w.AddBoolToPayload([]bool{false}, nil)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Float, false) w, err = NewPayloadWriter(schemapb.DataType_Float)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -39,7 +37,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
}) })
t.Run("Test Byte", func(t *testing.T) { t.Run("Test Byte", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int8, Params.CommonCfg.MaxBloomFalsePositive.PanicIfEmpty) w, err := NewPayloadWriter(schemapb.DataType_Int8, WithNullable(Params.CommonCfg.MaxBloomFalsePositive.PanicIfEmpty))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -52,7 +50,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
err = w.AddByteToPayload([]byte{0}, nil) err = w.AddByteToPayload([]byte{0}, nil)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Float, false) w, err = NewPayloadWriter(schemapb.DataType_Float)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -61,7 +59,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
}) })
t.Run("Test Int8", func(t *testing.T) { t.Run("Test Int8", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int8, false) w, err := NewPayloadWriter(schemapb.DataType_Int8)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -74,7 +72,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
err = w.AddInt8ToPayload([]int8{0}, nil) err = w.AddInt8ToPayload([]int8{0}, nil)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Float, false) w, err = NewPayloadWriter(schemapb.DataType_Float)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -83,7 +81,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
}) })
t.Run("Test Int16", func(t *testing.T) { t.Run("Test Int16", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int16, false) w, err := NewPayloadWriter(schemapb.DataType_Int16)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -96,7 +94,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
err = w.AddInt16ToPayload([]int16{0}, nil) err = w.AddInt16ToPayload([]int16{0}, nil)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Float, false) w, err = NewPayloadWriter(schemapb.DataType_Float)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -105,7 +103,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
}) })
t.Run("Test Int32", func(t *testing.T) { t.Run("Test Int32", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int32, false) w, err := NewPayloadWriter(schemapb.DataType_Int32)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -118,7 +116,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
err = w.AddInt32ToPayload([]int32{0}, nil) err = w.AddInt32ToPayload([]int32{0}, nil)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Float, false) w, err = NewPayloadWriter(schemapb.DataType_Float)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -127,7 +125,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
}) })
t.Run("Test Int64", func(t *testing.T) { t.Run("Test Int64", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int64, Params.CommonCfg.MaxBloomFalsePositive.PanicIfEmpty) w, err := NewPayloadWriter(schemapb.DataType_Int64, WithNullable(Params.CommonCfg.MaxBloomFalsePositive.PanicIfEmpty))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -140,7 +138,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
err = w.AddInt64ToPayload([]int64{0}, nil) err = w.AddInt64ToPayload([]int64{0}, nil)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Float, false) w, err = NewPayloadWriter(schemapb.DataType_Float)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -149,7 +147,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
}) })
t.Run("Test Float", func(t *testing.T) { t.Run("Test Float", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Float, false) w, err := NewPayloadWriter(schemapb.DataType_Float)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -162,7 +160,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
err = w.AddFloatToPayload([]float32{0}, nil) err = w.AddFloatToPayload([]float32{0}, nil)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64, false) w, err = NewPayloadWriter(schemapb.DataType_Int64)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -171,7 +169,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
}) })
t.Run("Test Double", func(t *testing.T) { t.Run("Test Double", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Double, false) w, err := NewPayloadWriter(schemapb.DataType_Double)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -184,7 +182,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
err = w.AddDoubleToPayload([]float64{0}, nil) err = w.AddDoubleToPayload([]float64{0}, nil)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64, false) w, err = NewPayloadWriter(schemapb.DataType_Int64)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -193,7 +191,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
}) })
t.Run("Test String", func(t *testing.T) { t.Run("Test String", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_String, false) w, err := NewPayloadWriter(schemapb.DataType_String)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -203,7 +201,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
err = w.AddOneStringToPayload("test", false) err = w.AddOneStringToPayload("test", false)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64, false) w, err = NewPayloadWriter(schemapb.DataType_Int64)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -212,7 +210,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
}) })
t.Run("Test Array", func(t *testing.T) { t.Run("Test Array", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Array, false) w, err := NewPayloadWriter(schemapb.DataType_Array)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -222,7 +220,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
err = w.AddOneArrayToPayload(&schemapb.ScalarField{}, false) err = w.AddOneArrayToPayload(&schemapb.ScalarField{}, false)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64, false) w, err = NewPayloadWriter(schemapb.DataType_Int64)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -231,7 +229,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
}) })
t.Run("Test Json", func(t *testing.T) { t.Run("Test Json", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_JSON, false) w, err := NewPayloadWriter(schemapb.DataType_JSON)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -241,7 +239,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
err = w.AddOneJSONToPayload([]byte{0, 1}, false) err = w.AddOneJSONToPayload([]byte{0, 1}, false)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64, false) w, err = NewPayloadWriter(schemapb.DataType_Int64)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -250,7 +248,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
}) })
t.Run("Test BinaryVector", func(t *testing.T) { t.Run("Test BinaryVector", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, false, 8) w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, WithDim(8))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -265,7 +263,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
err = w.AddBinaryVectorToPayload(data, 8) err = w.AddBinaryVectorToPayload(data, 8)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64, false) w, err = NewPayloadWriter(schemapb.DataType_Int64)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -274,7 +272,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
}) })
t.Run("Test FloatVector", func(t *testing.T) { t.Run("Test FloatVector", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_FloatVector, false, 8) w, err := NewPayloadWriter(schemapb.DataType_FloatVector, WithDim(8))
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -292,7 +290,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
err = w.AddFloatToPayload(data, nil) err = w.AddFloatToPayload(data, nil)
require.Error(t, err) require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64, false) w, err = NewPayloadWriter(schemapb.DataType_Int64)
require.Nil(t, err) require.Nil(t, err)
require.NotNil(t, w) require.NotNil(t, w)
@ -300,3 +298,33 @@ func TestPayloadWriter_Failed(t *testing.T) {
require.Error(t, err) require.Error(t, err)
}) })
} }
// TestParquetEncoding checks that the writer properties produced by
// getFieldWriterProps for a primary-key field have dictionary encoding
// disabled once they are installed on a payload writer via WithWriterProps.
func TestParquetEncoding(t *testing.T) {
	t.Run("test int64 pk", func(t *testing.T) {
		field := &schemapb.FieldSchema{IsPrimaryKey: true, DataType: schemapb.DataType_Int64}

		// Preconditions use require: if construction fails, w is nil and the
		// following calls would panic instead of reporting the real error.
		w, err := NewPayloadWriter(schemapb.DataType_Int64, WithWriterProps(getFieldWriterProps(field)))
		require.NoError(t, err)
		require.NotNil(t, w)

		err = w.AddDataToPayload([]int64{1, 2, 3}, nil)
		require.NoError(t, err)

		// Check the finish error before inspecting the writer.
		err = w.FinishPayloadWriter()
		require.NoError(t, err)

		native, ok := w.(*NativePayloadWriter)
		require.True(t, ok)
		assert.False(t, native.writerProps.DictionaryEnabled())
	})

	t.Run("test string pk", func(t *testing.T) {
		field := &schemapb.FieldSchema{IsPrimaryKey: true, DataType: schemapb.DataType_String}

		w, err := NewPayloadWriter(schemapb.DataType_String, WithWriterProps(getFieldWriterProps(field)))
		require.NoError(t, err)
		require.NotNil(t, w)

		err = w.AddOneStringToPayload("1", true)
		require.NoError(t, err)

		err = w.FinishPayloadWriter()
		require.NoError(t, err)

		native, ok := w.(*NativePayloadWriter)
		require.True(t, ok)
		assert.False(t, native.writerProps.DictionaryEnabled())
	})
}

View File

@ -40,7 +40,7 @@ func TestPrintBinlogFilesInt64(t *testing.T) {
curTS := time.Now().UnixNano() / int64(time.Millisecond) curTS := time.Now().UnixNano() / int64(time.Millisecond)
e1, err := w.NextInsertEventWriter(false) e1, err := w.NextInsertEventWriter()
assert.NoError(t, err) assert.NoError(t, err)
err = e1.AddDataToPayload([]int64{1, 2, 3}, nil) err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err) assert.NoError(t, err)
@ -50,7 +50,7 @@ func TestPrintBinlogFilesInt64(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
e1.SetEventTimestamp(tsoutil.ComposeTS(curTS+10*60*1000, 0), tsoutil.ComposeTS(curTS+20*60*1000, 0)) e1.SetEventTimestamp(tsoutil.ComposeTS(curTS+10*60*1000, 0), tsoutil.ComposeTS(curTS+20*60*1000, 0))
e2, err := w.NextInsertEventWriter(false) e2, err := w.NextInsertEventWriter()
assert.NoError(t, err) assert.NoError(t, err)
err = e2.AddDataToPayload([]int64{7, 8, 9}, nil) err = e2.AddDataToPayload([]int64{7, 8, 9}, nil)
assert.NoError(t, err) assert.NoError(t, err)

View File

@ -521,6 +521,23 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry {
return m return m
}() }()
// getFieldWriterProps builds the parquet writer properties for a single field.
//
// Every field is written with Zstd compression at level 3. For the primary
// key, dictionary encoding is additionally disabled: parquet does not support
// a custom fallback encoding for now, so only the primary key opts out. This
// can be extended to all fields once a custom fallback encoding is available.
//
// NOTE(review): when these properties are passed through
// WithRecordWriterProps they replace the record writer's own defaults
// wholesale, so default-only settings there (e.g. WithMaxRowGroupLength) are
// not carried over — confirm that is intended.
func getFieldWriterProps(field *schemapb.FieldSchema) *parquet.WriterProperties {
	opts := []parquet.WriterProperty{
		parquet.WithCompression(compress.Codecs.Zstd),
		parquet.WithCompressionLevel(3),
	}
	if field.GetIsPrimaryKey() {
		opts = append(opts, parquet.WithDictionaryDefault(false))
	}
	return parquet.NewWriterProperties(opts...)
}
type DeserializeReader[T any] struct { type DeserializeReader[T any] struct {
rr RecordReader rr RecordReader
deserializer Deserializer[T] deserializer Deserializer[T]
@ -654,12 +671,21 @@ func newCompositeRecordWriter(writers map[FieldID]RecordWriter) *compositeRecord
var _ RecordWriter = (*singleFieldRecordWriter)(nil) var _ RecordWriter = (*singleFieldRecordWriter)(nil)
// RecordWriterOptions is a functional option that mutates a
// singleFieldRecordWriter. newSingleFieldRecordWriter applies each option
// before it creates the underlying parquet file writer.
type RecordWriterOptions func(*singleFieldRecordWriter)
// WithRecordWriterProps returns an option that sets the record writer's
// parquet writer properties to writerProps, overriding whatever was there.
func WithRecordWriterProps(writerProps *parquet.WriterProperties) RecordWriterOptions {
	apply := func(sfw *singleFieldRecordWriter) {
		sfw.writerProps = writerProps
	}
	return apply
}
type singleFieldRecordWriter struct { type singleFieldRecordWriter struct {
fw *pqarrow.FileWriter fw *pqarrow.FileWriter
fieldId FieldID fieldId FieldID
schema *arrow.Schema schema *arrow.Schema
numRows int numRows int
writerProps *parquet.WriterProperties
} }
func (sfw *singleFieldRecordWriter) Write(r Record) error { func (sfw *singleFieldRecordWriter) Write(r Record) error {
@ -674,23 +700,24 @@ func (sfw *singleFieldRecordWriter) Close() {
sfw.fw.Close() sfw.fw.Close()
} }
func newSingleFieldRecordWriter(fieldId FieldID, field arrow.Field, writer io.Writer) (*singleFieldRecordWriter, error) { func newSingleFieldRecordWriter(fieldId FieldID, field arrow.Field, writer io.Writer, opts ...RecordWriterOptions) (*singleFieldRecordWriter, error) {
schema := arrow.NewSchema([]arrow.Field{field}, nil) w := &singleFieldRecordWriter{
fieldId: fieldId,
// use writer properties as same as payload writer's for now schema: arrow.NewSchema([]arrow.Field{field}, nil),
fw, err := pqarrow.NewFileWriter(schema, writer, writerProps: parquet.NewWriterProperties(
parquet.NewWriterProperties( parquet.WithMaxRowGroupLength(math.MaxInt64), // No additional grouping for now.
parquet.WithCompression(compress.Codecs.Zstd), parquet.WithCompression(compress.Codecs.Zstd),
parquet.WithCompressionLevel(3)), parquet.WithCompressionLevel(3)),
pqarrow.DefaultWriterProps()) }
for _, o := range opts {
o(w)
}
fw, err := pqarrow.NewFileWriter(w.schema, writer, w.writerProps, pqarrow.DefaultWriterProps())
if err != nil { if err != nil {
return nil, err return nil, err
} }
return &singleFieldRecordWriter{ w.fw = fw
fw: fw, return w, nil
fieldId: fieldId,
schema: schema,
}, nil
} }
var _ RecordWriter = (*multiFieldRecordWriter)(nil) var _ RecordWriter = (*multiFieldRecordWriter)(nil)

View File

@ -279,7 +279,7 @@ func (bsw *BinlogStreamWriter) GetRecordWriter() (RecordWriter, error) {
Name: strconv.Itoa(int(fid)), Name: strconv.Itoa(int(fid)),
Type: serdeMap[bsw.fieldSchema.DataType].arrowType(int(dim)), Type: serdeMap[bsw.fieldSchema.DataType].arrowType(int(dim)),
Nullable: true, // No nullable check here. Nullable: true, // No nullable check here.
}, &bsw.buf) }, &bsw.buf, WithRecordWriterProps(getFieldWriterProps(bsw.fieldSchema)))
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -431,7 +431,7 @@ func (dsw *DeltalogStreamWriter) GetRecordWriter() (RecordWriter, error) {
Name: dsw.fieldSchema.Name, Name: dsw.fieldSchema.Name,
Type: serdeMap[dsw.fieldSchema.DataType].arrowType(int(dim)), Type: serdeMap[dsw.fieldSchema.DataType].arrowType(int(dim)),
Nullable: false, Nullable: false,
}, &dsw.buf) }, &dsw.buf, WithRecordWriterProps(getFieldWriterProps(dsw.fieldSchema)))
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -141,6 +141,11 @@ func TestBinlogSerializeWriter(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
} }
for _, f := range schema.Fields {
props := writers[f.FieldID].rw.writerProps
assert.Equal(t, !f.IsPrimaryKey, props.DictionaryEnabled())
}
err = reader.Next() err = reader.Next()
assert.Equal(t, io.EOF, err) assert.Equal(t, io.EOF, err)
err = writer.Close() err = writer.Close()
@ -158,8 +163,13 @@ func TestBinlogSerializeWriter(t *testing.T) {
newblobs[i] = blob newblobs[i] = blob
i++ i++
} }
// Both field pk and field 17 are with datatype string and auto id
// in test data. Field pk uses delta byte array encoding, while
// field 17 uses dict encoding.
assert.Less(t, writers[16].buf.Len(), writers[17].buf.Len())
// assert.Equal(t, blobs[0].Value, newblobs[0].Value) // assert.Equal(t, blobs[0].Value, newblobs[0].Value)
reader, err = NewBinlogDeserializeReader(blobs, common.RowIDField) reader, err = NewBinlogDeserializeReader(newblobs, common.RowIDField)
assert.NoError(t, err) assert.NoError(t, err)
defer reader.Close() defer reader.Close()
for i := 1; i <= size; i++ { for i := 1; i <= size; i++ {

View File

@ -1303,3 +1303,21 @@ func GetFilesSize(ctx context.Context, paths []string, cm ChunkManager) (int64,
} }
return totalSize, nil return totalSize, nil
} }
// NullableInt is an optional int. A nil Value means the int is not set.
type NullableInt struct {
	Value *int
}

// NewNullableInt creates a new NullableInt instance holding value.
func NewNullableInt(value int) *NullableInt {
	return &NullableInt{Value: &value}
}

// GetValue returns the stored int, or 0 when the NullableInt is null.
// Use IsNull to tell an unset value apart from an explicit 0.
func (ni NullableInt) GetValue() int {
	// Guard the dereference: a zero-value NullableInt has a nil Value.
	if ni.Value == nil {
		return 0
	}
	return *ni.Value
}

// IsNull checks if the NullableInt is null.
func (ni NullableInt) IsNull() bool {
	return ni.Value == nil
}

View File

@ -81,7 +81,7 @@ func createBinlogBuf(t *testing.T, field *schemapb.FieldSchema, data storage.Fie
dim = 1 dim = 1
} }
evt, err := w.NextInsertEventWriter(false, int(dim)) evt, err := w.NextInsertEventWriter(storage.WithDim(int(dim)))
assert.NoError(t, err) assert.NoError(t, err)
evt.SetEventTimestamp(1, math.MaxInt64) evt.SetEventTimestamp(1, math.MaxInt64)