enhance: enable stream writer in compactions (#32612)
See #31679

Signed-off-by: Ted Xu <ted.xu@zilliz.com>
parent b560602885
commit a9c7ce72b8
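Note (not part of the diff): this change routes compaction writes through per-field binlog stream writers instead of an in-memory *storage.InsertData buffer. The sketch below shows how the pieces introduced here fit together; writeMergedRows is a hypothetical helper, not a function from this commit, and it assumes the datanode package context where newBinlogWriter and uploadInsertLog live.

    // Hypothetical helper (illustration only), assuming the datanode package.
    func writeMergedRows(values []*storage.Value, collectionID, partitionID, segmentID UniqueID,
        schema *schemapb.CollectionSchema,
    ) ([]*Blob, error) {
        // One stream writer per field, plus a finalizer that turns each into a Blob.
        writer, finalizers, err := newBinlogWriter(collectionID, partitionID, segmentID, schema)
        if err != nil {
            return nil, err
        }
        for _, v := range values {
            if err := writer.Write(v); err != nil {
                return nil, err
            }
        }
        // Close flushes rows still sitting in the serialize buffer (see the storage/serde.go hunks).
        if err := writer.Close(); err != nil {
            return nil, err
        }
        blobs := make([]*Blob, 0, len(finalizers))
        for _, finalize := range finalizers {
            blob, err := finalize() // one serialized binlog per field, RowNum filled in
            if err != nil {
                return nil, err
            }
            blobs = append(blobs, blob)
        }
        // These blobs are what the reworked uploadInsertLog below now accepts.
        return blobs, nil
    }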
@@ -80,22 +80,18 @@ func genDeltaBlobs(b io.BinlogIO, allocator allocator.Allocator, data *DeleteDat
 }
 
 // genInsertBlobs returns insert-paths and save blob to kvs
-func genInsertBlobs(b io.BinlogIO, allocator allocator.Allocator, data *InsertData, collectionID, partID, segID UniqueID, iCodec *storage.InsertCodec, kvs map[string][]byte) (map[UniqueID]*datapb.FieldBinlog, error) {
-    inlogs, err := iCodec.Serialize(partID, segID, data)
-    if err != nil {
-        return nil, err
-    }
-
+func genInsertBlobs(b io.BinlogIO, allocator allocator.Allocator, data []*Blob, collectionID, partID, segID UniqueID, kvs map[string][]byte,
+) (map[UniqueID]*datapb.FieldBinlog, error) {
     inpaths := make(map[UniqueID]*datapb.FieldBinlog)
     notifyGenIdx := make(chan struct{})
     defer close(notifyGenIdx)
 
-    generator, err := allocator.GetGenerator(len(inlogs), notifyGenIdx)
+    generator, err := allocator.GetGenerator(len(data), notifyGenIdx)
     if err != nil {
         return nil, err
     }
 
-    for _, blob := range inlogs {
+    for _, blob := range data {
         // Blob Key is generated by Serialize from int64 fieldID in collection schema, which won't raise error in ParseInt
         fID, _ := strconv.ParseInt(blob.GetKey(), 10, 64)
         k := metautil.JoinIDPath(collectionID, partID, segID, fID, <-generator)
@@ -177,22 +173,21 @@ func uploadInsertLog(
     collectionID UniqueID,
     partID UniqueID,
     segID UniqueID,
-    iData *InsertData,
-    iCodec *storage.InsertCodec,
+    data []*Blob,
 ) (map[UniqueID]*datapb.FieldBinlog, error) {
     ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "UploadInsertLog")
     defer span.End()
     kvs := make(map[string][]byte)
 
-    if iData.IsEmpty() {
+    if len(data) <= 0 || data[0].RowNum <= 0 {
         log.Warn("binlog io uploading empty insert data",
             zap.Int64("segmentID", segID),
-            zap.Int64("collectionID", iCodec.Schema.GetID()),
+            zap.Int64("collectionID", collectionID),
         )
         return nil, nil
     }
 
-    inpaths, err := genInsertBlobs(b, allocator, iData, collectionID, partID, segID, iCodec, kvs)
+    inpaths, err := genInsertBlobs(b, allocator, data, collectionID, partID, segID, kvs)
     if err != nil {
         return nil, err
     }
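Note (not part of the diff): since uploadInsertLog no longer serializes for the caller, call sites now serialize first and pass the resulting blobs. A minimal, hedged sketch of the new call pattern, mirroring the updated tests further down; meta, iData, partID, segID and the IO/allocator handles are assumed to be in scope:

    iCodec := storage.NewInsertCodecWithSchema(meta)     // meta: *etcdpb.CollectionMeta
    blobs, err := iCodec.Serialize(partID, segID, iData) // iData: *storage.InsertData
    if err != nil {
        return err
    }
    inpaths, err := uploadInsertLog(ctx, binlogIO, alloc, meta.GetID(), partID, segID, blobs)
    if err != nil {
        return err
    }
    _ = inpaths // map[UniqueID]*datapb.FieldBinlog keyed by field ID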
@@ -124,21 +124,17 @@ func TestBinlogIOInterfaceMethods(t *testing.T) {
         f := &MetaFactory{}
         meta := f.GetCollectionMeta(UniqueID(10001), "test_gen_blobs", schemapb.DataType_Int64)
 
-        t.Run("empty insert", func(t *testing.T) {
-            alloc := allocator.NewMockAllocator(t)
-            binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool())
-            iCodec := storage.NewInsertCodecWithSchema(meta)
-            paths, err := uploadInsertLog(context.Background(), binlogIO, alloc, meta.GetID(), 10, 1, genEmptyInsertData(), iCodec)
-            assert.NoError(t, err)
-            assert.Nil(t, paths)
-        })
-
         t.Run("gen insert blob failed", func(t *testing.T) {
             alloc := allocator.NewMockAllocator(t)
             binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool())
             iCodec := storage.NewInsertCodecWithSchema(meta)
+            var partId int64 = 10
+            var segId int64 = 1
+            iData := genInsertData(2)
+            blobs, err := iCodec.Serialize(10, 1, iData)
+            assert.NoError(t, err)
             alloc.EXPECT().GetGenerator(mock.Anything, mock.Anything).Call.Return(nil, fmt.Errorf("mock err"))
-            _, err := uploadInsertLog(context.Background(), binlogIO, alloc, meta.GetID(), 10, 1, genInsertData(2), iCodec)
+            _, err = uploadInsertLog(context.Background(), binlogIO, alloc, meta.GetID(), partId, segId, blobs)
             assert.Error(t, err)
         })
 
@@ -147,13 +143,18 @@ func TestBinlogIOInterfaceMethods(t *testing.T) {
         alloc := allocator.NewMockAllocator(t)
         binlogIO := io.NewBinlogIO(mkc, getOrCreateIOPool())
         iCodec := storage.NewInsertCodecWithSchema(meta)
+        var partId int64 = 1
+        var segId int64 = 10
+        iData := genInsertData(2)
+        blobs, err := iCodec.Serialize(10, 1, iData)
+        assert.NoError(t, err)
 
         alloc.EXPECT().GetGenerator(mock.Anything, mock.Anything).Call.Return(validGeneratorFn, nil)
 
         ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
         defer cancel()
 
-        _, err := uploadInsertLog(ctx, binlogIO, alloc, meta.GetID(), 1, 10, genInsertData(2), iCodec)
+        _, err = uploadInsertLog(ctx, binlogIO, alloc, meta.GetID(), partId, segId, blobs)
         assert.Error(t, err)
         })
     })
@@ -256,9 +257,13 @@ func TestBinlogIOInnerMethods(t *testing.T) {
         t.Run(test.description, func(t *testing.T) {
             meta := f.GetCollectionMeta(UniqueID(10001), "test_gen_blobs", test.pkType)
             iCodec := storage.NewInsertCodecWithSchema(meta)
-
+            var partId int64 = 10
+            var segId int64 = 1
+            iData := genInsertData(2)
+            blobs, err := iCodec.Serialize(10, 1, iData)
+            assert.NoError(t, err)
             kvs := make(map[string][]byte)
-            pin, err := genInsertBlobs(binlogIO, alloc, genInsertData(2), meta.GetID(), 10, 1, iCodec, kvs)
+            pin, err := genInsertBlobs(binlogIO, alloc, blobs, meta.GetID(), partId, segId, kvs)
 
             assert.NoError(t, err)
             assert.Equal(t, 12, len(pin))
@@ -277,30 +282,22 @@ func TestBinlogIOInnerMethods(t *testing.T) {
     cm := storage.NewLocalChunkManager(storage.RootPath(binlogTestDir))
     defer cm.RemoveWithPrefix(ctx, cm.RootPath())
 
-    t.Run("serialize error", func(t *testing.T) {
-        iCodec := storage.NewInsertCodecWithSchema(nil)
-
-        alloc := allocator.NewMockAllocator(t)
-        binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool())
-        kvs := make(map[string][]byte)
-        pin, err := genInsertBlobs(binlogIO, alloc, genEmptyInsertData(), 0, 10, 1, iCodec, kvs)
-
-        assert.Error(t, err)
-        assert.Empty(t, kvs)
-        assert.Empty(t, pin)
-    })
-
     t.Run("GetGenerator error", func(t *testing.T) {
         f := &MetaFactory{}
         meta := f.GetCollectionMeta(UniqueID(10001), "test_gen_blobs", schemapb.DataType_Int64)
         iCodec := storage.NewInsertCodecWithSchema(meta)
+        var partId int64 = 10
+        var segId int64 = 1
+        iData := genInsertData(2)
+        blobs, err := iCodec.Serialize(partId, segId, iData)
+        assert.NoError(t, err)
 
         alloc := allocator.NewMockAllocator(t)
         alloc.EXPECT().GetGenerator(mock.Anything, mock.Anything).Return(nil, fmt.Errorf("mock GetGenerator error"))
         binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool())
         kvs := make(map[string][]byte)
 
-        pin, err := genInsertBlobs(binlogIO, alloc, genInsertData(2), meta.GetID(), 10, 1, iCodec, kvs)
+        pin, err := genInsertBlobs(binlogIO, alloc, blobs, meta.GetID(), partId, segId, kvs)
 
         assert.Error(t, err)
         assert.Empty(t, kvs)
@@ -55,8 +55,6 @@ var (
     errContext = errors.New("context done or timeout")
 )
 
-type iterator = storage.Iterator
-
 type compactor interface {
     complete()
     compact() (*datapb.CompactionPlanResult, error)
@@ -174,48 +172,15 @@ func (t *compactionTask) mergeDeltalogs(dBlobs map[UniqueID][]*Blob) (map[interf
     return pk2ts, nil
 }
 
-func (t *compactionTask) uploadRemainLog(
-    ctxTimeout context.Context,
-    targetSegID UniqueID,
-    partID UniqueID,
-    meta *etcdpb.CollectionMeta,
-    stats *storage.PrimaryKeyStats,
-    totRows int64,
-    writeBuffer *storage.InsertData,
-) (map[UniqueID]*datapb.FieldBinlog, map[UniqueID]*datapb.FieldBinlog, error) {
-    iCodec := storage.NewInsertCodecWithSchema(meta)
-    inPaths := make(map[int64]*datapb.FieldBinlog, 0)
-    var err error
-    if !writeBuffer.IsEmpty() {
-        inPaths, err = uploadInsertLog(ctxTimeout, t.binlogIO, t.Allocator, meta.GetID(), partID, targetSegID, writeBuffer, iCodec)
-        if err != nil {
-            return nil, nil, err
-        }
+func newBinlogWriter(collectionId, partitionId, segmentId UniqueID, schema *schemapb.CollectionSchema,
+) (writer *storage.SerializeWriter[*storage.Value], closers []func() (*Blob, error), err error) {
+    fieldWriters := storage.NewBinlogStreamWriters(collectionId, partitionId, segmentId, schema.Fields)
+    closers = make([]func() (*Blob, error), 0, len(fieldWriters))
+    for _, w := range fieldWriters {
+        closers = append(closers, w.Finalize)
     }
-
-    statPaths, err := uploadStatsLog(ctxTimeout, t.binlogIO, t.Allocator, meta.GetID(), partID, targetSegID, stats, totRows, iCodec)
-    if err != nil {
-        return nil, nil, err
-    }
-
-    return inPaths, statPaths, nil
-}
-
-func (t *compactionTask) uploadSingleInsertLog(
-    ctxTimeout context.Context,
-    targetSegID UniqueID,
-    partID UniqueID,
-    meta *etcdpb.CollectionMeta,
-    writeBuffer *storage.InsertData,
-) (map[UniqueID]*datapb.FieldBinlog, error) {
-    iCodec := storage.NewInsertCodecWithSchema(meta)
-
-    inPaths, err := uploadInsertLog(ctxTimeout, t.binlogIO, t.Allocator, meta.GetID(), partID, targetSegID, writeBuffer, iCodec)
-    if err != nil {
-        return nil, err
-    }
-
-    return inPaths, nil
+    writer, err = storage.NewBinlogSerializeWriter(schema, partitionId, segmentId, fieldWriters, 1024)
+    return
 }
 
 func (t *compactionTask) merge(
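Note (not part of the diff): each closer returned by newBinlogWriter is a BinlogStreamWriter.Finalize. It yields a Blob whose Key is the field ID (which genInsertBlobs parses back out) and whose RowNum is now populated (see the storage/serde.go hunks near the end). A hedged fragment, assuming a finalizers slice as returned above:

    blob, err := finalizers[0]() // finalize the first field writer
    if err != nil {
        return err
    }
    fieldID, _ := strconv.ParseInt(blob.GetKey(), 10, 64) // Key carries the field ID
    log.Info("finalized field binlog",
        zap.Int64("fieldID", fieldID),
        zap.Int64("rowNum", blob.RowNum),
        zap.Int("bytes", len(blob.Value)))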
@@ -231,9 +196,14 @@ func (t *compactionTask) merge(
     log := log.With(zap.Int64("planID", t.getPlanID()))
     mergeStart := time.Now()
 
+    writer, finalizers, err := newBinlogWriter(meta.GetID(), partID, targetSegID, meta.GetSchema())
+    if err != nil {
+        return nil, nil, 0, err
+    }
+
     var (
         numBinlogs int    // binlog number
-        numRows    int64  // the number of rows uploaded
+        numRows    uint64 // the number of rows uploaded
         expired    int64  // the number of expired entity
 
         insertField2Path = make(map[UniqueID]*datapb.FieldBinlog)
@@ -242,10 +212,6 @@ func (t *compactionTask) merge(
         statField2Path = make(map[UniqueID]*datapb.FieldBinlog)
         statPaths      = make([]*datapb.FieldBinlog, 0)
     )
-    writeBuffer, err := storage.NewInsertData(meta.GetSchema())
-    if err != nil {
-        return nil, nil, -1, err
-    }
 
     isDeletedValue := func(v *storage.Value) bool {
         ts, ok := delta[v.PK.GetValue()]
@@ -306,7 +272,7 @@ func (t *compactionTask) merge(
     numRows = 0
     numBinlogs = 0
     currentTs := t.GetCurrentTime()
-    currentRows := 0
+    unflushedRows := 0
     downloadTimeCost := time.Duration(0)
     uploadInsertTimeCost := time.Duration(0)
 
@@ -325,6 +291,30 @@ func (t *compactionTask) merge(
         timestampFrom int64 = -1
     )
 
+    flush := func() error {
+        uploadInsertStart := time.Now()
+        writer.Close()
+        fieldData := make([]*Blob, len(finalizers))
+
+        for i, f := range finalizers {
+            blob, err := f()
+            if err != nil {
+                return err
+            }
+            fieldData[i] = blob
+        }
+        inPaths, err := uploadInsertLog(ctx, t.binlogIO, t.Allocator, meta.ID, partID, targetSegID, fieldData)
+        if err != nil {
+            log.Warn("failed to upload single insert log", zap.Error(err))
+            return err
+        }
+        numBinlogs += len(inPaths)
+        uploadInsertTimeCost += time.Since(uploadInsertStart)
+        addInsertFieldPath(inPaths, timestampFrom, timestampTo)
+        unflushedRows = 0
+        return nil
+    }
+
     for _, path := range unMergedInsertlogs {
         downloadStart := time.Now()
         data, err := downloadBlobs(ctx, t.binlogIO, path)
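Note (not part of the diff): the flush closure above is what replaces uploadSingleInsertLog and the insert half of uploadRemainLog — it finalizes every field writer into a Blob and uploads the batch. In the rewritten loop (next hunk) it is triggered by a size check against the serialize writer rather than against InsertData. A simplified, hedged fragment of that cadence, with error plumbing and counters trimmed:

    if (unflushedRows+1)%100 == 0 {
        writer.Flush() // encode buffered rows so WrittenMemorySize is current
        if writer.WrittenMemorySize() > paramtable.Get().DataNodeCfg.BinLogMaxSize.GetAsUint64() {
            if err := flush(); err != nil { // upload the finalized field blobs
                return nil, nil, 0, err
            }
            writer, finalizers, err = newBinlogWriter(meta.ID, targetSegID, partID, meta.Schema)
            if err != nil {
                return nil, nil, 0, err
            }
        }
    }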
@@ -370,55 +360,50 @@ func (t *compactionTask) merge(
                 timestampTo = v.Timestamp
             }
 
-            row, ok := v.Value.(map[UniqueID]interface{})
-            if !ok {
-                log.Warn("transfer interface to map wrong", zap.Strings("path", path))
-                return nil, nil, 0, errors.New("unexpected error")
-            }
-
-            err = writeBuffer.Append(row)
+            err = writer.Write(v)
             if err != nil {
                 return nil, nil, 0, err
             }
+            numRows++
+            unflushedRows++
 
-            currentRows++
             stats.Update(v.PK)
 
             // check size every 100 rows in case of too many `GetMemorySize` call
-            if (currentRows+1)%100 == 0 && writeBuffer.GetMemorySize() > paramtable.Get().DataNodeCfg.BinLogMaxSize.GetAsInt() {
-                numRows += int64(writeBuffer.GetRowNum())
-                uploadInsertStart := time.Now()
-                inPaths, err := t.uploadSingleInsertLog(ctx, targetSegID, partID, meta, writeBuffer)
-                if err != nil {
-                    log.Warn("failed to upload single insert log", zap.Error(err))
-                    return nil, nil, 0, err
-                }
-                uploadInsertTimeCost += time.Since(uploadInsertStart)
-                addInsertFieldPath(inPaths, timestampFrom, timestampTo)
-                timestampFrom = -1
-                timestampTo = -1
+            if (unflushedRows+1)%100 == 0 {
+                writer.Flush() // Flush to update memory size
 
-                writeBuffer, _ = storage.NewInsertData(meta.GetSchema())
-                currentRows = 0
-                numBinlogs++
+                if writer.WrittenMemorySize() > paramtable.Get().DataNodeCfg.BinLogMaxSize.GetAsUint64() {
+                    if err := flush(); err != nil {
+                        return nil, nil, 0, err
+                    }
+                    timestampFrom = -1
+                    timestampTo = -1
+
+                    writer, finalizers, err = newBinlogWriter(meta.ID, targetSegID, partID, meta.Schema)
+                    if err != nil {
+                        return nil, nil, 0, err
+                    }
+                }
             }
         }
     }
 
-    // upload stats log and remain insert rows
-    if writeBuffer.GetRowNum() > 0 || numRows > 0 {
-        numRows += int64(writeBuffer.GetRowNum())
-        uploadStart := time.Now()
-        inPaths, statsPaths, err := t.uploadRemainLog(ctx, targetSegID, partID, meta,
-            stats, numRows+int64(currentRows), writeBuffer)
-        if err != nil {
-            return nil, nil, 0, err
-        }
-
-        uploadInsertTimeCost += time.Since(uploadStart)
-        addInsertFieldPath(inPaths, timestampFrom, timestampTo)
+    // final flush if there is unflushed rows
+    if unflushedRows > 0 {
+        if err := flush(); err != nil {
+            return nil, nil, 0, err
+        }
+    }
+
+    // upload stats log
+    if numRows > 0 {
+        iCodec := storage.NewInsertCodecWithSchema(meta)
+        statsPaths, err := uploadStatsLog(ctx, t.binlogIO, t.Allocator, meta.GetID(), partID, targetSegID, stats, int64(numRows), iCodec)
+        if err != nil {
+            return nil, nil, 0, err
+        }
         addStatFieldPath(statsPaths)
-        numBinlogs += len(inPaths)
     }
 
     for _, path := range insertField2Path {
@@ -430,14 +415,14 @@ func (t *compactionTask) merge(
     }
 
     log.Info("compact merge end",
-        zap.Int64("remaining insert numRows", numRows),
+        zap.Uint64("remaining insert numRows", numRows),
         zap.Int64("expired entities", expired),
         zap.Int("binlog file number", numBinlogs),
         zap.Duration("download insert log elapse", downloadTimeCost),
         zap.Duration("upload insert log elapse", uploadInsertTimeCost),
         zap.Duration("merge elapse", time.Since(mergeStart)))
 
-    return insertPaths, statPaths, numRows, nil
+    return insertPaths, statPaths, int64(numRows), nil
 }
 
 func (t *compactionTask) compact() (*datapb.CompactionPlanResult, error) {
@@ -21,7 +21,6 @@ import (
     "fmt"
     "math"
     "testing"
-    "time"
 
     "github.com/cockroachdb/errors"
     "github.com/samber/lo"
@@ -296,8 +295,12 @@ func TestCompactionTaskInnerMethods(t *testing.T) {
         paramtable.Get().Save(Params.CommonCfg.EntityExpirationTTL.Key, "0")
         iData := genInsertDataWithExpiredTS()
         iCodec := storage.NewInsertCodecWithSchema(meta)
+        var partId int64 = 0
+        var segmentId int64 = 1
+        blobs, err := iCodec.Serialize(partId, 0, iData)
+        assert.NoError(t, err)
         var allPaths [][]string
-        inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), 0, 1, iData, iCodec)
+        inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs)
         assert.NoError(t, err)
         assert.Equal(t, 12, len(inpath))
         binlogNum := len(inpath[0].GetBinlogs())
@@ -336,18 +339,22 @@ func TestCompactionTaskInnerMethods(t *testing.T) {
     })
     t.Run("Merge without expiration2", func(t *testing.T) {
         mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool())
+        iData := genInsertDataWithExpiredTS()
         iCodec := storage.NewInsertCodecWithSchema(meta)
+        var partId int64 = 0
+        var segmentId int64 = 1
+        blobs, err := iCodec.Serialize(partId, 0, iData)
+        assert.NoError(t, err)
         paramtable.Get().Save(Params.CommonCfg.EntityExpirationTTL.Key, "0")
         BinLogMaxSize := Params.DataNodeCfg.BinLogMaxSize.GetValue()
         defer func() {
             Params.Save(Params.DataNodeCfg.BinLogMaxSize.Key, BinLogMaxSize)
         }()
         paramtable.Get().Save(Params.DataNodeCfg.BinLogMaxSize.Key, "64")
-        iData := genInsertDataWithExpiredTS()
         meta := NewMetaFactory().GetCollectionMeta(1, "test", schemapb.DataType_Int64)
 
         var allPaths [][]string
-        inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), 0, 1, iData, iCodec)
+        inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs)
         assert.NoError(t, err)
         assert.Equal(t, 12, len(inpath))
         binlogNum := len(inpath[0].GetBinlogs())
@@ -394,9 +401,13 @@ func TestCompactionTaskInnerMethods(t *testing.T) {
         }()
         paramtable.Get().Save(Params.DataNodeCfg.BinLogMaxSize.Key, "1")
         iData := genInsertData(101)
+        var partId int64 = 0
+        var segmentId int64 = 1
+        blobs, err := iCodec.Serialize(partId, segmentId, iData)
+        assert.NoError(t, err)
 
         var allPaths [][]string
-        inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), 0, 1, iData, iCodec)
+        inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs)
         assert.NoError(t, err)
         assert.Equal(t, 12, len(inpath))
         binlogNum := len(inpath[0].GetBinlogs())
@@ -440,10 +451,14 @@ func TestCompactionTaskInnerMethods(t *testing.T) {
         mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool())
         iCodec := storage.NewInsertCodecWithSchema(meta)
         iData := genInsertDataWithExpiredTS()
+        var partId int64 = 0
+        var segmentId int64 = 1
+        blobs, err := iCodec.Serialize(partId, 0, iData)
+        assert.NoError(t, err)
         meta := NewMetaFactory().GetCollectionMeta(1, "test", schemapb.DataType_Int64)
 
         var allPaths [][]string
-        inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), 0, 1, iData, iCodec)
+        inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs)
         assert.NoError(t, err)
         assert.Equal(t, 12, len(inpath))
         binlogNum := len(inpath[0].GetBinlogs())
@@ -485,6 +500,10 @@ func TestCompactionTaskInnerMethods(t *testing.T) {
         mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool())
         iData := genInsertDataWithExpiredTS()
         iCodec := storage.NewInsertCodecWithSchema(meta)
+        var partId int64 = 0
+        var segmentId int64 = 1
+        blobs, err := iCodec.Serialize(partId, 0, iData)
+        assert.NoError(t, err)
         meta := NewMetaFactory().GetCollectionMeta(1, "test", schemapb.DataType_Int64)
         metaCache := metacache.NewMockMetaCache(t)
         metaCache.EXPECT().Schema().Return(meta.GetSchema()).Maybe()
@@ -499,7 +518,7 @@ func TestCompactionTaskInnerMethods(t *testing.T) {
         })
 
         var allPaths [][]string
-        inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), 0, 1, iData, iCodec)
+        inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs)
         assert.NoError(t, err)
         assert.Equal(t, 12, len(inpath))
         binlogNum := len(inpath[0].GetBinlogs())
@@ -539,10 +558,14 @@ func TestCompactionTaskInnerMethods(t *testing.T) {
         iCodec := storage.NewInsertCodecWithSchema(meta)
         paramtable.Get().Save(Params.CommonCfg.EntityExpirationTTL.Key, "0")
         iData := genInsertDataWithExpiredTS()
+        var partId int64 = 0
+        var segmentId int64 = 1
+        blobs, err := iCodec.Serialize(partId, 0, iData)
+        assert.NoError(t, err)
         meta := NewMetaFactory().GetCollectionMeta(1, "test", schemapb.DataType_Int64)
 
         var allPaths [][]string
-        inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), 0, 1, iData, iCodec)
+        inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs)
         assert.NoError(t, err)
         assert.Equal(t, 12, len(inpath))
         binlogNum := len(inpath[0].GetBinlogs())
@@ -586,10 +609,14 @@ func TestCompactionTaskInnerMethods(t *testing.T) {
         iCodec := storage.NewInsertCodecWithSchema(meta)
         paramtable.Get().Save(Params.CommonCfg.EntityExpirationTTL.Key, "0")
         iData := genInsertDataWithExpiredTS()
+        var partId int64 = 0
+        var segmentId int64 = 1
+        blobs, err := iCodec.Serialize(partId, 0, iData)
+        assert.NoError(t, err)
         meta := NewMetaFactory().GetCollectionMeta(1, "test", schemapb.DataType_Int64)
 
         var allPaths [][]string
-        inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), 0, 1, iData, iCodec)
+        inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs)
         assert.NoError(t, err)
         assert.Equal(t, 12, len(inpath))
         binlogNum := len(inpath[0].GetBinlogs())
@@ -714,32 +741,6 @@ func TestCompactionTaskInnerMethods(t *testing.T) {
         _, err := ct.getNumRows()
         assert.Error(t, err, "segment not found")
     })
-
-    t.Run("Test uploadRemainLog error", func(t *testing.T) {
-        f := &MetaFactory{}
-
-        t.Run("upload failed", func(t *testing.T) {
-            ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
-            defer cancel()
-
-            alloc := allocator.NewMockAllocator(t)
-            alloc.EXPECT().AllocOne().Call.Return(int64(11111), nil)
-
-            meta := f.GetCollectionMeta(UniqueID(10001), "test_upload_remain_log", schemapb.DataType_Int64)
-            stats, err := storage.NewPrimaryKeyStats(106, int64(schemapb.DataType_Int64), 10)
-
-            require.NoError(t, err)
-
-            ct := &compactionTask{
-                binlogIO:  io.NewBinlogIO(&mockCm{errSave: true}, getOrCreateIOPool()),
-                Allocator: alloc,
-                done:      make(chan struct{}, 1),
-            }
-
-            _, _, err = ct.uploadRemainLog(ctx, 1, 2, meta, stats, 10, nil)
-            assert.Error(t, err)
-        })
-    })
 }
 
 func getInt64DeltaBlobs(segID UniqueID, pks []UniqueID, tss []Timestamp) ([]*Blob, error) {
@@ -924,12 +925,16 @@ func TestCompactorInterfaceMethods(t *testing.T) {
             metaCache.EXPECT().GetSegmentByID(mock.Anything).Return(nil, false)
 
             iData1 := genInsertDataWithPKs(c.pks1, c.pkType)
+            iblobs1, err := iCodec.Serialize(c.parID, 0, iData1)
+            assert.NoError(t, err)
             dData1 := &DeleteData{
                 Pks:      []storage.PrimaryKey{c.pks1[0]},
                 Tss:      []Timestamp{20000},
                 RowCount: 1,
             }
             iData2 := genInsertDataWithPKs(c.pks2, c.pkType)
+            iblobs2, err := iCodec.Serialize(c.parID, 3, iData2)
+            assert.NoError(t, err)
             dData2 := &DeleteData{
                 Pks:      []storage.PrimaryKey{c.pks2[0]},
                 Tss:      []Timestamp{30000},
@@ -938,7 +943,7 @@ func TestCompactorInterfaceMethods(t *testing.T) {
 
             stats1, err := storage.NewPrimaryKeyStats(1, int64(c.pkType), 1)
             require.NoError(t, err)
-            iPaths1, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), c.parID, c.segID1, iData1, iCodec)
+            iPaths1, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), c.parID, c.segID1, iblobs1)
             require.NoError(t, err)
             sPaths1, err := uploadStatsLog(context.Background(), mockbIO, alloc, meta.GetID(), c.parID, c.segID1, stats1, 2, iCodec)
             require.NoError(t, err)
@@ -948,7 +953,7 @@ func TestCompactorInterfaceMethods(t *testing.T) {
 
             stats2, err := storage.NewPrimaryKeyStats(1, int64(c.pkType), 1)
             require.NoError(t, err)
-            iPaths2, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), c.parID, c.segID2, iData2, iCodec)
+            iPaths2, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), c.parID, c.segID2, iblobs2)
             require.NoError(t, err)
             sPaths2, err := uploadStatsLog(context.Background(), mockbIO, alloc, meta.GetID(), c.parID, c.segID2, stats2, 2, iCodec)
             require.NoError(t, err)
@@ -1067,7 +1072,11 @@ func TestCompactorInterfaceMethods(t *testing.T) {
         // the same pk for segmentI and segmentII
         pks := [2]storage.PrimaryKey{storage.NewInt64PrimaryKey(1), storage.NewInt64PrimaryKey(2)}
         iData1 := genInsertDataWithPKs(pks, schemapb.DataType_Int64)
+        iblobs1, err := iCodec.Serialize(partID, 0, iData1)
+        assert.NoError(t, err)
         iData2 := genInsertDataWithPKs(pks, schemapb.DataType_Int64)
+        iblobs2, err := iCodec.Serialize(partID, 1, iData2)
+        assert.NoError(t, err)
 
         pk1 := storage.NewInt64PrimaryKey(1)
         dData1 := &DeleteData{
@@ -1084,7 +1093,7 @@ func TestCompactorInterfaceMethods(t *testing.T) {
 
         stats1, err := storage.NewPrimaryKeyStats(1, int64(schemapb.DataType_Int64), 1)
         require.NoError(t, err)
-        iPaths1, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partID, segID1, iData1, iCodec)
+        iPaths1, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partID, segID1, iblobs1)
         require.NoError(t, err)
         sPaths1, err := uploadStatsLog(context.Background(), mockbIO, alloc, meta.GetID(), partID, segID1, stats1, 1, iCodec)
         require.NoError(t, err)
@@ -1094,7 +1103,7 @@ func TestCompactorInterfaceMethods(t *testing.T) {
 
         stats2, err := storage.NewPrimaryKeyStats(1, int64(schemapb.DataType_Int64), 1)
         require.NoError(t, err)
-        iPaths2, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partID, segID2, iData2, iCodec)
+        iPaths2, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partID, segID2, iblobs2)
         require.NoError(t, err)
         sPaths2, err := uploadStatsLog(context.Background(), mockbIO, alloc, meta.GetID(), partID, segID2, stats2, 1, iCodec)
         require.NoError(t, err)
@@ -1160,3 +1169,78 @@ func TestInjectDone(t *testing.T) {
     task.injectDone()
     task.injectDone()
 }
+
+func BenchmarkCompaction(b *testing.B) {
+    ctx, cancel := context.WithCancel(context.Background())
+    defer cancel()
+    cm := storage.NewLocalChunkManager(storage.RootPath(compactTestDir))
+    defer cm.RemoveWithPrefix(ctx, cm.RootPath())
+
+    collectionID := int64(1)
+    meta := NewMetaFactory().GetCollectionMeta(collectionID, "test", schemapb.DataType_Int64)
+    mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool())
+    paramtable.Get().Save(Params.CommonCfg.EntityExpirationTTL.Key, "0")
+    iData := genInsertDataWithExpiredTS()
+    iCodec := storage.NewInsertCodecWithSchema(meta)
+    var partId int64 = 0
+    var segmentId int64 = 1
+    blobs, err := iCodec.Serialize(partId, 0, iData)
+    assert.NoError(b, err)
+    var allPaths [][]string
+    alloc := allocator.NewMockAllocator(b)
+    alloc.EXPECT().GetGenerator(mock.Anything, mock.Anything).Call.Return(validGeneratorFn, nil)
+    alloc.EXPECT().AllocOne().Call.Return(int64(19530), nil)
+    inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs)
+    assert.NoError(b, err)
+    assert.Equal(b, 12, len(inpath))
+    binlogNum := len(inpath[0].GetBinlogs())
+    assert.Equal(b, 1, binlogNum)
+
+    for idx := 0; idx < binlogNum; idx++ {
+        var ps []string
+        for _, path := range inpath {
+            ps = append(ps, path.GetBinlogs()[idx].GetLogPath())
+        }
+        allPaths = append(allPaths, ps)
+    }
+
+    dm := map[interface{}]Timestamp{
+        1: 10000,
+    }
+
+    metaCache := metacache.NewMockMetaCache(b)
+    metaCache.EXPECT().Schema().Return(meta.GetSchema()).Maybe()
+    metaCache.EXPECT().GetSegmentByID(mock.Anything).RunAndReturn(func(id int64, filters ...metacache.SegmentFilter) (*metacache.SegmentInfo, bool) {
+        segment := metacache.NewSegmentInfo(&datapb.SegmentInfo{
+            CollectionID: 1,
+            PartitionID:  0,
+            ID:           id,
+            NumOfRows:    10,
+        }, nil)
+        return segment, true
+    })
+
+    ct := &compactionTask{
+        metaCache: metaCache,
+        binlogIO:  mockbIO,
+        Allocator: alloc,
+        done:      make(chan struct{}, 1),
+        plan: &datapb.CompactionPlan{
+            SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{
+                {SegmentID: 1},
+            },
+        },
+    }
+
+    b.ResetTimer()
+
+    for i := 0; i < b.N; i++ {
+        inPaths, statsPaths, numOfRow, err := ct.merge(context.Background(), allPaths, 2, 0, meta, dm)
+        assert.NoError(b, err)
+        assert.Equal(b, int64(2), numOfRow)
+        assert.Equal(b, 1, len(inPaths[0].GetBinlogs()))
+        assert.Equal(b, 1, len(statsPaths))
+        assert.NotEqual(b, -1, inPaths[0].GetBinlogs()[0].GetTimestampFrom())
+        assert.NotEqual(b, -1, inPaths[0].GetBinlogs()[0].GetTimestampTo())
+    }
+}
@@ -99,7 +99,6 @@ func (b *BinlogIoImpl) Upload(ctx context.Context, kvs map[string][]byte) error
             }
             return err
         })
 
         return struct{}{}, err
     })
-
@@ -28,6 +28,7 @@ import (
     "github.com/apache/arrow/go/v12/arrow"
     "github.com/apache/arrow/go/v12/arrow/array"
     "github.com/apache/arrow/go/v12/arrow/memory"
+    "github.com/apache/arrow/go/v12/parquet"
     "github.com/apache/arrow/go/v12/parquet/pqarrow"
     "github.com/cockroachdb/errors"
     "github.com/golang/protobuf/proto"
@@ -749,18 +750,17 @@ var _ RecordWriter = (*singleFieldRecordWriter)(nil)
 type singleFieldRecordWriter struct {
     fw      *pqarrow.FileWriter
     fieldId FieldID
+    schema  *arrow.Schema
 
-    grouped bool
+    numRows int
 }
 
 func (sfw *singleFieldRecordWriter) Write(r Record) error {
-    if !sfw.grouped {
-        sfw.grouped = true
-        sfw.fw.NewRowGroup()
-    }
-    // TODO: adding row group support by calling fw.NewRowGroup()
+    sfw.numRows += r.Len()
     a := r.Column(sfw.fieldId)
-    return sfw.fw.WriteColumnData(a)
+    rec := array.NewRecord(sfw.schema, []arrow.Array{a}, int64(r.Len()))
+    defer rec.Release()
+    return sfw.fw.WriteBuffered(rec)
 }
 
 func (sfw *singleFieldRecordWriter) Close() {
@@ -769,13 +769,16 @@ func (sfw *singleFieldRecordWriter) Close() {
 
 func newSingleFieldRecordWriter(fieldId FieldID, field arrow.Field, writer io.Writer) (*singleFieldRecordWriter, error) {
     schema := arrow.NewSchema([]arrow.Field{field}, nil)
-    fw, err := pqarrow.NewFileWriter(schema, writer, nil, pqarrow.DefaultWriterProps())
+    fw, err := pqarrow.NewFileWriter(schema, writer,
+        parquet.NewWriterProperties(parquet.WithMaxRowGroupLength(math.MaxInt64)), // No additional grouping for now.
+        pqarrow.DefaultWriterProps())
     if err != nil {
         return nil, err
     }
     return &singleFieldRecordWriter{
         fw:      fw,
         fieldId: fieldId,
+        schema:  schema,
     }, nil
 }
 
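Note (not part of the diff): the writer now relies on pqarrow's buffered row-group writes — WriteBuffered appends records to the current row group, and the writer properties raise the maximum row-group length so everything stays in one group until the file is finalized. A self-contained illustration using the same arrow v12 calls; the file name and the one-column schema are made up for the example:

    package main

    import (
        "math"
        "os"

        "github.com/apache/arrow/go/v12/arrow"
        "github.com/apache/arrow/go/v12/arrow/array"
        "github.com/apache/arrow/go/v12/arrow/memory"
        "github.com/apache/arrow/go/v12/parquet"
        "github.com/apache/arrow/go/v12/parquet/pqarrow"
    )

    func main() {
        schema := arrow.NewSchema([]arrow.Field{{Name: "val", Type: arrow.PrimitiveTypes.Int64}}, nil)

        f, _ := os.Create("demo.parquet")
        defer f.Close()

        fw, err := pqarrow.NewFileWriter(schema, f,
            parquet.NewWriterProperties(parquet.WithMaxRowGroupLength(math.MaxInt64)), // keep everything in one row group
            pqarrow.DefaultWriterProps())
        if err != nil {
            panic(err)
        }

        b := array.NewInt64Builder(memory.DefaultAllocator)
        b.AppendValues([]int64{1, 2, 3}, nil)
        col := b.NewArray()
        defer col.Release()

        rec := array.NewRecord(schema, []arrow.Array{col}, int64(col.Len()))
        defer rec.Release()

        if err := fw.WriteBuffered(rec); err != nil { // buffered writes stay in the current row group
            panic(err)
        }
        fw.Close()
    }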
@@ -790,15 +793,18 @@ type SerializeWriter[T any] struct {
 }
 
 func (sw *SerializeWriter[T]) Flush() error {
+    if sw.pos == 0 {
+        return nil
+    }
     buf := sw.buffer[:sw.pos]
     r, size, err := sw.serializer(buf)
     if err != nil {
         return err
     }
+    defer r.Release()
     if err := sw.rw.Write(r); err != nil {
         return err
     }
-    r.Release()
     sw.pos = 0
     sw.writtenMemorySize += size
     return nil
@@ -823,8 +829,11 @@ func (sw *SerializeWriter[T]) WrittenMemorySize() uint64 {
 }
 
 func (sw *SerializeWriter[T]) Close() error {
+    if err := sw.Flush(); err != nil {
+        return err
+    }
     sw.rw.Close()
-    return sw.Flush()
+    return nil
 }
 
 func NewSerializeRecordWriter[T any](rw RecordWriter, serializer Serializer[T], batchSize int) *SerializeWriter[T] {
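Note (not part of the diff): with Flush short-circuiting on an empty buffer and Close flushing before closing the underlying record writers, a caller only has to call Close to get a complete binlog even when the last batch is partial. A hedged sketch, using batch size 7 as in the updated test; schema and values are assumed to be in scope:

    writers := storage.NewBinlogStreamWriters(0, 0, 0, schema.Fields)
    writer, err := storage.NewBinlogSerializeWriter(schema, 0, 0, writers, 7)
    if err != nil {
        return err
    }
    for _, v := range values { // values: []*storage.Value
        if err := writer.Write(v); err != nil {
            return err
        }
    }
    // Rows that did not fill a whole batch are still in memory here;
    // Close now flushes them before closing the record writers.
    if err := writer.Close(); err != nil {
        return err
    }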
@@ -881,7 +890,7 @@ type BinlogStreamWriter struct {
     memorySize int // To be updated on the fly
 
     buf bytes.Buffer
-    rw  RecordWriter
+    rw  *singleFieldRecordWriter
 }
 
 func (bsw *BinlogStreamWriter) GetRecordWriter() (RecordWriter, error) {
@@ -918,6 +927,7 @@ func (bsw *BinlogStreamWriter) Finalize() (*Blob, error) {
     return &Blob{
         Key:    strconv.Itoa(int(bsw.fieldSchema.FieldID)),
         Value:  b.Bytes(),
+        RowNum: int64(bsw.rw.numRows),
     }, nil
 }
 
@@ -124,7 +124,7 @@ func TestBinlogSerializeWriter(t *testing.T) {
     })
 
     t.Run("test serialize", func(t *testing.T) {
-        size := 3
+        size := 16
         blobs, err := generateTestData(size)
         assert.NoError(t, err)
         reader, err := NewBinlogDeserializeReader(blobs, common.RowIDField)
@@ -134,7 +134,7 @@ func TestBinlogSerializeWriter(t *testing.T) {
         schema := generateTestSchema()
         // Copy write the generated data
         writers := NewBinlogStreamWriters(0, 0, 0, schema.Fields)
-        writer, err := NewBinlogSerializeWriter(schema, 0, 0, writers, 1024)
+        writer, err := NewBinlogSerializeWriter(schema, 0, 0, writers, 7)
         assert.NoError(t, err)
 
         for i := 1; i <= size; i++ {
@@ -143,7 +143,8 @@ func TestBinlogSerializeWriter(t *testing.T) {
 
             value := reader.Value()
             assertTestData(t, i, value)
-            writer.Write(value)
+            err := writer.Write(value)
+            assert.NoError(t, err)
         }
 
         err = reader.Next()