Refine merge operation during compacting phase (#18399)

Signed-off-by: yun.zhang <yun.zhang@zilliz.com>
Author: jaime (committed by GitHub)
Date:   2022-07-28 14:52:31 +08:00
parent a287a2b3fd
commit c9174d55ba
4 changed files with 27 additions and 18 deletions


@@ -253,7 +253,12 @@ func (t *compactionTrigger) handleGlobalSignal(signal *compactionSignal) {
 			continue
 		}
-		log.Info("time cost of generating global compaction", zap.Int64("planID", plan.PlanID), zap.Any("time cost", time.Since(start).Milliseconds()),
+		segIDs := make(map[int64][]*datapb.FieldBinlog, len(plan.SegmentBinlogs))
+		for _, seg := range plan.SegmentBinlogs {
+			segIDs[seg.SegmentID] = seg.Deltalogs
+		}
+		log.Info("time cost of generating global compaction", zap.Any("segID2DeltaLogs", segIDs), zap.Int64("planID", plan.PlanID), zap.Any("time cost", time.Since(start).Milliseconds()),
 			zap.Int64("collectionID", signal.collectionID), zap.String("channel", group.channelName), zap.Int64("partitionID", group.partitionID))
 	}
 }
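
For readers skimming the hunk above: the new segIDs map exists only to enrich the log line, pairing each segment in the plan with the delta binlogs it contributes to the compaction. A minimal, self-contained sketch of the same pattern, using hypothetical stand-ins for the generated protobuf types (the real ones are datapb.FieldBinlog and the plan's SegmentBinlogs entries referenced in the diff):

package main

import "fmt"

// Hypothetical stand-ins for the generated protobuf types.
type FieldBinlog struct{ LogPaths []string }

type SegmentBinlogs struct {
	SegmentID int64
	Deltalogs []*FieldBinlog
}

func main() {
	plan := []*SegmentBinlogs{
		{SegmentID: 100, Deltalogs: []*FieldBinlog{{LogPaths: []string{"delta/100/1"}}}},
		{SegmentID: 101, Deltalogs: nil},
	}

	// Same shape as the hunk: segment ID -> its delta binlogs,
	// pre-sized to the number of segments in the plan.
	segIDs := make(map[int64][]*FieldBinlog, len(plan))
	for _, seg := range plan {
		segIDs[seg.SegmentID] = seg.Deltalogs
	}
	fmt.Printf("segID2DeltaLogs: %v\n", segIDs)
}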


@@ -133,12 +133,12 @@ func (t *compactionTask) getChannelName() string {
 	return t.plan.GetChannel()
 }
 
-func (t *compactionTask) mergeDeltalogs(dBlobs map[UniqueID][]*Blob, timetravelTs Timestamp) (map[primaryKey]Timestamp, *DelDataBuf, error) {
+func (t *compactionTask) mergeDeltalogs(dBlobs map[UniqueID][]*Blob, timetravelTs Timestamp) (map[interface{}]Timestamp, *DelDataBuf, error) {
 	mergeStart := time.Now()
 	dCodec := storage.NewDeleteCodec()
 
 	var (
-		pk2ts = make(map[primaryKey]Timestamp)
+		pk2ts = make(map[interface{}]Timestamp)
 		dbuff = &DelDataBuf{
 			delData: &DeleteData{
 				Pks: make([]primaryKey, 0),
@@ -162,7 +162,7 @@ func (t *compactionTask) mergeDeltalogs(dBlobs map[UniqueID][]*Blob, timetravelT
 			ts := dData.Tss[i]
 			if timetravelTs != Timestamp(0) && dData.Tss[i] <= timetravelTs {
-				pk2ts[pk] = ts
+				pk2ts[pk.GetValue()] = ts
 				continue
 			}
@@ -191,7 +191,7 @@ func nano2Milli(nano time.Duration) float64 {
 	return float64(nano) / float64(time.Millisecond)
 }
 
-func (t *compactionTask) merge(mergeItr iterator, delta map[primaryKey]Timestamp, schema *schemapb.CollectionSchema, currentTs Timestamp) ([]*InsertData, int64, error) {
+func (t *compactionTask) merge(mergeItr iterator, delta map[interface{}]Timestamp, schema *schemapb.CollectionSchema, currentTs Timestamp) ([]*InsertData, int64, error) {
 	mergeStart := time.Now()
 
 	var (
@@ -207,12 +207,10 @@ func (t *compactionTask) merge(mergeItr iterator, delta map[primaryKey]Timestamp
 	)
 
 	isDeletedValue := func(v *storage.Value) bool {
-		for pk, ts := range delta {
-			if pk.EQ(v.PK) && uint64(v.Timestamp) <= ts {
-				return true
-			}
+		ts, ok := delta[v.PK.GetValue()]
+		if ok && uint64(v.Timestamp) <= ts {
+			return true
 		}
 		return false
 	}
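
This hunk is the heart of the refinement: instead of range-scanning every delta entry and calling pk.EQ per row, the per-row deletion check becomes a single map lookup keyed by the PK's underlying comparable value. A minimal before/after sketch, using toy stand-ins for storage.PrimaryKey (names are illustrative only, not the real milvus types):

package main

import "fmt"

type Timestamp = uint64

// Toy stand-in for storage.PrimaryKey, just enough to contrast the two strategies.
type primaryKey interface {
	EQ(other primaryKey) bool
	GetValue() interface{}
}

type int64PK struct{ v int64 }

func (p int64PK) EQ(o primaryKey) bool  { q, ok := o.(int64PK); return ok && p.v == q.v }
func (p int64PK) GetValue() interface{} { return p.v }

func main() {
	pk, rowTs := int64PK{1}, uint64(9000)

	// Before: delta keyed by the primaryKey interface forces a linear scan
	// with EQ for every row visited during merge -- O(rows x deletes).
	deltaOld := map[primaryKey]Timestamp{int64PK{1}: 10000}
	deletedOld := false
	for dpk, dts := range deltaOld {
		if dpk.EQ(pk) && rowTs <= dts {
			deletedOld = true
			break
		}
	}

	// After: keying by the unwrapped value (an int64 or string, both
	// comparable) turns the check into one O(1) lookup.
	deltaNew := map[interface{}]Timestamp{int64(1): 10000}
	dts, ok := deltaNew[pk.GetValue()]
	deletedNew := ok && rowTs <= dts

	fmt.Println(deletedOld, deletedNew) // true true
}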


@@ -259,9 +259,8 @@ func TestCompactionTaskInnerMethods(t *testing.T) {
 		mitr := storage.NewMergeIterator([]iterator{iitr})
 
-		pk := newInt64PrimaryKey(1)
-		dm := map[primaryKey]Timestamp{
-			pk: 10000,
+		dm := map[interface{}]Timestamp{
+			1: 10000,
 		}
 
 		ct := &compactionTask{}
@@ -289,7 +288,7 @@ func TestCompactionTaskInnerMethods(t *testing.T) {
 		mitr := storage.NewMergeIterator([]iterator{iitr})
 
-		dm := map[primaryKey]Timestamp{}
+		dm := map[interface{}]Timestamp{}
 
 		ct := &compactionTask{}
 		idata, numOfRow, err := ct.merge(mitr, dm, meta.GetSchema(), ct.GetCurrentTime())
@@ -311,10 +310,8 @@ func TestCompactionTaskInnerMethods(t *testing.T) {
 		require.NoError(t, err)
 		mitr := storage.NewMergeIterator([]iterator{iitr})
-
-		pk := newInt64PrimaryKey(1)
-		dm := map[primaryKey]Timestamp{
-			pk: 10000,
+		dm := map[interface{}]Timestamp{
+			1: 10000,
 		}
 
 		ct := &compactionTask{}
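
One subtlety worth flagging in these test literals: with delta keyed by interface{}, an untyped constant key such as 1 boxes as int, whereas Int64PrimaryKey.GetValue() returns an int64, and int(1) and int64(1) are never equal as interface keys; spelling the key as int64(1) would be the defensive choice. A quick demonstration of the Go semantics (not milvus code):

package main

import "fmt"

func main() {
	m := map[interface{}]uint64{1: 10000} // the key 1 boxes as int
	var pkVal interface{} = int64(1)      // what an int64 PK's GetValue() yields
	_, hitInt64 := m[pkVal]
	_, hitInt := m[1]
	fmt.Println(hitInt64, hitInt) // false true
}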


@@ -34,6 +34,7 @@ type PrimaryKey interface {
 	MarshalJSON() ([]byte, error)
 	UnmarshalJSON(data []byte) error
 	SetValue(interface{}) error
+	GetValue() interface{}
 	Type() schemapb.DataType
 }
@@ -147,6 +148,10 @@ func (ip *Int64PrimaryKey) Type() schemapb.DataType {
 	return schemapb.DataType_Int64
 }
 
+func (ip *Int64PrimaryKey) GetValue() interface{} {
+	return ip.Value
+}
+
 type BaseStringPrimaryKey struct {
 	Value string
 }
@@ -199,6 +204,10 @@ func (sp *BaseStringPrimaryKey) SetValue(data interface{}) error {
 	return nil
 }
 
+func (sp *BaseStringPrimaryKey) GetValue() interface{} {
+	return sp.Value
+}
+
 type VarCharPrimaryKey struct {
 	BaseStringPrimaryKey
 }
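
Why GetValue() rather than keying maps by the PrimaryKey object itself: the concrete PKs are used through pointers, and pointers compare by identity, so two equal keys would occupy distinct map slots; the unwrapped int64 or string compares by value. A small sketch on toy mirrors of the two implementations above (the real types are storage.Int64PrimaryKey and storage.BaseStringPrimaryKey):

package main

import "fmt"

type Int64PK struct{ Value int64 }

func (ip *Int64PK) GetValue() interface{} { return ip.Value }

type StringPK struct{ Value string }

func (sp *StringPK) GetValue() interface{} { return sp.Value }

func main() {
	// Keyed by pointer: a second, equal PK is a different key.
	byPtr := map[interface{}]bool{&Int64PK{Value: 1}: true}
	fmt.Println(byPtr[&Int64PK{Value: 1}]) // false: distinct pointers

	// Keyed by the unwrapped value: equal PKs collide as intended,
	// which is what lets mergeDeltalogs key pk2ts by pk.GetValue().
	byVal := map[interface{}]bool{(&Int64PK{Value: 1}).GetValue(): true}
	fmt.Println(byVal[(&Int64PK{Value: 1}).GetValue()]) // true
	fmt.Println((&StringPK{Value: "a"}).GetValue() == (&StringPK{Value: "a"}).GetValue()) // true
}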