congqixia eaabe0293b
fix: Update segment compactTo when compactTo segment is compacted (#28755)
Related to #28736 #28748
See also #27675
Previous PR: #28646

This PR fixes the `SegmentNotFound` issue that occurs when compaction happens
multiple times and the buffer of the first-generation segment is synced due to
the stale policy

Now the `CompactSegments` API of metacache updates the compactTo field of
segmentInfo if the compactTo segment is itself compacted, keeping the
compaction lineage clean
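
A minimal sketch of the idea (not the actual metacache implementation; the `segmentInfo` struct and field names below are illustrative assumptions): when segment A was compacted to B and B is later compacted to C, A's compactTo must be re-pointed to C so the lineage stays resolvable:

```go
// Illustrative only: re-point compactTo transitively when the compactTo
// target is itself compacted away in a newer compaction round.
type segmentInfo struct {
	segmentID int64
	compactTo int64 // 0 means the segment has not been compacted
}

func compactSegments(infos map[int64]*segmentInfo, compactedFrom []int64, compactTo int64) {
	from := make(map[int64]struct{}, len(compactedFrom))
	for _, id := range compactedFrom {
		from[id] = struct{}{}
		if info, ok := infos[id]; ok {
			info.compactTo = compactTo
		}
	}
	// Older generations that pointed at a segment compacted away in this
	// round are updated to point at the newest result segment.
	for _, info := range infos {
		if _, hit := from[info.compactTo]; hit {
			info.compactTo = compactTo
		}
	}
}
```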

Also, add the `CompactedSegment` SyncPolicy to sync compacted segments as soon
as possible and keep the metacache clean

Now `SyncPolicy` is an interface instead of a function type, so that when it
selects segments to sync, we can log the reason and the target segments
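
A hypothetical caller-side sketch of what this enables (the `selectForSync` helper, the log message, and the use of the `github.com/milvus-io/milvus/pkg/log` and `go.uber.org/zap` imports are assumptions for illustration, not the actual write buffer code):

```go
// Illustrative only: each policy reports both the selected segments and the
// reason, so the caller can log why a segment buffer is being synced.
func selectForSync(policies []SyncPolicy, buffers []*segmentBuffer, ts typeutil.Timestamp) []int64 {
	var toSync []int64
	for _, policy := range policies {
		ids := policy.SelectSegments(buffers, ts)
		if len(ids) == 0 {
			continue
		}
		log.Info("sync policy selected segments",
			zap.Int64s("segmentIDs", ids),
			zap.String("reason", policy.Reason()))
		toSync = append(toSync, ids...)
	}
	return toSync
}
```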

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
2023-11-27 19:48:26 +08:00

package writebuffer

import (
	"time"

	"github.com/samber/lo"
	"go.uber.org/atomic"

	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
	"github.com/milvus-io/milvus/internal/datanode/metacache"
	"github.com/milvus-io/milvus/internal/proto/datapb"
	"github.com/milvus-io/milvus/pkg/util/tsoutil"
	"github.com/milvus-io/milvus/pkg/util/typeutil"
)

// SyncPolicy selects the segment buffers that shall be synced and reports the
// reason for the selection so callers can log it.
type SyncPolicy interface {
	SelectSegments(buffers []*segmentBuffer, ts typeutil.Timestamp) []int64
	Reason() string
}

// SelectSegmentFunc is the function type holding the segment selection logic.
type SelectSegmentFunc func(buffer []*segmentBuffer, ts typeutil.Timestamp) []int64

// SelectSegmentFnPolicy wraps a SelectSegmentFunc with a static reason string
// so a plain function can satisfy the SyncPolicy interface.
type SelectSegmentFnPolicy struct {
	fn     SelectSegmentFunc
	reason string
}

func (f SelectSegmentFnPolicy) SelectSegments(buffers []*segmentBuffer, ts typeutil.Timestamp) []int64 {
	return f.fn(buffers, ts)
}

func (f SelectSegmentFnPolicy) Reason() string { return f.reason }

func wrapSelectSegmentFuncPolicy(fn SelectSegmentFunc, reason string) SelectSegmentFnPolicy {
	return SelectSegmentFnPolicy{
		fn:     fn,
		reason: reason,
	}
}

// GetFullBufferPolicy returns a SyncPolicy that selects buffers which are full.
func GetFullBufferPolicy() SyncPolicy {
	return wrapSelectSegmentFuncPolicy(
		func(buffers []*segmentBuffer, _ typeutil.Timestamp) []int64 {
			return lo.FilterMap(buffers, func(buf *segmentBuffer, _ int) (int64, bool) {
				return buf.segmentID, buf.IsFull()
			})
		}, "buffer full")
}

// GetCompactedSegmentsPolicy returns a SyncPolicy that selects buffers whose
// segments are marked as compacted in the metacache, so they can be synced as
// soon as possible and evicted from the cache.
func GetCompactedSegmentsPolicy(meta metacache.MetaCache) SyncPolicy {
	return wrapSelectSegmentFuncPolicy(func(buffers []*segmentBuffer, _ typeutil.Timestamp) []int64 {
		segmentIDs := lo.Map(buffers, func(buffer *segmentBuffer, _ int) int64 { return buffer.segmentID })
		return meta.GetSegmentIDsBy(metacache.WithSegmentIDs(segmentIDs...), metacache.WithCompacted())
	}, "segment compacted")
}

// GetSyncStaleBufferPolicy returns a SyncPolicy that selects buffers whose
// earliest data has been buffered longer than staleDuration.
func GetSyncStaleBufferPolicy(staleDuration time.Duration) SyncPolicy {
	return wrapSelectSegmentFuncPolicy(func(buffers []*segmentBuffer, ts typeutil.Timestamp) []int64 {
		current := tsoutil.PhysicalTime(ts)
		return lo.FilterMap(buffers, func(buf *segmentBuffer, _ int) (int64, bool) {
			minTs := buf.MinTimestamp()
			start := tsoutil.PhysicalTime(minTs)
			return buf.segmentID, current.Sub(start) > staleDuration
		})
	}, "buffer stale")
}

// GetFlushingSegmentsPolicy returns a SyncPolicy that selects all segments the
// metacache reports as being in the Flushing state.
func GetFlushingSegmentsPolicy(meta metacache.MetaCache) SyncPolicy {
	return wrapSelectSegmentFuncPolicy(func(_ []*segmentBuffer, _ typeutil.Timestamp) []int64 {
		return meta.GetSegmentIDsBy(metacache.WithSegmentState(commonpb.SegmentState_Flushing))
	}, "segment flushing")
}

// GetFlushTsPolicy returns a SyncPolicy that, once the flush timestamp is set
// and reached, selects buffers whose earliest data precedes the flush
// timestamp and marks the affected growing segments as Flushing.
func GetFlushTsPolicy(flushTimestamp *atomic.Uint64, meta metacache.MetaCache) SyncPolicy {
	return wrapSelectSegmentFuncPolicy(func(buffers []*segmentBuffer, ts typeutil.Timestamp) []int64 {
		flushTs := flushTimestamp.Load()
		if flushTs != nonFlushTS && ts >= flushTs {
			// flush segment start pos < flushTs && checkpoint > flushTs
			ids := lo.FilterMap(buffers, func(buf *segmentBuffer, _ int) (int64, bool) {
				seg, ok := meta.GetSegmentByID(buf.segmentID)
				if !ok {
					return buf.segmentID, false
				}
				inRange := seg.State() == commonpb.SegmentState_Flushed ||
					seg.Level() == datapb.SegmentLevel_L0
				return buf.segmentID, inRange && buf.MinTimestamp() < flushTs
			})
			// set segment flushing
			meta.UpdateSegments(metacache.UpdateState(commonpb.SegmentState_Flushing),
				metacache.WithSegmentIDs(ids...), metacache.WithSegmentState(commonpb.SegmentState_Growing))
			// flush all buffer
			return ids
		}
		return nil
	}, "flush ts")
}
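
For context, a sketch of how these policies might be composed when constructing a write buffer (the variable names, the policy order, and the 10-minute stale duration are assumptions for illustration, not the actual wiring):

```go
// Illustrative composition: metaCache is a metacache.MetaCache and flushTs an
// *atomic.Uint64 owned by the write buffer.
policies := []SyncPolicy{
	GetFullBufferPolicy(),
	GetSyncStaleBufferPolicy(10 * time.Minute),
	GetCompactedSegmentsPolicy(metaCache), // added in this PR: sync compacted segments asap
	GetFlushingSegmentsPolicy(metaCache),
	GetFlushTsPolicy(flushTs, metaCache),
}
```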