This commit refines L0 compaction to ensure data consistency by properly setting the delete position boundary for L1 segment selection.

Key changes:
1. The L0 view trigger now sets latestDeletePos for L1 selection.
2. L0 segments are filtered by growing-segment position in the policy, not in the views.
3. Renamed LevelZeroSegmentsView to LevelZeroCompactionView.
4. Renamed fields for semantic clarity:
   * segments -> l0Segments
   * earliestGrowingSegmentPos -> latestDeletePos
5. Updated the default compaction prioritizer to "level".

See also: #46434

---------

Signed-off-by: yangxuan <xuan.yang@zilliz.com>
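As a rough illustration of the boundary rule described above (a simplified sketch, not the full implementation; the real check lives in groupL0ViewsByPartChan in the file below), an L0 segment joins a compaction view only when its delete position does not pass the view's latestDeletePos:

    // Simplified sketch of the selection boundary seeded from the earliest
    // growing-segment start position; see groupL0ViewsByPartChan below.
    if segView.dmlPos.GetTimestamp() <= view.latestDeletePos.GetTimestamp() {
        view.Append(segView) // the segment's deletes are fully covered by the boundary
    }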
242 lines | 7.6 KiB | Go
package datacoord

import (
	"context"
	"sync"
	"time"

	"github.com/samber/lo"
	"go.uber.org/atomic"
	"go.uber.org/zap"

	"github.com/milvus-io/milvus/internal/datacoord/allocator"
	"github.com/milvus-io/milvus/pkg/v2/log"
	"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
	"github.com/milvus-io/milvus/pkg/v2/util/merr"
	"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
	"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)

// l0CompactionPolicy chooses qualified L0 segments to do L0 compaction.
type l0CompactionPolicy struct {
	meta *meta

	activeCollections *activeCollections
	allocator         allocator.Allocator

	// key: collectionID, value: reference count
	skipCompactionCollections map[int64]int
	skipLocker                sync.RWMutex
}

func newL0CompactionPolicy(meta *meta, allocator allocator.Allocator) *l0CompactionPolicy {
	return &l0CompactionPolicy{
		meta:                      meta,
		activeCollections:         newActiveCollections(),
		allocator:                 allocator,
		skipCompactionCollections: make(map[int64]int),
	}
}

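// Enable reports whether auto compaction is enabled in the DataCoord configuration.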
func (policy *l0CompactionPolicy) Enable() bool {
	return Params.DataCoordCfg.EnableAutoCompaction.GetAsBool()
}

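// AddSkipCollection increments the skip reference count for a collection so
// that L0 compaction is paused for it (e.g. while the collection is importing).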
func (policy *l0CompactionPolicy) AddSkipCollection(collectionID UniqueID) {
	policy.skipLocker.Lock()
	defer policy.skipLocker.Unlock()

	if _, ok := policy.skipCompactionCollections[collectionID]; !ok {
		policy.skipCompactionCollections[collectionID] = 1
	} else {
		policy.skipCompactionCollections[collectionID]++
	}
}

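// RemoveSkipCollection decrements the skip reference count for a collection,
// deleting the entry once the count drops to zero.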
func (policy *l0CompactionPolicy) RemoveSkipCollection(collectionID UniqueID) {
	policy.skipLocker.Lock()
	defer policy.skipLocker.Unlock()
	refCount := policy.skipCompactionCollections[collectionID]
	if refCount > 1 {
		policy.skipCompactionCollections[collectionID]--
	} else {
		delete(policy.skipCompactionCollections, collectionID)
	}
}

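// isSkipCollection reports whether compaction is currently paused for the collection.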
func (policy *l0CompactionPolicy) isSkipCollection(collectionID UniqueID) bool {
	policy.skipLocker.RLock()
	defer policy.skipLocker.RUnlock()
	return policy.skipCompactionCollections[collectionID] > 0
}

// OnCollectionUpdate notifies the policy to record the collection as actively
// updated (e.g. when a new L0 segment is added).
func (policy *l0CompactionPolicy) OnCollectionUpdate(collectionID int64) {
	policy.activeCollections.Record(collectionID)
}

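// Trigger builds L0 compaction views for every compactable collection,
// classifying them into view-change events for active collections and
// IDLE events for idle ones.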
func (policy *l0CompactionPolicy) Trigger(ctx context.Context) (events map[CompactionTriggerType][]CompactionView, err error) {
	latestCollSegs := policy.meta.GetCompactableSegmentGroupByCollection()

	// 1. Get active collections
	activeColls := policy.activeCollections.GetActiveCollections()

	// 2. Idle collections = all collections - active collections
	missCached, idleColls := lo.Difference(activeColls, lo.Keys(latestCollSegs))
	policy.activeCollections.ClearMissCached(missCached...)

	idleCollsSet := typeutil.NewUniqueSet(idleColls...)
	activeL0Views, idleL0Views := []CompactionView{}, []CompactionView{}
	newTriggerID, err := policy.allocator.AllocID(ctx)
	if err != nil {
		log.Warn("failed to allocate triggerID to trigger l0 compaction", zap.Error(err))
		return nil, err
	}
	events = make(map[CompactionTriggerType][]CompactionView)
	for collID, segments := range latestCollSegs {
		if policy.isSkipCollection(collID) {
			continue
		}

		policy.activeCollections.Read(collID)
		levelZeroSegments := lo.Filter(segments, func(info *SegmentInfo, _ int) bool {
			return info.GetLevel() == datapb.SegmentLevel_L0
		})
		if len(levelZeroSegments) == 0 {
			continue
		}
		labelViews := policy.groupL0ViewsByPartChan(collID, GetViewsByInfo(levelZeroSegments...), newTriggerID)
		if idleCollsSet.Contain(collID) {
			idleL0Views = append(idleL0Views, labelViews...)
		} else {
			activeL0Views = append(activeL0Views, labelViews...)
		}
	}

	if len(activeL0Views) > 0 {
		events[TriggerTypeLevelZeroViewChange] = activeL0Views
	}

	if len(idleL0Views) > 0 {
		events[TriggerTypeLevelZeroViewIDLE] = idleL0Views
	}
	return
}

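// triggerOneCollection force-triggers L0 compaction for a single collection,
// returning the generated views together with the allocated trigger ID.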
func (policy *l0CompactionPolicy) triggerOneCollection(ctx context.Context, collectionID int64) ([]CompactionView, int64, error) {
	log := log.Ctx(ctx).With(zap.Int64("collectionID", collectionID))
	log.Info("start trigger collection l0 compaction")
	if policy.isSkipCollection(collectionID) {
		return nil, 0, merr.WrapErrCollectionNotLoaded(collectionID, "a collection paused by importing cannot do force l0 compaction")
	}
	allL0Segments := policy.meta.SelectSegments(ctx, WithCollection(collectionID), SegmentFilterFunc(func(segment *SegmentInfo) bool {
		return isSegmentHealthy(segment) &&
			isFlushed(segment) &&
			!segment.isCompacting && // not compacting now
			!segment.GetIsImporting() && // not importing now
			segment.GetLevel() == datapb.SegmentLevel_L0
	}))

	if len(allL0Segments) == 0 {
		return nil, 0, nil
	}

	newTriggerID, err := policy.allocator.AllocID(ctx)
	if err != nil {
		log.Warn("failed to allocate triggerID for l0 compaction", zap.Error(err))
		return nil, 0, err
	}
	views := policy.groupL0ViewsByPartChan(collectionID, GetViewsByInfo(allL0Segments...), newTriggerID)
	return views, newTriggerID, nil
}

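// groupL0ViewsByPartChan groups L0 segment views by their partition-channel
// label. Each new view's latestDeletePos is seeded with the earliest start
// position among growing segments of that label, and only L0 segments whose
// position does not exceed this boundary are appended.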
func (policy *l0CompactionPolicy) groupL0ViewsByPartChan(collectionID UniqueID, levelZeroSegments []*SegmentView, triggerID UniqueID) []CompactionView {
	partChanView := make(map[string]*LevelZeroCompactionView) // "part-chan" as key
	for _, segView := range levelZeroSegments {
		key := segView.label.Key()
		if _, ok := partChanView[key]; !ok {
			earliestGrowingStartPos := policy.meta.GetEarliestStartPositionOfGrowingSegments(segView.label)
			partChanView[key] = &LevelZeroCompactionView{
				label:           segView.label,
				l0Segments:      []*SegmentView{},
				latestDeletePos: earliestGrowingStartPos,
				triggerID:       triggerID,
			}
		}

		l0View := partChanView[key]
		// Only choose segments with position less than or equal to the earliest growing segment position
		if segView.dmlPos.GetTimestamp() <= l0View.latestDeletePos.GetTimestamp() {
			l0View.Append(segView)
		}
	}

	return lo.Map(lo.Values(partChanView), func(view *LevelZeroCompactionView, _ int) CompactionView {
		return view
	})
}

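// activeCollection tracks a collection that has recently received new L0
// segments, recording when it was last refreshed and how many trigger passes
// have read it since.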
type activeCollection struct {
	ID          int64
	lastRefresh time.Time
	readCount   *atomic.Int64
}

func newActiveCollection(ID int64) *activeCollection {
	return &activeCollection{
		ID:          ID,
		lastRefresh: time.Now(),
		readCount:   atomic.NewInt64(0),
	}
}

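// activeCollections is a thread-safe registry of recently active collections.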
type activeCollections struct {
	collections map[int64]*activeCollection
	collGuard   sync.RWMutex
}

func newActiveCollections() *activeCollections {
	return &activeCollections{
		collections: make(map[int64]*activeCollection),
	}
}

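// ClearMissCached removes collections that no longer have compactable segments.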
func (ac *activeCollections) ClearMissCached(collectionIDs ...int64) {
	ac.collGuard.Lock()
	defer ac.collGuard.Unlock()
	lo.ForEach(collectionIDs, func(collID int64, _ int) {
		delete(ac.collections, collID)
	})
}

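// Record marks a collection as active; for an already-tracked collection it
// refreshes the timestamp and resets the read count.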
func (ac *activeCollections) Record(collectionID int64) {
	ac.collGuard.Lock()
	defer ac.collGuard.Unlock()
	if _, ok := ac.collections[collectionID]; !ok {
		ac.collections[collectionID] = newActiveCollection(collectionID)
	} else {
		ac.collections[collectionID].lastRefresh = time.Now()
		ac.collections[collectionID].readCount.Store(0)
	}
}

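// Read counts one trigger pass over the collection. After at least three
// reads without a refresh for three L0-trigger intervals, the collection is
// considered idle and dropped from the registry.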
func (ac *activeCollections) Read(collectionID int64) {
	ac.collGuard.Lock()
	defer ac.collGuard.Unlock()
	if _, ok := ac.collections[collectionID]; ok {
		ac.collections[collectionID].readCount.Inc()
		if ac.collections[collectionID].readCount.Load() >= 3 &&
			time.Since(ac.collections[collectionID].lastRefresh) > 3*paramtable.Get().DataCoordCfg.L0CompactionTriggerInterval.GetAsDuration(time.Second) {
			log.Info("Active(of deletions) collections become idle", zap.Int64("collectionID", collectionID))
			delete(ac.collections, collectionID)
		}
	}
}

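// GetActiveCollections returns the IDs of all currently tracked active collections.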
func (ac *activeCollections) GetActiveCollections() []int64 {
	ac.collGuard.RLock()
	defer ac.collGuard.RUnlock()

	return lo.Keys(ac.collections)
}