milvus/internal/datacoord/compaction_task_l0.go
XuanYang-cn 2dd2c96eb1
fix: Accidentally ignored sealed segments in L0 Compaction (#45340)
When there are no growing segments in the collection, L0 Compaction will
try to choose all L0 segments that hit all L1/L2 segments.

However, if a Sealed segment is still being flushed on the DataNode at
the same time L0 Compaction selects the qualifying L1/L2 segments, L0
Compaction will ignore this segment because it is not in the flushed
state, which is wrong and causes deletes to be missed on that Sealed
segment.

The quick solution here is to fail the L0 compaction task as soon as a
Sealed segment is selected.

See also: #45339

---------

Signed-off-by: yangxuan <xuan.yang@zilliz.com>
2025-11-06 16:53:38 +08:00

436 lines
16 KiB
Go
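
At its core, the change makes segment selection return an error when it encounters a Sealed segment instead of silently filtering it out. Below is a minimal, self-contained sketch of that rule, using hypothetical segment and state types rather than the actual Milvus ones; the real check lives in selectFlushedSegment further down.

package main

import "fmt"

type segmentState int

const (
    stateSealed  segmentState = iota // still being flushed on the DataNode
    stateFlushed
)

type segment struct {
    id    int64
    state segmentState
}

// selectTargets keeps flushed segments and fails fast on a sealed one,
// mirroring the rule this commit adds to selectFlushedSegment.
func selectTargets(segs []segment) ([]segment, error) {
    targets := make([]segment, 0, len(segs))
    for _, s := range segs {
        if s.state == stateSealed {
            return nil, fmt.Errorf("L0 compaction selected invalid sealed segment %d", s.id)
        }
        targets = append(targets, s)
    }
    return targets, nil
}

func main() {
    _, err := selectTargets([]segment{{id: 1, state: stateFlushed}, {id: 2, state: stateSealed}})
    fmt.Println(err) // L0 compaction selected invalid sealed segment 2
}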

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datacoord
import (
"context"
"fmt"
"time"
"github.com/cockroachdb/errors"
"go.uber.org/atomic"
"go.uber.org/zap"
"google.golang.org/protobuf/proto"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus/internal/compaction"
"github.com/milvus-io/milvus/internal/datacoord/allocator"
"github.com/milvus-io/milvus/internal/datacoord/session"
"github.com/milvus-io/milvus/pkg/v2/common"
"github.com/milvus-io/milvus/pkg/v2/log"
"github.com/milvus-io/milvus/pkg/v2/metrics"
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
"github.com/milvus-io/milvus/pkg/v2/taskcommon"
"github.com/milvus-io/milvus/pkg/v2/util/merr"
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
)
var _ CompactionTask = (*l0CompactionTask)(nil)
type l0CompactionTask struct {
taskProto atomic.Value // *datapb.CompactionTask
allocator allocator.Allocator
meta CompactionMeta
times *taskcommon.Times
}
func (t *l0CompactionTask) GetTaskID() int64 {
return t.GetTaskProto().GetPlanID()
}
func (t *l0CompactionTask) GetTaskType() taskcommon.Type {
return taskcommon.Compaction
}
func (t *l0CompactionTask) GetTaskState() taskcommon.State {
return taskcommon.FromCompactionState(t.GetTaskProto().GetState())
}
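// GetTaskSlot estimates the slot usage of this task as
// L0DeleteCompactionSlotUsage * totalRows / BloomFilterApplyBatchSize, with a floor of 1.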
func (t *l0CompactionTask) GetTaskSlot() int64 {
batchSize := paramtable.Get().CommonCfg.BloomFilterApplyBatchSize.GetAsInt()
factor := paramtable.Get().DataCoordCfg.L0DeleteCompactionSlotUsage.GetAsInt64()
slot := factor * t.GetTaskProto().GetTotalRows() / int64(batchSize)
if slot < 1 {
return 1
}
return slot
}
func (t *l0CompactionTask) SetTaskTime(timeType taskcommon.TimeType, time time.Time) {
t.times.SetTaskTime(timeType, time)
}
func (t *l0CompactionTask) GetTaskTime(timeType taskcommon.TimeType) time.Time {
return timeType.GetTaskTime(t.times)
}
func (t *l0CompactionTask) GetTaskVersion() int64 {
return int64(t.GetTaskProto().GetRetryTimes())
}
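// CreateTaskOnWorker builds the compaction plan and dispatches it to the given DataNode.
// A build failure marks the task failed; a dispatch failure resets the task to pipelining
// with no node assigned so it can be rescheduled.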
func (t *l0CompactionTask) CreateTaskOnWorker(nodeID int64, cluster session.Cluster) {
log := log.With(zap.Int64("triggerID", t.GetTaskProto().GetTriggerID()), zap.Int64("nodeID", t.GetTaskProto().GetNodeID()))
plan, err := t.BuildCompactionRequest()
if err != nil {
log.Warn("l0CompactionTask failed to build compaction request", zap.Error(err))
err = t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_failed), setFailReason(err.Error()))
if err != nil {
log.Warn("l0CompactionTask failed to updateAndSaveTaskMeta", zap.Error(err))
}
return
}
err = cluster.CreateCompaction(nodeID, plan)
if err != nil {
originNodeID := t.GetTaskProto().GetNodeID()
log.Warn("l0CompactionTask failed to notify compaction tasks to DataNode",
zap.Int64("planID", t.GetTaskProto().GetPlanID()),
zap.Int64("nodeID", originNodeID),
zap.Error(err))
err = t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_pipelining), setNodeID(NullNodeID))
if err != nil {
log.Warn("l0CompactionTask failed to updateAndSaveTaskMeta", zap.Int64("planID", t.GetTaskProto().GetPlanID()), zap.Error(err))
return
}
metrics.DataCoordCompactionTaskNum.WithLabelValues(fmt.Sprintf("%d", originNodeID), t.GetTaskProto().GetType().String(), metrics.Executing).Dec()
metrics.DataCoordCompactionTaskNum.WithLabelValues(fmt.Sprintf("%d", NullNodeID), t.GetTaskProto().GetType().String(), metrics.Pending).Inc()
return
}
err = t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_executing), setNodeID(nodeID))
if err != nil {
log.Warn("l0CompactionTask failed to updateAndSaveTaskMeta", zap.Error(err))
}
}
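// QueryTaskOnWorker polls the assigned DataNode for the compaction result and advances the
// task state accordingly: persisting segment meta on completion, or recording timeout/failure.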
func (t *l0CompactionTask) QueryTaskOnWorker(cluster session.Cluster) {
log := log.With(zap.Int64("planID", t.GetTaskProto().GetPlanID()), zap.Int64("nodeID", t.GetTaskProto().GetNodeID()))
result, err := cluster.QueryCompaction(t.GetTaskProto().GetNodeID(), &datapb.CompactionStateRequest{
PlanID: t.GetTaskProto().GetPlanID(),
})
if err != nil || result == nil {
if errors.Is(err, merr.ErrNodeNotFound) {
t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_pipelining), setNodeID(NullNodeID))
}
log.Warn("l0CompactionTask failed to get compaction result", zap.Error(err))
return
}
switch result.GetState() {
case datapb.CompactionTaskState_completed:
err = t.meta.ValidateSegmentStateBeforeCompleteCompactionMutation(t.GetTaskProto())
if err != nil {
t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_failed), setFailReason(err.Error()))
return
}
if err = t.saveSegmentMeta(result); err != nil {
log.Warn("l0CompactionTask failed to save segment meta", zap.Error(err))
return
}
if err = t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_meta_saved)); err != nil {
log.Warn("l0CompactionTask failed to save task meta_saved state", zap.Error(err))
return
}
UpdateCompactionSegmentSizeMetrics(result.GetSegments())
t.processMetaSaved()
case datapb.CompactionTaskState_pipelining, datapb.CompactionTaskState_executing:
return
case datapb.CompactionTaskState_timeout:
err = t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_timeout))
if err != nil {
log.Warn("update clustering compaction task meta failed", zap.Error(err))
return
}
case datapb.CompactionTaskState_failed:
if err = t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_failed)); err != nil {
log.Warn("l0CompactionTask failed to set task failed state", zap.Error(err))
return
}
default:
log.Error("not support compaction task state", zap.String("state", result.GetState().String()))
err = t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_failed))
if err != nil {
log.Warn("update clustering compaction task meta failed", zap.Error(err))
return
}
}
}
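// DropTaskOnWorker asks the assigned DataNode, if any, to drop this compaction plan.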
func (t *l0CompactionTask) DropTaskOnWorker(cluster session.Cluster) {
if t.hasAssignedWorker() {
err := cluster.DropCompaction(t.GetTaskProto().GetNodeID(), t.GetTaskProto().GetPlanID())
if err != nil {
log.Warn("l0CompactionTask unable to drop compaction plan", zap.Int64("planID", t.GetTaskProto().GetPlanID()), zap.Error(err))
}
}
}
func (t *l0CompactionTask) GetTaskProto() *datapb.CompactionTask {
task := t.taskProto.Load()
if task == nil {
return nil
}
return task.(*datapb.CompactionTask)
}
func newL0CompactionTask(t *datapb.CompactionTask, allocator allocator.Allocator, meta CompactionMeta) *l0CompactionTask {
task := &l0CompactionTask{
allocator: allocator,
meta: meta,
times: taskcommon.NewTimes(),
}
task.taskProto.Store(t)
return task
}
// Note: returning true means this state machine exits.
// ONLY return true for Completed, Failed and Timeout.
func (t *l0CompactionTask) Process() bool {
switch t.GetTaskProto().GetState() {
case datapb.CompactionTaskState_meta_saved:
return t.processMetaSaved()
case datapb.CompactionTaskState_completed:
return t.processCompleted()
case datapb.CompactionTaskState_failed:
return true
case datapb.CompactionTaskState_timeout:
return true
default:
return false
}
}
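// processMetaSaved moves the task from meta_saved to completed and runs the completion steps.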
func (t *l0CompactionTask) processMetaSaved() bool {
err := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_completed))
if err != nil {
log.Warn("l0CompactionTask unable to processMetaSaved", zap.Int64("planID", t.GetTaskProto().GetPlanID()), zap.Error(err))
return false
}
return t.processCompleted()
}
func (t *l0CompactionTask) processCompleted() bool {
t.resetSegmentCompacting()
task := t.taskProto.Load().(*datapb.CompactionTask)
log.Info("l0CompactionTask processCompleted done", zap.Int64("planID", task.GetPlanID()),
zap.Duration("costs", time.Duration(task.GetEndTime()-task.GetStartTime())*time.Second))
return true
}
func (t *l0CompactionTask) doClean() error {
log := log.With(zap.Int64("planID", t.GetTaskProto().GetPlanID()))
err := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_cleaned))
if err != nil {
log.Warn("l0CompactionTask failed to updateAndSaveTaskMeta", zap.Error(err))
return err
}
// resetSegmentCompacting must be the last step of Clean, to make sure resetSegmentCompacting only called once
// otherwise, it may unlock segments locked by other compaction tasks
t.resetSegmentCompacting()
log.Info("l0CompactionTask clean done")
return nil
}
func (t *l0CompactionTask) Clean() bool {
return t.doClean() == nil
}
func (t *l0CompactionTask) SetTask(task *datapb.CompactionTask) {
t.taskProto.Store(task)
}
func (t *l0CompactionTask) GetLabel() string {
return fmt.Sprintf("%d-%s", t.GetTaskProto().PartitionID, t.GetTaskProto().GetChannel())
}
func (t *l0CompactionTask) NeedReAssignNodeID() bool {
return t.GetTaskProto().GetState() == datapb.CompactionTaskState_pipelining && (!t.hasAssignedWorker())
}
func (t *l0CompactionTask) ShadowClone(opts ...compactionTaskOpt) *datapb.CompactionTask {
taskClone := proto.Clone(t.GetTaskProto()).(*datapb.CompactionTask)
for _, opt := range opts {
opt(taskClone)
}
return taskClone
}
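// selectFlushedSegment returns the flushed L1/L2 segments on the task's channel whose start
// position precedes the trigger position, together with their CompactionSegmentBinlogs
// descriptors. Encountering a Sealed (still-flushing) segment is an error, so the task fails
// fast instead of silently missing deletes on that segment (see #45340).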
func (t *l0CompactionTask) selectFlushedSegment() ([]*SegmentInfo, []*datapb.CompactionSegmentBinlogs, error) {
taskProto := t.taskProto.Load().(*datapb.CompactionTask)
// Select flushed L1/L2 segments for LevelZero compaction that meet the condition:
// startPosition.Timestamp < triggerInfo.pos.Timestamp
flushedSegments := t.meta.SelectSegments(context.TODO(), WithCollection(taskProto.GetCollectionID()), SegmentFilterFunc(func(info *SegmentInfo) bool {
return (taskProto.GetPartitionID() == common.AllPartitionsID || info.GetPartitionID() == taskProto.GetPartitionID()) &&
info.GetInsertChannel() == taskProto.GetChannel() &&
(info.GetState() == commonpb.SegmentState_Sealed || isFlushState(info.GetState())) &&
!info.GetIsImporting() &&
info.GetLevel() != datapb.SegmentLevel_L0 &&
info.GetStartPosition().GetTimestamp() < taskProto.GetPos().GetTimestamp()
}))
sealedSegBinlogs := []*datapb.CompactionSegmentBinlogs{}
for _, info := range flushedSegments {
// Sealed is unexpected, fail fast
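// A Sealed segment here means its flush is still in progress on the DataNode; silently
// skipping it would lose the L0 deletes that target it (#45340), so fail the task instead
// of producing a plan that misses those deletes.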
if info.GetState() == commonpb.SegmentState_Sealed {
return nil, nil, fmt.Errorf("L0 compaction selected invalid sealed segment %d", info.GetID())
}
sealedSegBinlogs = append(sealedSegBinlogs, &datapb.CompactionSegmentBinlogs{
SegmentID: info.GetID(),
Field2StatslogPaths: info.GetStatslogs(),
InsertChannel: info.GetInsertChannel(),
Level: info.GetLevel(),
CollectionID: info.GetCollectionID(),
PartitionID: info.GetPartitionID(),
IsSorted: info.GetIsSorted(),
})
}
return flushedSegments, sealedSegBinlogs, nil
}
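// BuildCompactionRequest assembles the LevelZero compaction plan: the input L0 segments (with
// their delta logs) plus the flushed L1/L2 target segments, along with pre-allocated log IDs.
// It fails if any input segment is unhealthy or if no valid L1/L2 target can be selected.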
func (t *l0CompactionTask) BuildCompactionRequest() (*datapb.CompactionPlan, error) {
compactionParams, err := compaction.GenerateJSONParams()
if err != nil {
return nil, err
}
taskProto := t.taskProto.Load().(*datapb.CompactionTask)
plan := &datapb.CompactionPlan{
PlanID: taskProto.GetPlanID(),
StartTime: taskProto.GetStartTime(),
Type: taskProto.GetType(),
Channel: taskProto.GetChannel(),
CollectionTtl: taskProto.GetCollectionTtl(),
TotalRows: taskProto.GetTotalRows(),
Schema: taskProto.GetSchema(),
SlotUsage: t.GetSlotUsage(),
JsonParams: compactionParams,
}
log := log.With(zap.Int64("taskID", taskProto.GetTriggerID()), zap.Int64("planID", plan.GetPlanID()))
segments := make([]*SegmentInfo, 0)
for _, segID := range taskProto.GetInputSegments() {
segInfo := t.meta.GetHealthySegment(context.TODO(), segID)
if segInfo == nil {
return nil, merr.WrapErrSegmentNotFound(segID)
}
plan.SegmentBinlogs = append(plan.SegmentBinlogs, &datapb.CompactionSegmentBinlogs{
SegmentID: segID,
CollectionID: segInfo.GetCollectionID(),
PartitionID: segInfo.GetPartitionID(),
Level: segInfo.GetLevel(),
InsertChannel: segInfo.GetInsertChannel(),
Deltalogs: segInfo.GetDeltalogs(),
IsSorted: segInfo.GetIsSorted(),
})
segments = append(segments, segInfo)
}
flushedSegments, flushedSegBinlogs, err := t.selectFlushedSegment()
if err != nil {
log.Warn("invalid L0 compaction plan, unable to select flushed segments", zap.Error(err))
return nil, err
}
if len(flushedSegments) == 0 {
// TODO fast finish l0 segment, just drop l0 segment
log.Info("l0Compaction available non-L0 Segments is empty ")
return nil, errors.Errorf("Selected zero L1/L2 segments for the position=%v", taskProto.GetPos())
}
segments = append(segments, flushedSegments...)
logIDRange, err := PreAllocateBinlogIDs(t.allocator, segments)
if err != nil {
return nil, err
}
plan.PreAllocatedLogIDs = logIDRange
// BeginLogID is deprecated, but still assign it for compatibility.
plan.BeginLogID = logIDRange.Begin
plan.SegmentBinlogs = append(plan.SegmentBinlogs, flushedSegBinlogs...)
log.Info("l0CompactionTask refreshed level zero compaction plan",
zap.Any("target position", taskProto.GetPos()),
zap.Any("target segments count", len(flushedSegBinlogs)),
zap.Any("PreAllocatedLogIDs", logIDRange))
WrapPluginContext(taskProto.GetCollectionID(), taskProto.GetSchema().GetProperties(), plan)
return plan, nil
}
func (t *l0CompactionTask) resetSegmentCompacting() {
t.meta.SetSegmentsCompacting(context.TODO(), t.GetTaskProto().GetInputSegments(), false)
}
func (t *l0CompactionTask) hasAssignedWorker() bool {
return t.GetTaskProto().GetNodeID() != 0 && t.GetTaskProto().GetNodeID() != NullNodeID
}
func (t *l0CompactionTask) SetNodeID(id UniqueID) error {
return t.updateAndSaveTaskMeta(setNodeID(id))
}
func (t *l0CompactionTask) SaveTaskMeta() error {
return t.saveTaskMeta(t.GetTaskProto())
}
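// updateAndSaveTaskMeta applies the given options to a clone of the task proto, persists it,
// and then swaps the clone in. Terminal states (completed, cleaned, failed, timeout) also get
// an end timestamp appended.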
func (t *l0CompactionTask) updateAndSaveTaskMeta(opts ...compactionTaskOpt) error {
// if task state is completed, cleaned, failed, timeout, then do append end time and save
if t.GetTaskProto().State == datapb.CompactionTaskState_completed ||
t.GetTaskProto().State == datapb.CompactionTaskState_cleaned ||
t.GetTaskProto().State == datapb.CompactionTaskState_failed ||
t.GetTaskProto().State == datapb.CompactionTaskState_timeout {
ts := time.Now().Unix()
opts = append(opts, setEndTime(ts))
}
task := t.ShadowClone(opts...)
err := t.saveTaskMeta(task)
if err != nil {
return err
}
t.SetTask(task)
return nil
}
func (t *l0CompactionTask) saveTaskMeta(task *datapb.CompactionTask) error {
return t.meta.SaveCompactionTask(context.TODO(), task)
}
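// saveSegmentMeta appends the delta logs produced by the compaction to the target segments and
// marks the input L0 segments as Dropped and compacted.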
func (t *l0CompactionTask) saveSegmentMeta(result *datapb.CompactionPlanResult) error {
var operators []UpdateOperator
for _, seg := range result.GetSegments() {
operators = append(operators, AddBinlogsOperator(seg.GetSegmentID(), nil, nil, seg.GetDeltalogs(), nil))
}
for _, segID := range t.GetTaskProto().InputSegments {
operators = append(operators, UpdateStatusOperator(segID, commonpb.SegmentState_Dropped), UpdateCompactedOperator(segID))
}
log.Info("meta update: update segments info for level zero compaction",
zap.Int64("planID", t.GetTaskProto().GetPlanID()),
)
return t.meta.UpdateSegmentsInfo(context.TODO(), operators...)
}
func (t *l0CompactionTask) GetSlotUsage() int64 {
return t.GetTaskSlot()
}