enhance: add end time stats for compaction and index build tasks (#38048)

issue: #36621

Signed-off-by: jaime <yun.zhang@zilliz.com>
jaime 2024-12-06 16:28:40 +08:00 committed by GitHub
parent 0d4073bdd8
commit 60dd55f292
6 changed files with 50 additions and 28 deletions
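The changes below follow one pattern: when a compaction task or an index build task reaches a terminal state, the current Unix time is persisted as its end time (setEndTime for compaction tasks, FinishedUTCTime for segment indexes), and the total cost is then derived as end minus start, both stored in seconds, for logs and metrics. A minimal, self-contained sketch of that pattern; the toyTask type and its fields are illustrative stand-ins, not the real datacoord structs:

package main

import (
    "time"

    "go.uber.org/zap"
)

// toyTask mirrors only the fields relevant here: start and end times are
// kept as Unix seconds, like StartTime/EndTime on the task proto.
type toyTask struct {
    State     string
    StartTime int64
    EndTime   int64
}

func main() {
    logger, _ := zap.NewDevelopment()

    task := &toyTask{State: "executing", StartTime: time.Now().Unix()}

    // ... the task runs ...

    // On reaching a terminal state, stamp the end time together with the
    // state change (the real code persists both in one updateAndSaveTaskMeta
    // call using setEndTime(ts) and setState(...) options).
    task.EndTime = time.Now().Unix()
    task.State = "completed"

    // The elapsed cost is reconstructed from the two second-granularity stamps.
    logger.Info("task completed",
        zap.Duration("costs", time.Duration(task.EndTime-task.StartTime)*time.Second))
}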

View File

@@ -116,7 +116,7 @@ func (t *clusteringCompactionTask) Process() bool {
if t.GetTaskProto().State == datapb.CompactionTaskState_completed || t.GetTaskProto().State == datapb.CompactionTaskState_cleaned {
updateOps = append(updateOps, setEndTime(ts))
elapse := ts - t.GetTaskProto().StartTime
log.Info("clustering compaction task total elapse", zap.Int64("elapse seconds", elapse))
log.Info("clustering compaction task total elapse", zap.Duration("costs", time.Duration(elapse)*time.Second))
metrics.DataCoordCompactionLatency.
WithLabelValues(fmt.Sprint(typeutil.IsVectorType(t.GetTaskProto().GetClusteringKeyField().DataType)), fmt.Sprint(t.GetTaskProto().CollectionID), t.GetTaskProto().Channel, datapb.CompactionType_ClusteringCompaction.String(), "total").
Observe(float64(elapse * 1000))
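Note that elapse above is a difference of Unix timestamps, i.e. whole seconds, so multiplying by 1000 feeds the latency histogram in milliseconds. A standalone sketch of that conversion; the histogram name and buckets here are illustrative, not the actual DataCoordCompactionLatency definition (which is a labeled histogram accessed via WithLabelValues):

package main

import (
    "fmt"
    "time"

    "github.com/prometheus/client_golang/prometheus"
)

// Illustrative, unlabeled histogram standing in for the real metric.
var compactionLatency = prometheus.NewHistogram(prometheus.HistogramOpts{
    Name:    "toy_compaction_latency_ms",
    Buckets: prometheus.ExponentialBuckets(100, 2, 12), // ~100ms .. ~200s
})

func main() {
    start := time.Now().Unix()
    // ... compaction runs ...
    end := time.Now().Unix()

    elapse := end - start                             // seconds
    compactionLatency.Observe(float64(elapse * 1000)) // observed in milliseconds
    fmt.Printf("observed %d ms\n", elapse*1000)
}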

View File

@@ -19,6 +19,7 @@ package datacoord
import (
"context"
"fmt"
"time"
"github.com/cockroachdb/errors"
"github.com/samber/lo"
@@ -129,6 +130,9 @@ func (t *l0CompactionTask) processExecuting() bool {
log.Warn("l0CompactionTask failed to get compaction result", zap.Error(err))
return false
}
+ts := time.Now().Unix()
+updateOps := []compactionTaskOpt{setEndTime(ts)}
+
switch result.GetState() {
case datapb.CompactionTaskState_completed:
t.result = result
@@ -137,13 +141,15 @@
return false
}
-if err := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_meta_saved)); err != nil {
+updateOps = append(updateOps, setState(datapb.CompactionTaskState_meta_saved))
+if err := t.updateAndSaveTaskMeta(updateOps...); err != nil {
log.Warn("l0CompactionTask failed to save task meta_saved state", zap.Error(err))
return false
}
return t.processMetaSaved()
case datapb.CompactionTaskState_failed:
-if err := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_failed)); err != nil {
+updateOps = append(updateOps, setState(datapb.CompactionTaskState_failed))
+if err := t.updateAndSaveTaskMeta(updateOps...); err != nil {
log.Warn("l0CompactionTask failed to set task failed state", zap.Error(err))
return false
}
@@ -153,7 +159,9 @@
}
func (t *l0CompactionTask) processMetaSaved() bool {
-err := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_completed))
+ts := time.Now().Unix()
+updateOps := []compactionTaskOpt{setEndTime(ts), setState(datapb.CompactionTaskState_completed)}
+err := t.updateAndSaveTaskMeta(updateOps...)
if err != nil {
log.Warn("l0CompactionTask unable to processMetaSaved", zap.Int64("planID", t.GetTaskProto().GetPlanID()), zap.Error(err))
return false
@@ -173,7 +181,9 @@ func (t *l0CompactionTask) processCompleted() bool {
t.resetSegmentCompacting()
UpdateCompactionSegmentSizeMetrics(t.result.GetSegments())
log.Info("l0CompactionTask processCompleted done", zap.Int64("planID", t.GetTaskProto().GetPlanID()))
task := t.taskProto.Load().(*datapb.CompactionTask)
log.Info("l0CompactionTask processCompleted done", zap.Int64("planID", task.GetPlanID()),
zap.Duration("costs", time.Duration(task.GetEndTime()-task.GetStartTime())*time.Second))
return true
}

View File

@@ -3,6 +3,7 @@ package datacoord
import (
"context"
"fmt"
"time"
"github.com/cockroachdb/errors"
"github.com/samber/lo"
@@ -93,7 +94,9 @@ func (t *mixCompactionTask) processPipelining() bool {
func (t *mixCompactionTask) processMetaSaved() bool {
log := log.With(zap.Int64("triggerID", t.GetTaskProto().GetTriggerID()), zap.Int64("PlanID", t.GetTaskProto().GetPlanID()), zap.Int64("collectionID", t.GetTaskProto().GetCollectionID()))
-if err := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_completed)); err != nil {
+ts := time.Now().Unix()
+updateOps := []compactionTaskOpt{setEndTime(ts), setState(datapb.CompactionTaskState_completed)}
+if err := t.updateAndSaveTaskMeta(updateOps...); err != nil {
log.Warn("mixCompactionTask failed to proccessMetaSaved", zap.Error(err))
return false
}
@@ -113,12 +116,15 @@ func (t *mixCompactionTask) processExecuting() bool {
log.Warn("mixCompactionTask failed to get compaction result", zap.Error(err))
return false
}
+ts := time.Now().Unix()
+failedUpdateOps := []compactionTaskOpt{setEndTime(ts), setState(datapb.CompactionTaskState_failed)}
+
switch result.GetState() {
case datapb.CompactionTaskState_completed:
t.result = result
if len(result.GetSegments()) == 0 {
log.Info("illegal compaction results")
-err := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_failed))
+err := t.updateAndSaveTaskMeta(failedUpdateOps...)
if err != nil {
log.Warn("mixCompactionTask failed to setState failed", zap.Error(err))
return false
@@ -128,7 +134,7 @@
if err := t.saveSegmentMeta(); err != nil {
log.Warn("mixCompactionTask failed to save segment meta", zap.Error(err))
if errors.Is(err, merr.ErrIllegalCompactionPlan) {
-err = t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_failed))
+err := t.updateAndSaveTaskMeta(failedUpdateOps...)
if err != nil {
log.Warn("mixCompactionTask failed to setState failed", zap.Error(err))
return false
@@ -145,7 +151,7 @@
return t.processMetaSaved()
case datapb.CompactionTaskState_failed:
log.Info("mixCompactionTask fail in datanode")
-err := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_failed))
+err := t.updateAndSaveTaskMeta(failedUpdateOps...)
if err != nil {
log.Warn("fail to updateAndSaveTaskMeta")
}
@@ -231,8 +237,10 @@ func (t *mixCompactionTask) processCompleted() bool {
t.resetSegmentCompacting()
UpdateCompactionSegmentSizeMetrics(t.result.GetSegments())
log.Info("mixCompactionTask processCompleted done")
task := t.GetTaskProto()
log.Info("mixCompactionTask processCompleted done",
zap.Int64("planID", task.GetPlanID()), zap.Duration("costs", time.Duration(task.GetEndTime()-task.GetStartTime())*time.Second))
return true
}
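The updateOps / failedUpdateOps slices used above bundle several mutations (end time, state) so they are applied and persisted in one updateAndSaveTaskMeta call. A sketch of that functional-option style with simplified local types; the real compactionTaskOpt operates on the datapb.CompactionTask proto inside datacoord:

package main

import "fmt"

type taskMeta struct {
    State   string
    EndTime int64
}

type taskOpt func(*taskMeta)

func setEndTime(ts int64) taskOpt {
    return func(m *taskMeta) { m.EndTime = ts }
}

func setState(state string) taskOpt {
    return func(m *taskMeta) { m.State = state }
}

// updateAndSave applies every option to the in-memory meta; the real
// updateAndSaveTaskMeta additionally persists the result to the meta store.
func updateAndSave(m *taskMeta, opts ...taskOpt) error {
    for _, opt := range opts {
        opt(m)
    }
    return nil // persistence omitted in this sketch
}

func main() {
    m := &taskMeta{State: "executing"}
    ops := []taskOpt{setEndTime(1733473720), setState("completed")}
    if err := updateAndSave(m, ops...); err != nil {
        fmt.Println("save failed:", err)
    }
    fmt.Printf("%+v\n", m)
}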

View File

@@ -68,15 +68,16 @@ type indexMeta struct {
func newIndexTaskStats(s *model.SegmentIndex) *metricsinfo.IndexTaskStats {
return &metricsinfo.IndexTaskStats{
-IndexID:        s.IndexID,
-CollectionID:   s.CollectionID,
-SegmentID:      s.SegmentID,
-BuildID:        s.BuildID,
-IndexState:     s.IndexState.String(),
-FailReason:     s.FailReason,
-IndexSize:      s.IndexSize,
-IndexVersion:   s.IndexVersion,
-CreatedUTCTime: typeutil.TimestampToString(s.CreatedUTCTime),
+IndexID:         s.IndexID,
+CollectionID:    s.CollectionID,
+SegmentID:       s.SegmentID,
+BuildID:         s.BuildID,
+IndexState:      s.IndexState.String(),
+FailReason:      s.FailReason,
+IndexSize:       s.IndexSize,
+IndexVersion:    s.IndexVersion,
+CreatedUTCTime:  typeutil.TimestampToString(s.CreatedUTCTime),
+FinishedUTCTime: typeutil.TimestampToString(s.FinishedUTCTime),
}
}
@@ -789,6 +790,7 @@ func (m *indexMeta) FinishTask(taskInfo *workerpb.IndexTaskInfo) error {
segIdx.FailReason = taskInfo.GetFailReason()
segIdx.IndexSize = taskInfo.GetSerializedSize()
segIdx.CurrentIndexVersion = taskInfo.GetCurrentIndexVersion()
+segIdx.FinishedUTCTime = uint64(time.Now().Unix())
return m.alterSegmentIndexes([]*model.SegmentIndex{segIdx})
}
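FinishedUTCTime is recorded as Unix seconds when the index build result is accepted, and the stats constructor shown earlier passes it through typeutil.TimestampToString for display. Assuming that helper renders the value as a human-readable UTC string, a standard-library equivalent looks like:

package main

import (
    "fmt"
    "time"
)

func main() {
    // What FinishTask writes into SegmentIndex.FinishedUTCTime.
    finished := uint64(time.Now().Unix())

    // Hypothetical formatting for the stats view; the exact layout used by
    // typeutil.TimestampToString may differ.
    readable := time.Unix(int64(finished), 0).UTC().Format("2006-01-02 15:04:05")
    fmt.Println("finished_time:", readable)
}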

View File

@@ -25,6 +25,7 @@ type SegmentIndex struct {
WriteHandoff bool
CurrentIndexVersion int32
IndexStoreVersion int64
+FinishedUTCTime uint64
}
func UnmarshalSegmentIndexModel(segIndex *indexpb.SegmentIndex) *SegmentIndex {

View File

@@ -303,15 +303,16 @@ type DataNodeConfiguration struct {
}
type IndexTaskStats struct {
-IndexID        int64  `json:"index_id,omitempty,string"`
-CollectionID   int64  `json:"collection_id,omitempty,string"`
-SegmentID      int64  `json:"segment_id,omitempty,string"`
-BuildID        int64  `json:"build_id,omitempty,string"`
-IndexState     string `json:"index_state,omitempty"`
-FailReason     string `json:"fail_reason,omitempty"`
-IndexSize      uint64 `json:"index_size,omitempty,string"`
-IndexVersion   int64  `json:"index_version,omitempty,string"`
-CreatedUTCTime string `json:"create_time,omitempty"`
+IndexID         int64  `json:"index_id,omitempty,string"`
+CollectionID    int64  `json:"collection_id,omitempty,string"`
+SegmentID       int64  `json:"segment_id,omitempty,string"`
+BuildID         int64  `json:"build_id,omitempty,string"`
+IndexState      string `json:"index_state,omitempty"`
+FailReason      string `json:"fail_reason,omitempty"`
+IndexSize       uint64 `json:"index_size,omitempty,string"`
+IndexVersion    int64  `json:"index_version,omitempty,string"`
+CreatedUTCTime  string `json:"create_time,omitempty"`
+FinishedUTCTime string `json:"finished_time,omitempty"`
}
type SyncTask struct {
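For the struct above, the "string" option in the json tags makes encoding/json emit the numeric IDs as quoted strings, and the new finished_time joins create_time as a plain string field. A trimmed, self-contained illustration with made-up values:

package main

import (
    "encoding/json"
    "fmt"
)

// Trimmed copy of IndexTaskStats, kept only to show the tag behavior.
type indexTaskStats struct {
    IndexID         int64  `json:"index_id,omitempty,string"`
    BuildID         int64  `json:"build_id,omitempty,string"`
    IndexState      string `json:"index_state,omitempty"`
    CreatedUTCTime  string `json:"create_time,omitempty"`
    FinishedUTCTime string `json:"finished_time,omitempty"`
}

func main() {
    b, _ := json.Marshal(indexTaskStats{
        IndexID:         101,
        BuildID:         1001,
        IndexState:      "Finished",
        CreatedUTCTime:  "2024-12-06 08:00:00",
        FinishedUTCTime: "2024-12-06 08:05:00",
    })
    fmt.Println(string(b))
    // {"index_id":"101","build_id":"1001","index_state":"Finished",
    //  "create_time":"2024-12-06 08:00:00","finished_time":"2024-12-06 08:05:00"}
}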