From b09e7aeaf70046eace16bec6326231b11f49aba2 Mon Sep 17 00:00:00 2001 From: MrPresent-Han <116052805+MrPresent-Han@users.noreply.github.com> Date: Tue, 30 May 2023 14:59:28 +0800 Subject: [PATCH] support detailed task metrics(#23414) (#24507) Signed-off-by: MrPresent-Han --- internal/querycoordv2/task/scheduler.go | 39 +++++++++++++++++++++++-- pkg/metrics/querycoord_metrics.go | 14 ++++++++- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/internal/querycoordv2/task/scheduler.go b/internal/querycoordv2/task/scheduler.go index 0529023abc..6d0f6d6f94 100644 --- a/internal/querycoordv2/task/scheduler.go +++ b/internal/querycoordv2/task/scheduler.go @@ -243,11 +243,46 @@ func (scheduler *taskScheduler) Add(task Task) error { scheduler.channelTasks[index] = task } - metrics.QueryCoordTaskNum.WithLabelValues().Set(float64(scheduler.tasks.Len())) + scheduler.updateTaskMetrics() log.Info("task added", zap.String("task", task.String())) return nil } +func (scheduler *taskScheduler) updateTaskMetrics() { + segmentGrowNum, segmentReduceNum, segmentMoveNum := 0, 0, 0 + channelGrowNum, channelReduceNum, channelMoveNum := 0, 0, 0 + for _, task := range scheduler.segmentTasks { + taskType := GetTaskType(task) + switch taskType { + case TaskTypeGrow: + segmentGrowNum++ + case TaskTypeReduce: + segmentReduceNum++ + case TaskTypeMove: + segmentMoveNum++ + } + } + + for _, task := range scheduler.channelTasks { + taskType := GetTaskType(task) + switch taskType { + case TaskTypeGrow: + channelGrowNum++ + case TaskTypeReduce: + channelReduceNum++ + case TaskTypeMove: + channelMoveNum++ + } + } + + metrics.QueryCoordTaskNum.WithLabelValues(metrics.SegmentGrowTaskLabel).Set(float64(segmentGrowNum)) + metrics.QueryCoordTaskNum.WithLabelValues(metrics.SegmentReduceTaskLabel).Set(float64(segmentReduceNum)) + metrics.QueryCoordTaskNum.WithLabelValues(metrics.SegmentMoveTaskLabel).Set(float64(segmentMoveNum)) + metrics.QueryCoordTaskNum.WithLabelValues(metrics.ChannelGrowTaskLabel).Set(float64(channelGrowNum)) + metrics.QueryCoordTaskNum.WithLabelValues(metrics.ChannelReduceTaskLabel).Set(float64(channelReduceNum)) + metrics.QueryCoordTaskNum.WithLabelValues(metrics.ChannelMoveTaskLabel).Set(float64(channelMoveNum)) +} + // check whether the task is valid to add, // must hold lock func (scheduler *taskScheduler) preAdd(task Task) error { @@ -656,7 +691,7 @@ func (scheduler *taskScheduler) remove(task Task) { log = log.With(zap.String("channel", task.Channel())) } - metrics.QueryCoordTaskNum.WithLabelValues().Set(float64(scheduler.tasks.Len())) + scheduler.updateTaskMetrics() log.Debug("task removed", zap.Stack("stack")) } diff --git a/pkg/metrics/querycoord_metrics.go b/pkg/metrics/querycoord_metrics.go index 42a35f9641..43ccce4abc 100644 --- a/pkg/metrics/querycoord_metrics.go +++ b/pkg/metrics/querycoord_metrics.go @@ -22,6 +22,18 @@ import ( "github.com/milvus-io/milvus/pkg/util/typeutil" ) +const ( + SegmentGrowTaskLabel = "segment_grow" + SegmentReduceTaskLabel = "segment_reduce" + SegmentMoveTaskLabel = "segment_move" + + ChannelGrowTaskLabel = "channel_grow" + ChannelReduceTaskLabel = "channel_reduce" + ChannelMoveTaskLabel = "channel_move" + + QueryCoordTaskType = "querycoord_task_type" +) + var ( QueryCoordNumCollections = prometheus.NewGaugeVec( prometheus.GaugeOpts{ @@ -83,7 +95,7 @@ var ( Subsystem: typeutil.QueryCoordRole, Name: "task_num", Help: "the number of tasks in QueryCoord's scheduler", - }, []string{}) + }, []string{QueryCoordTaskType}) QueryCoordNumQueryNodes = prometheus.NewGaugeVec( prometheus.GaugeOpts{