From 94f3aec80a032bdb853dc8a1d9a4fdbf7ca62b27 Mon Sep 17 00:00:00 2001 From: congqixia Date: Thu, 21 Mar 2024 10:17:07 +0800 Subject: [PATCH] enhance: [Cherry-pick] Add metrics for querycoord current target cp lag (#31391) (#31463) Cherry-pick from master pr: #31391 #31399 See also #31390 --------- Signed-off-by: Congqi Xia --- internal/querycoordv2/meta/target_manager.go | 21 ++++++++++++++++++++ pkg/metrics/querycoord_metrics.go | 12 +++++++++++ 2 files changed, 33 insertions(+) diff --git a/internal/querycoordv2/meta/target_manager.go b/internal/querycoordv2/meta/target_manager.go index 7f4aae83e1..ebf298a030 100644 --- a/internal/querycoordv2/meta/target_manager.go +++ b/internal/querycoordv2/meta/target_manager.go @@ -18,6 +18,7 @@ package meta import ( "context" + "fmt" "sync" "github.com/cockroachdb/errors" @@ -26,8 +27,11 @@ import ( "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/metrics" "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/retry" + "github.com/milvus-io/milvus/pkg/util/tsoutil" "github.com/milvus-io/milvus/pkg/util/typeutil" ) @@ -84,6 +88,13 @@ func (mgr *TargetManager) UpdateCollectionCurrentTarget(collectionID int64) bool zap.Strings("channels", newTarget.GetAllDmChannelNames()), zap.Int64("version", newTarget.GetTargetVersion()), ) + for channelName, dmlChannel := range newTarget.dmChannels { + ts, _ := tsoutil.ParseTS(dmlChannel.GetSeekPosition().GetTimestamp()) + metrics.QueryCoordCurrentTargetCheckpointUnixSeconds.WithLabelValues( + fmt.Sprint(paramtable.GetNodeID()), + channelName, + ).Set(float64(ts.Unix())) + } return true } @@ -255,6 +266,16 @@ func (mgr *TargetManager) RemoveCollection(collectionID int64) { log.Info("remove collection from targets", zap.Int64("collectionID", collectionID)) + current := mgr.current.getCollectionTarget(collectionID) + if current != nil { + for channelName := range current.GetAllDmChannels() { + metrics.QueryCoordCurrentTargetCheckpointUnixSeconds.DeleteLabelValues( + fmt.Sprint(paramtable.GetNodeID()), + channelName, + ) + } + } + mgr.current.removeCollectionTarget(collectionID) mgr.next.removeCollectionTarget(collectionID) } diff --git a/pkg/metrics/querycoord_metrics.go b/pkg/metrics/querycoord_metrics.go index 43ccce4abc..87448c53e4 100644 --- a/pkg/metrics/querycoord_metrics.go +++ b/pkg/metrics/querycoord_metrics.go @@ -104,6 +104,17 @@ var ( Name: "querynode_num", Help: "number of QueryNodes managered by QueryCoord", }, []string{}) + + QueryCoordCurrentTargetCheckpointUnixSeconds = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: milvusNamespace, + Subsystem: typeutil.QueryCoordRole, + Name: "current_target_checkpoint_unix_seconds", + Help: "current target checkpoint timestamp in unix seconds", + }, []string{ + nodeIDLabelName, + channelNameLabelName, + }) ) // RegisterQueryCoord registers QueryCoord metrics @@ -116,4 +127,5 @@ func RegisterQueryCoord(registry *prometheus.Registry) { registry.MustRegister(QueryCoordReleaseLatency) registry.MustRegister(QueryCoordTaskNum) registry.MustRegister(QueryCoordNumQueryNodes) + registry.MustRegister(QueryCoordCurrentTargetCheckpointUnixSeconds) }