enhance: [Cherry-pick] Add metrics for querycoord current target cp lag (#31391) (#31463)

Cherry-pick from master
pr: #31391 #31399
See also #31390

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
congqixia 2024-03-21 10:17:07 +08:00 committed by GitHub
parent fef430daed
commit 94f3aec80a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 33 additions and 0 deletions

View File

@ -18,6 +18,7 @@ package meta
import (
"context"
"fmt"
"sync"
"github.com/cockroachdb/errors"
@ -26,8 +27,11 @@ import (
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/retry"
"github.com/milvus-io/milvus/pkg/util/tsoutil"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
@ -84,6 +88,13 @@ func (mgr *TargetManager) UpdateCollectionCurrentTarget(collectionID int64) bool
zap.Strings("channels", newTarget.GetAllDmChannelNames()),
zap.Int64("version", newTarget.GetTargetVersion()),
)
// Publish one gauge sample per DM channel of the new target: the channel's
// seek-position timestamp, expressed in Unix seconds, labelled with this
// node's ID and the channel name.
for channelName, dmlChannel := range newTarget.dmChannels {
// Only the first value returned by ParseTS is used; the second is discarded.
ts, _ := tsoutil.ParseTS(dmlChannel.GetSeekPosition().GetTimestamp())
metrics.QueryCoordCurrentTargetCheckpointUnixSeconds.WithLabelValues(
fmt.Sprint(paramtable.GetNodeID()),
channelName,
).Set(float64(ts.Unix()))
}
return true
}
@ -255,6 +266,16 @@ func (mgr *TargetManager) RemoveCollection(collectionID int64) {
log.Info("remove collection from targets",
zap.Int64("collectionID", collectionID))
// Fetch the collection's current target before it is removed from
// mgr.current below, so that its channel names are still available for
// deleting the matching checkpoint gauge series.
current := mgr.current.getCollectionTarget(collectionID)
if current != nil {
// Delete the gauge series keyed by (node ID, channel name) for every
// DM channel of the collection being removed.
for channelName := range current.GetAllDmChannels() {
metrics.QueryCoordCurrentTargetCheckpointUnixSeconds.DeleteLabelValues(
fmt.Sprint(paramtable.GetNodeID()),
channelName,
)
}
}
mgr.current.removeCollectionTarget(collectionID)
mgr.next.removeCollectionTarget(collectionID)
}

View File

@ -104,6 +104,17 @@ var (
Name: "querynode_num",
Help: "number of QueryNodes managered by QueryCoord",
}, []string{})
// QueryCoordCurrentTargetCheckpointUnixSeconds is a gauge vector holding the
// current target checkpoint timestamp in unix seconds. Each series is
// identified by the nodeIDLabelName and channelNameLabelName labels.
QueryCoordCurrentTargetCheckpointUnixSeconds = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: milvusNamespace,
Subsystem: typeutil.QueryCoordRole,
Name: "current_target_checkpoint_unix_seconds",
Help: "current target checkpoint timestamp in unix seconds",
}, []string{
nodeIDLabelName,
channelNameLabelName,
})
)
// RegisterQueryCoord registers QueryCoord metrics
@ -116,4 +127,5 @@ func RegisterQueryCoord(registry *prometheus.Registry) {
registry.MustRegister(QueryCoordReleaseLatency)
registry.MustRegister(QueryCoordTaskNum)
registry.MustRegister(QueryCoordNumQueryNodes)
registry.MustRegister(QueryCoordCurrentTargetCheckpointUnixSeconds)
}