From 74ad7c460159acc2b0a2d951ce8ad4e8058a5472 Mon Sep 17 00:00:00 2001 From: "yihao.dai" Date: Sun, 16 Jul 2023 18:48:33 +0800 Subject: [PATCH] Fix monitoring metrics (#25549) Signed-off-by: bigsheeper --- internal/rootcoord/meta_table.go | 12 +++++++++--- internal/rootcoord/proxy_client_manager.go | 8 ++++++++ internal/rootcoord/proxy_manager.go | 3 --- internal/rootcoord/quota_center.go | 6 +++--- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/internal/rootcoord/meta_table.go b/internal/rootcoord/meta_table.go index f751d40e91..ff1e4dbf7f 100644 --- a/internal/rootcoord/meta_table.go +++ b/internal/rootcoord/meta_table.go @@ -137,6 +137,9 @@ func (mt *MetaTable) reload() error { collectionNum := int64(0) partitionNum := int64(0) + metrics.RootCoordNumOfCollections.Set(float64(0)) + metrics.RootCoordNumOfPartitions.WithLabelValues().Set(float64(0)) + // recover databases. dbs, err := mt.catalog.ListDatabases(mt.ctx, typeutil.MaxTimestamp) if err != nil { @@ -195,8 +198,8 @@ func (mt *MetaTable) reload() error { } } - metrics.RootCoordNumOfCollections.Set(float64(collectionNum)) - metrics.RootCoordNumOfPartitions.WithLabelValues().Set(float64(partitionNum)) + metrics.RootCoordNumOfCollections.Add(float64(collectionNum)) + metrics.RootCoordNumOfPartitions.WithLabelValues().Add(float64(partitionNum)) log.Info("RootCoord meta table reload done", zap.Duration("duration", record.ElapseSpan())) return nil } @@ -230,7 +233,10 @@ func (mt *MetaTable) reloadWithNonDatabase() error { for _, alias := range aliases { mt.aliases.insert(util.DefaultDBName, alias.Name, alias.CollectionID) } - return err + + metrics.RootCoordNumOfCollections.Add(float64(collectionNum)) + metrics.RootCoordNumOfPartitions.WithLabelValues().Add(float64(partitionNum)) + return nil } func (mt *MetaTable) createDefaultDb() error { diff --git a/internal/rootcoord/proxy_client_manager.go b/internal/rootcoord/proxy_client_manager.go index e2b3f84104..f6729ee026 100644 --- a/internal/rootcoord/proxy_client_manager.go +++ b/internal/rootcoord/proxy_client_manager.go @@ -32,6 +32,7 @@ import ( "github.com/milvus-io/milvus/internal/types" "github.com/milvus-io/milvus/internal/util/sessionutil" "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/metrics" "github.com/milvus-io/milvus/pkg/util/metricsinfo" ) @@ -89,6 +90,7 @@ func (p *proxyClientManager) AddProxyClient(session *sessionutil.Session) { } p.connect(session) + p.updateProxyNumMetric() } // GetProxyCount returns number of proxy clients. @@ -99,6 +101,11 @@ func (p *proxyClientManager) GetProxyCount() int { return len(p.proxyClient) } +// mutex.Lock is required before calling this method. +func (p *proxyClientManager) updateProxyNumMetric() { + metrics.RootCoordProxyCounter.WithLabelValues().Set(float64(len(p.proxyClient))) +} + func (p *proxyClientManager) connect(session *sessionutil.Session) { pc, err := p.creator(context.Background(), session.Address) if err != nil { @@ -129,6 +136,7 @@ func (p *proxyClientManager) DelProxyClient(s *sessionutil.Session) { } delete(p.proxyClient, s.ServerID) + p.updateProxyNumMetric() log.Info("remove proxy client", zap.String("proxy address", s.Address), zap.Int64("proxy id", s.ServerID)) } diff --git a/internal/rootcoord/proxy_manager.go b/internal/rootcoord/proxy_manager.go index da0e4b3ce7..9521930397 100644 --- a/internal/rootcoord/proxy_manager.go +++ b/internal/rootcoord/proxy_manager.go @@ -31,7 +31,6 @@ import ( etcdkv "github.com/milvus-io/milvus/internal/kv/etcd" "github.com/milvus-io/milvus/internal/util/sessionutil" "github.com/milvus-io/milvus/pkg/log" - "github.com/milvus-io/milvus/pkg/metrics" "github.com/milvus-io/milvus/pkg/util/typeutil" ) @@ -153,7 +152,6 @@ func (p *proxyManager) handlePutEvent(e *clientv3.Event) error { for _, f := range p.addSessionsFunc { f(session) } - metrics.RootCoordProxyCounter.WithLabelValues().Inc() return nil } @@ -166,7 +164,6 @@ func (p *proxyManager) handleDeleteEvent(e *clientv3.Event) error { for _, f := range p.delSessionsFunc { f(session) } - metrics.RootCoordProxyCounter.WithLabelValues().Dec() return nil } diff --git a/internal/rootcoord/quota_center.go b/internal/rootcoord/quota_center.go index b49ee66c6f..b91d023956 100644 --- a/internal/rootcoord/quota_center.go +++ b/internal/rootcoord/quota_center.go @@ -847,15 +847,15 @@ func (q *QuotaCenter) setRates() error { // recordMetrics records metrics of quota states. func (q *QuotaCenter) recordMetrics() { record := func(errorCode commonpb.ErrorCode) { + var hasException float64 = 0 for _, states := range q.quotaStates { for _, state := range states { if state == errorCode { - metrics.RootCoordQuotaStates.WithLabelValues(errorCode.String()).Set(1) + hasException = 1 } } - return } - metrics.RootCoordQuotaStates.WithLabelValues(errorCode.String()).Set(0) + metrics.RootCoordQuotaStates.WithLabelValues(errorCode.String()).Set(hasException) } record(commonpb.ErrorCode_MemoryQuotaExhausted) record(commonpb.ErrorCode_DiskQuotaExhausted)