milvus/internal/metrics/datanode.go
bigsheeper 13177a90af
Add prometheus metrics for DataNode (#15650)
Signed-off-by: bigsheeper <yihao.dai@zilliz.com>

Co-authored-by: Cai Yudong <yudong.cai@zilliz.com>
2022-02-28 19:11:55 +08:00

236 lines
7.1 KiB
Go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/milvus-io/milvus/internal/util/typeutil"
)
// Values used for the status dimension of DataNode metrics.
// TODO: use the common status label
const (
	DataNodeMetricLabelTotal   = "total"
	DataNodeMetricLabelSuccess = "success"
	DataNodeMetricLabelFail    = "fail"
)

// Values naming the kind of message a DataNode metric sample refers to.
const (
	DataNodeMsgTypeDelete = "delete"
	DataNodeMsgTypeInsert = "insert"
)
// Label names (keys) shared by the DataNode metric vectors in this file.
// TODO: move to metrics.go
const (
	// channelNameLabelName is the label key carrying a channel name.
	channelNameLabelName = "channel_name"
	// collectionIDLabelName is the label key carrying a collection ID.
	collectionIDLabelName = "collection_id"
	// msgTypeLabelName is the label key carrying a message type.
	msgTypeLabelName = "msg_type"
	// nodeIDLabelName is the label key carrying a node ID.
	nodeIDLabelName = "node_id"
	// statusLabelName is the label key carrying an operation status.
	statusLabelName = "status"
)
// dataNodeDurationBuckets holds the histogram bucket upper bounds used by the
// DataNode latency histograms in this file that reference it
// (flush_segment_latency and compaction_latency).
//
// It is built as 18 exponential buckets starting at 10 and doubling each step;
// the values are meant to be durations in milliseconds:
// [10 20 40 80 160 320 640 1280 2560 5120 10240 20480 40960 81920 163840 327680 655360 1.31072e+06]
var dataNodeDurationBuckets = prometheus.ExponentialBuckets(10, 2, 18)
// DataNode metric collectors. All of them live under the Milvus namespace and
// the DataNode subsystem, and every vector carries the node ID as its last label.
var (
	// DataNodeNumFlowGraphs gauges the number of flow graphs, per node.
	DataNodeNumFlowGraphs = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: milvusNamespace,
			Subsystem: typeutil.DataNodeRole,
			Name:      "num_flow_graphs",
			Help:      "Number of flow graphs in DataNode.",
		},
		[]string{nodeIDLabelName},
	)

	// DataNodeConsumeMsgRowsCount counts rows of consumed messages,
	// per message type and node.
	DataNodeConsumeMsgRowsCount = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: milvusNamespace,
			Subsystem: typeutil.DataNodeRole,
			Name:      "message_rows_count",
			Help:      "Messages rows size count consumed from msgStream in DataNode.",
		},
		[]string{msgTypeLabelName, nodeIDLabelName},
	)

	// DataNodeFlushedSize counts the size of data flushed to storage,
	// per message type and node.
	DataNodeFlushedSize = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: milvusNamespace,
			Subsystem: typeutil.DataNodeRole,
			Name:      "flushed_size",
			Help:      "Data size flushed to storage in DataNode.",
		},
		[]string{msgTypeLabelName, nodeIDLabelName},
	)

	// DataNodeNumDmlChannels gauges the number of DML channels,
	// per collection and node.
	DataNodeNumDmlChannels = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: milvusNamespace,
			Subsystem: typeutil.DataNodeRole,
			Name:      "num_dml_channels",
			Help:      "Number of dmlChannels per collection in DataNode.",
		},
		[]string{collectionIDLabelName, nodeIDLabelName},
	)

	// DataNodeNumDeltaChannels gauges the number of delta channels,
	// per collection and node.
	DataNodeNumDeltaChannels = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: milvusNamespace,
			Subsystem: typeutil.DataNodeRole,
			Name:      "num_delta_channels",
			Help:      "Number of deltaChannels per collection in DataNode.",
		},
		[]string{collectionIDLabelName, nodeIDLabelName},
	)

	// DataNodeNumConsumers gauges the number of consumers,
	// per collection and node.
	DataNodeNumConsumers = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: milvusNamespace,
			Subsystem: typeutil.DataNodeRole,
			Name:      "num_consumers",
			Help:      "Number of consumers per collection in DataNode.",
		},
		[]string{collectionIDLabelName, nodeIDLabelName},
	)

	// DataNodeNumProducers gauges the number of producers,
	// per collection and node.
	DataNodeNumProducers = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: milvusNamespace,
			Subsystem: typeutil.DataNodeRole,
			Name:      "num_producers",
			Help:      "Number of producers per collection in DataNode.",
		},
		[]string{collectionIDLabelName, nodeIDLabelName},
	)

	// DataNodeTimeSync gauges the synchronized timestamp,
	// per channel and node.
	DataNodeTimeSync = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: milvusNamespace,
			Subsystem: typeutil.DataNodeRole,
			Name:      "time_sync",
			Help:      "Synchronized timestamps per channel in DataNode.",
		},
		[]string{channelNameLabelName, nodeIDLabelName},
	)

	// DataNodeSegmentRowsCount counts rows of segments reported to DataCoord,
	// per collection and node.
	DataNodeSegmentRowsCount = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: milvusNamespace,
			Subsystem: typeutil.DataNodeRole,
			Name:      "seg_rows_count",
			Help:      "Rows count of segments which sent to DataCoord from DataNode.",
		},
		[]string{collectionIDLabelName, nodeIDLabelName},
	)

	// DataNodeNumUnflushedSegments gauges the number of unflushed segments,
	// per collection and node.
	DataNodeNumUnflushedSegments = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: milvusNamespace,
			Subsystem: typeutil.DataNodeRole,
			Name:      "num_unflushed_segments",
			Help:      "Number of unflushed segments in DataNode.",
		},
		[]string{collectionIDLabelName, nodeIDLabelName},
	)

	// DataNodeFlushSegmentLatency is a histogram of flush-segment latency,
	// per collection and node, bucketed by dataNodeDurationBuckets.
	// TODO: arguably
	DataNodeFlushSegmentLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: milvusNamespace,
			Subsystem: typeutil.DataNodeRole,
			Name:      "flush_segment_latency",
			Help:      "The flush segment latency in DataNode.",
			Buckets:   dataNodeDurationBuckets,
		},
		[]string{collectionIDLabelName, nodeIDLabelName},
	)

	// DataNodeSave2StorageLatency is a histogram of the latency of saving
	// flush data to storage, per message type and node. It uses its own fixed
	// bucket list rather than dataNodeDurationBuckets.
	DataNodeSave2StorageLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: milvusNamespace,
			Subsystem: typeutil.DataNodeRole,
			Name:      "save_latency",
			Help:      "The latency saving flush data to storage in DataNode.",
			Buckets:   []float64{0, 10, 100, 200, 400, 1000, 10000},
		},
		[]string{msgTypeLabelName, nodeIDLabelName},
	)

	// DataNodeFlushSegmentCount counts flush-segment operations,
	// per status and node.
	// TODO: arguably
	DataNodeFlushSegmentCount = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: milvusNamespace,
			Subsystem: typeutil.DataNodeRole,
			Name:      "flush_segment_count",
			Help:      "Flush segment statistics in DataNode.",
		},
		[]string{statusLabelName, nodeIDLabelName},
	)

	// DataNodeAutoFlushSegmentCount counts auto-flush segment operations,
	// per channel and node.
	// TODO: arguably
	DataNodeAutoFlushSegmentCount = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: milvusNamespace,
			Subsystem: typeutil.DataNodeRole,
			Name:      "auto_flush_segment_count",
			Help:      "Auto flush segment statistics in DataNode.",
		},
		[]string{channelNameLabelName, nodeIDLabelName},
	)

	// DataNodeCompactionLatency is a histogram of compaction latency,
	// per collection and node, bucketed by dataNodeDurationBuckets.
	DataNodeCompactionLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: milvusNamespace,
			Subsystem: typeutil.DataNodeRole,
			Name:      "compaction_latency",
			Help:      "Compaction latency in DataNode.",
			Buckets:   dataNodeDurationBuckets,
		},
		[]string{collectionIDLabelName, nodeIDLabelName},
	)
)
// RegisterDataNode registers all DataNode metric collectors declared in this
// file with the default Prometheus registerer.
//
// Collectors are registered in declaration order. prometheus.MustRegister
// panics on the first collector that fails to register.
func RegisterDataNode() {
	prometheus.MustRegister(
		DataNodeNumFlowGraphs,
		DataNodeConsumeMsgRowsCount,
		DataNodeFlushedSize,
		DataNodeNumDmlChannels,
		DataNodeNumDeltaChannels,
		DataNodeNumConsumers,
		DataNodeNumProducers,
		DataNodeTimeSync,
		DataNodeSegmentRowsCount,
		DataNodeNumUnflushedSegments,
		DataNodeFlushSegmentLatency,
		DataNodeSave2StorageLatency,
		DataNodeFlushSegmentCount,
		DataNodeAutoFlushSegmentCount,
		DataNodeCompactionLatency,
	)
}