mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
#46133 Signed-off-by: luzhang <luzhang@zilliz.com> Co-authored-by: luzhang <luzhang@zilliz.com>
252 lines
7.8 KiB
Go
252 lines
7.8 KiB
Go
/*
|
|
* # Licensed to the LF AI & Data foundation under one
|
|
* # or more contributor license agreements. See the NOTICE file
|
|
* # distributed with this work for additional information
|
|
* # regarding copyright ownership. The ASF licenses this file
|
|
* # to you under the Apache License, Version 2.0 (the
|
|
* # "License"); you may not use this file except in compliance
|
|
* # with the License. You may obtain a copy of the License at
|
|
* #
|
|
* # http://www.apache.org/licenses/LICENSE-2.0
|
|
* #
|
|
* # Unless required by applicable law or agreed to in writing, software
|
|
* # distributed under the License is distributed on an "AS IS" BASIS,
|
|
* # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* # See the License for the specific language governing permissions and
|
|
* # limitations under the License.
|
|
*/
|
|
|
|
package metrics
|
|
|
|
/*
|
|
#cgo pkg-config: milvus_core
|
|
|
|
#include <stdlib.h>
|
|
#include "segcore/metrics_c.h"
|
|
#include "monitor/monitor_c.h"
|
|
#include "monitor/jemalloc_stats_c.h"
|
|
|
|
*/
|
|
import "C"
|
|
|
|
import (
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
"unsafe"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
dto "github.com/prometheus/client_model/go"
|
|
"github.com/prometheus/common/expfmt"
|
|
"go.uber.org/zap"
|
|
"golang.org/x/exp/maps"
|
|
"google.golang.org/protobuf/proto"
|
|
|
|
"github.com/milvus-io/milvus/pkg/v2/log"
|
|
)
|
|
|
|
// metricSorter is a sortable slice of *dto.Metric.
|
|
type metricSorter []*dto.Metric
|
|
|
|
func (s metricSorter) Len() int {
|
|
return len(s)
|
|
}
|
|
|
|
func (s metricSorter) Swap(i, j int) {
|
|
s[i], s[j] = s[j], s[i]
|
|
}
|
|
|
|
func (s metricSorter) Less(i, j int) bool {
|
|
if len(s[i].Label) != len(s[j].Label) {
|
|
// This should not happen. The metrics are
|
|
// inconsistent. However, we have to deal with the fact, as
|
|
// people might use custom collectors or metric family injection
|
|
// to create inconsistent metrics. So let's simply compare the
|
|
// number of labels in this case. That will still yield
|
|
// reproducible sorting.
|
|
return len(s[i].Label) < len(s[j].Label)
|
|
}
|
|
for n, lp := range s[i].Label {
|
|
vi := lp.GetValue()
|
|
vj := s[j].Label[n].GetValue()
|
|
if vi != vj {
|
|
return vi < vj
|
|
}
|
|
}
|
|
|
|
// We should never arrive here. Multiple metrics with the same
|
|
// label set in the same scrape will lead to undefined ingestion
|
|
// behavior. However, as above, we have to provide stable sorting
|
|
// here, even for inconsistent metrics. So sort equal metrics
|
|
// by their timestamp, with missing timestamps (implying "now")
|
|
// coming last.
|
|
if s[i].TimestampMs == nil {
|
|
return false
|
|
}
|
|
if s[j].TimestampMs == nil {
|
|
return true
|
|
}
|
|
return s[i].GetTimestampMs() < s[j].GetTimestampMs()
|
|
}
|
|
|
|
// NormalizeMetricFamilies returns a MetricFamily slice with empty
|
|
// MetricFamilies pruned and the remaining MetricFamilies sorted by name within
|
|
// the slice, with the contained Metrics sorted within each MetricFamily.
|
|
func NormalizeMetricFamilies(metricFamiliesByName map[string]*dto.MetricFamily) []*dto.MetricFamily {
|
|
for _, mf := range metricFamiliesByName {
|
|
sort.Sort(metricSorter(mf.Metric))
|
|
}
|
|
names := make([]string, 0, len(metricFamiliesByName))
|
|
for name, mf := range metricFamiliesByName {
|
|
if len(mf.Metric) > 0 {
|
|
names = append(names, name)
|
|
}
|
|
}
|
|
sort.Strings(names)
|
|
result := make([]*dto.MetricFamily, 0, len(names))
|
|
for _, name := range names {
|
|
result = append(result, metricFamiliesByName[name])
|
|
}
|
|
return result
|
|
}
|
|
|
|
// Jemalloc metrics cache to avoid frequent C calls
|
|
var (
|
|
jemallocMetricsCache struct {
|
|
sync.RWMutex
|
|
metrics map[string]*dto.MetricFamily
|
|
timestamp time.Time
|
|
}
|
|
// Cache TTL: 10 seconds to balance performance and data freshness
|
|
// This reduces mallctl("epoch") calls from every scrape to once per 10s
|
|
jemallocMetricsCacheTTL = 10 * time.Second
|
|
)
|
|
|
|
func NewCRegistry() *CRegistry {
|
|
return &CRegistry{
|
|
Registry: prometheus.NewRegistry(),
|
|
}
|
|
}
|
|
|
|
// only re-write the implementation of Gather()
|
|
type CRegistry struct {
|
|
*prometheus.Registry
|
|
mtx sync.RWMutex
|
|
}
|
|
|
|
// Gather implements Gatherer.
|
|
func (r *CRegistry) Gather() (res []*dto.MetricFamily, err error) {
|
|
var parser expfmt.TextParser
|
|
|
|
r.mtx.RLock()
|
|
defer r.mtx.RUnlock()
|
|
|
|
cMetricsStr := C.GetKnowhereMetrics()
|
|
metricsStr := C.GoString(cMetricsStr)
|
|
C.free(unsafe.Pointer(cMetricsStr))
|
|
|
|
out, err := parser.TextToMetricFamilies(strings.NewReader(metricsStr))
|
|
if err != nil {
|
|
log.Error("fail to parse knowhere prometheus metrics", zap.Error(err))
|
|
return
|
|
}
|
|
|
|
cMetricsStr = C.GetCoreMetrics()
|
|
metricsStr = C.GoString(cMetricsStr)
|
|
C.free(unsafe.Pointer(cMetricsStr))
|
|
|
|
out1, err := parser.TextToMetricFamilies(strings.NewReader(metricsStr))
|
|
if err != nil {
|
|
log.Error("fail to parse storage prometheus metrics", zap.Error(err))
|
|
return
|
|
}
|
|
|
|
maps.Copy(out, out1)
|
|
|
|
// Add jemalloc stats metrics
|
|
jemallocMetrics := gatherJemallocMetrics()
|
|
for name, mf := range jemallocMetrics {
|
|
out[name] = mf
|
|
}
|
|
|
|
res = NormalizeMetricFamilies(out)
|
|
return
|
|
}
|
|
|
|
// gatherJemallocMetrics collects comprehensive jemalloc stats and returns them as metric families.
|
|
// Uses a 10-second cache to avoid expensive mallctl("epoch") calls on every Prometheus scrape.
|
|
func gatherJemallocMetrics() map[string]*dto.MetricFamily {
|
|
// Fast path: check if cache is still valid
|
|
jemallocMetricsCache.RLock()
|
|
if time.Since(jemallocMetricsCache.timestamp) < jemallocMetricsCacheTTL && jemallocMetricsCache.metrics != nil {
|
|
cached := jemallocMetricsCache.metrics
|
|
jemallocMetricsCache.RUnlock()
|
|
log.Debug("using cached jemalloc metrics",
|
|
zap.Duration("age", time.Since(jemallocMetricsCache.timestamp)))
|
|
return cached
|
|
}
|
|
jemallocMetricsCache.RUnlock()
|
|
|
|
// Slow path: cache expired, collect fresh metrics from C
|
|
// This involves expensive mallctl("epoch") call which can take 100-5000μs
|
|
result := make(map[string]*dto.MetricFamily)
|
|
|
|
cStats := C.GetJemallocStats()
|
|
if !bool(cStats.success) {
|
|
log.Debug("jemalloc stats not available (may be running on macOS or jemalloc is disabled)")
|
|
return result
|
|
}
|
|
|
|
gaugeType := dto.MetricType_GAUGE
|
|
|
|
// Helper function to create a gauge metric family
|
|
createGaugeFamily := func(name, help string, value float64) *dto.MetricFamily {
|
|
return &dto.MetricFamily{
|
|
Name: proto.String(name),
|
|
Help: proto.String(help),
|
|
Type: &gaugeType,
|
|
Metric: []*dto.Metric{
|
|
{
|
|
Gauge: &dto.Gauge{
|
|
Value: proto.Float64(value),
|
|
},
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
// Define all jemalloc metrics (8 comprehensive metrics)
|
|
metrics := []struct {
|
|
name string
|
|
help string
|
|
value uint64
|
|
}{
|
|
// Core metrics from jemalloc
|
|
{"milvus_jemalloc_allocated_bytes", "Total number of bytes allocated by the application", uint64(cStats.allocated)},
|
|
{"milvus_jemalloc_active_bytes", "Total number of bytes in active pages allocated by the application (includes fragmentation)", uint64(cStats.active)},
|
|
{"milvus_jemalloc_metadata_bytes", "Total number of bytes dedicated to jemalloc metadata", uint64(cStats.metadata)},
|
|
{"milvus_jemalloc_resident_bytes", "Total number of bytes in physically resident data pages mapped by the allocator", uint64(cStats.resident)},
|
|
{"milvus_jemalloc_mapped_bytes", "Total number of bytes in virtual memory mappings", uint64(cStats.mapped)},
|
|
{"milvus_jemalloc_retained_bytes", "Total number of bytes in retained virtual memory mappings (could be returned to OS)", uint64(cStats.retained)},
|
|
// Derived metrics (calculated in C code)
|
|
{"milvus_jemalloc_fragmentation_bytes", "Internal fragmentation in bytes (active - allocated)", uint64(cStats.fragmentation)},
|
|
{"milvus_jemalloc_overhead_bytes", "Memory overhead in bytes (resident - active)", uint64(cStats.overhead)},
|
|
}
|
|
|
|
for _, m := range metrics {
|
|
result[m.name] = createGaugeFamily(m.name, m.help, float64(m.value))
|
|
}
|
|
|
|
// Update cache with fresh metrics
|
|
jemallocMetricsCache.Lock()
|
|
jemallocMetricsCache.metrics = result
|
|
jemallocMetricsCache.timestamp = time.Now()
|
|
jemallocMetricsCache.Unlock()
|
|
|
|
log.Debug("refreshed jemalloc metrics cache",
|
|
zap.Int("num_metrics", len(result)))
|
|
|
|
return result
|
|
}
|