issue: #41544

- add lock interceptor into wal.
- use recovery and shardmanager to replace the original implementation of segment assignment.
- remove redundant implementation and unittest.
- remove redundant proto definition.
- use 2 streamingnode in e2e.

Signed-off-by: chyezh <chyezh@outlook.com>
144 lines
5.2 KiB
Go

package metricsutil

import (
	"strconv"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"go.uber.org/zap/zapcore"

	"github.com/milvus-io/milvus/internal/util/streamingutil/status"
	"github.com/milvus-io/milvus/pkg/v2/log"
	"github.com/milvus-io/milvus/pkg/v2/metrics"
	"github.com/milvus-io/milvus/pkg/v2/streaming/util/message"
	"github.com/milvus-io/milvus/pkg/v2/streaming/util/types"
	"github.com/milvus-io/milvus/pkg/v2/streaming/walimpls/impls/wp"
	"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
)

// NewWriteMetrics creates a new WriteMetrics.
func NewWriteMetrics(pchannel types.PChannelInfo, walName string) *WriteMetrics {
	constLabel := prometheus.Labels{
		metrics.NodeIDLabelName:     paramtable.GetStringNodeID(),
		metrics.WALChannelLabelName: pchannel.Name,
	}
	metrics.WALInfo.WithLabelValues(
		paramtable.GetStringNodeID(),
		pchannel.Name,
		strconv.FormatInt(pchannel.Term, 10),
		walName).Set(1)

	slowLogThreshold := paramtable.Get().StreamingCfg.LoggingAppendSlowThreshold.GetAsDurationByParse()
	if slowLogThreshold <= 0 {
		slowLogThreshold = time.Second
	}
	if walName == wp.WALName && slowLogThreshold < 3*time.Second {
		// woodpecker wal is always slow, so we need to set a higher threshold by default.
		slowLogThreshold = 3 * time.Second
	}
	return &WriteMetrics{
		walName:                      walName,
		pchannel:                     pchannel,
		constLabel:                   constLabel,
		bytes:                        metrics.WALAppendMessageBytes.MustCurryWith(constLabel),
		total:                        metrics.WALAppendMessageTotal.MustCurryWith(constLabel),
		walDuration:                  metrics.WALAppendMessageDurationSeconds.MustCurryWith(constLabel),
		walimplsRetryTotal:           metrics.WALImplsAppendRetryTotal.With(constLabel),
		walimplsDuration:             metrics.WALImplsAppendMessageDurationSeconds.MustCurryWith(constLabel),
		walBeforeInterceptorDuration: metrics.WALAppendMessageBeforeInterceptorDurationSeconds.MustCurryWith(constLabel),
		walAfterInterceptorDuration:  metrics.WALAppendMessageAfterInterceptorDurationSeconds.MustCurryWith(constLabel),
		slowLogThreshold:             slowLogThreshold,
	}
}

// WriteMetrics collects write-path metrics for a single pchannel wal, with the
// node and channel labels pre-bound into the underlying metric vectors.
type WriteMetrics struct {
	log.Binder

	walName                      string
	pchannel                     types.PChannelInfo
	constLabel                   prometheus.Labels
	bytes                        prometheus.ObserverVec
	total                        *prometheus.CounterVec
	walDuration                  prometheus.ObserverVec
	walimplsRetryTotal           prometheus.Counter
	walimplsDuration             prometheus.ObserverVec
	walBeforeInterceptorDuration prometheus.ObserverVec
	walAfterInterceptorDuration  prometheus.ObserverVec
	slowLogThreshold             time.Duration
}

// StartAppend starts collecting metrics for a single append operation on the wal.
func (m *WriteMetrics) StartAppend(msg message.MutableMessage) *AppendMetrics {
	return &AppendMetrics{
		wm:           m,
		msg:          msg,
		interceptors: make(map[string][]*InterceptorMetrics),
	}
}

// done observes the result of a finished append operation and logs it if it
// failed or exceeded the slow-log threshold.
func (m *WriteMetrics) done(appendMetrics *AppendMetrics) {
	if !appendMetrics.msg.IsPersisted() {
		return
	}
	status := parseError(appendMetrics.err)
	if appendMetrics.implAppendDuration != 0 {
		m.walimplsDuration.WithLabelValues(status).Observe(appendMetrics.implAppendDuration.Seconds())
	}
	m.bytes.WithLabelValues(status).Observe(float64(appendMetrics.msg.EstimateSize()))
	m.total.WithLabelValues(appendMetrics.msg.MessageType().String(), status).Inc()
	m.walDuration.WithLabelValues(status).Observe(appendMetrics.appendDuration.Seconds())
	for name, ims := range appendMetrics.interceptors {
		for _, im := range ims {
			if im.Before != 0 {
				m.walBeforeInterceptorDuration.WithLabelValues(name).Observe(im.Before.Seconds())
			}
			if im.After != 0 {
				m.walAfterInterceptorDuration.WithLabelValues(name).Observe(im.After.Seconds())
			}
		}
	}
	if appendMetrics.err != nil {
		m.Logger().Warn("append message into wal failed", appendMetrics.IntoLogFields()...)
		return
	}
	if appendMetrics.appendDuration >= m.slowLogThreshold {
		// log appends that are slower than the configured threshold.
		m.Logger().Warn("append message into wal too slow", appendMetrics.IntoLogFields()...)
		return
	}
	if m.Logger().Level().Enabled(zapcore.DebugLevel) {
		m.Logger().Debug("append message into wal", appendMetrics.IntoLogFields()...)
	}
}

// ObserveRetry observes the retry of the walimpls.
func (m *WriteMetrics) ObserveRetry() {
	m.walimplsRetryTotal.Inc()
}

// Close removes the wal info gauge and all metric series bound to this pchannel.
func (m *WriteMetrics) Close() {
	metrics.WALAppendMessageBeforeInterceptorDurationSeconds.DeletePartialMatch(m.constLabel)
	metrics.WALAppendMessageAfterInterceptorDurationSeconds.DeletePartialMatch(m.constLabel)
	metrics.WALAppendMessageBytes.DeletePartialMatch(m.constLabel)
	metrics.WALAppendMessageTotal.DeletePartialMatch(m.constLabel)
	metrics.WALAppendMessageDurationSeconds.DeletePartialMatch(m.constLabel)
	metrics.WALImplsAppendRetryTotal.DeletePartialMatch(m.constLabel)
	metrics.WALImplsAppendMessageDurationSeconds.DeletePartialMatch(m.constLabel)
	metrics.WALInfo.DeleteLabelValues(
		paramtable.GetStringNodeID(),
		m.pchannel.Name,
		strconv.FormatInt(m.pchannel.Term, 10),
		m.walName,
	)
}

// parseError parses the error to status.
func parseError(err error) string {
	if err == nil {
		return metrics.WALStatusOK
	}
	if status.IsCanceled(err) {
		return metrics.WALStatusCancel
	}
	return metrics.WALStatusError
}
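
// Usage sketch (illustrative only, not verbatim from the surrounding wal code):
// a WriteMetrics is assumed to be created once per opened pchannel, an
// AppendMetrics is started for every message append, and Close is called when
// the wal shuts down so the per-channel series are removed. The pchannel
// values, walName, and msg below are placeholders; completing an append (which
// drives done) is handled by AppendMetrics elsewhere in this package.
//
//	wm := NewWriteMetrics(types.PChannelInfo{Name: "pchannel-1", Term: 1}, walName)
//	am := wm.StartAppend(msg) // msg is a message.MutableMessage built by the caller
//	_ = am                    // am accumulates timings until the append completes
//	wm.ObserveRetry()         // count one retried append on the underlying walimpls
//	wm.Close()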