mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-08 10:08:42 +08:00
fix: [2.5] Remove frequently updating metric to avoid mutex contention (#38778)
issue: https://github.com/milvus-io/milvus/issues/37630

Reduce the frequency of `updateIndexTasksMetrics` to avoid holding the mutex for long periods.

pr: https://github.com/milvus-io/milvus/pull/38775

---------

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
This commit is contained in:
parent
1602390734
commit
c741b8be2b
@ -27,6 +27,7 @@ import (
|
|||||||
"github.com/hashicorp/golang-lru/v2/expirable"
|
"github.com/hashicorp/golang-lru/v2/expirable"
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"github.com/samber/lo"
|
"github.com/samber/lo"
|
||||||
|
"go.uber.org/atomic"
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
"google.golang.org/protobuf/proto"
|
"google.golang.org/protobuf/proto"
|
||||||
|
|
||||||
@ -65,6 +66,8 @@ type indexMeta struct {
|
|||||||
|
|
||||||
// segmentID -> indexID -> segmentIndex
|
// segmentID -> indexID -> segmentIndex
|
||||||
segmentIndexes map[UniqueID]map[UniqueID]*model.SegmentIndex
|
segmentIndexes map[UniqueID]map[UniqueID]*model.SegmentIndex
|
||||||
|
|
||||||
|
lastUpdateMetricTime atomic.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
func newIndexTaskStats(s *model.SegmentIndex) *metricsinfo.IndexTaskStats {
|
func newIndexTaskStats(s *model.SegmentIndex) *metricsinfo.IndexTaskStats {
|
||||||
@ -205,6 +208,10 @@ func (m *indexMeta) updateSegIndexMeta(segIdx *model.SegmentIndex, updateFunc fu
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m *indexMeta) updateIndexTasksMetrics() {
|
func (m *indexMeta) updateIndexTasksMetrics() {
|
||||||
|
if time.Since(m.lastUpdateMetricTime.Load()) < 120*time.Second {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer m.lastUpdateMetricTime.Store(time.Now())
|
||||||
taskMetrics := make(map[UniqueID]map[commonpb.IndexState]int)
|
taskMetrics := make(map[UniqueID]map[commonpb.IndexState]int)
|
||||||
for _, segIdx := range m.segmentBuildInfo.List() {
|
for _, segIdx := range m.segmentBuildInfo.List() {
|
||||||
if segIdx.IsDeleted {
|
if segIdx.IsDeleted {
|
||||||
@ -233,6 +240,7 @@ func (m *indexMeta) updateIndexTasksMetrics() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
log.Ctx(m.ctx).Info("update index metric", zap.Int("collectionNum", len(taskMetrics)))
|
||||||
}
|
}
|
||||||
|
|
||||||
func checkParams(fieldIndex *model.Index, req *indexpb.CreateIndexRequest) bool {
|
func checkParams(fieldIndex *model.Index, req *indexpb.CreateIndexRequest) bool {
|
||||||
@ -874,7 +882,7 @@ func (m *indexMeta) GetAllSegIndexes() map[int64]*model.SegmentIndex {
|
|||||||
tasks := m.segmentBuildInfo.List()
|
tasks := m.segmentBuildInfo.List()
|
||||||
segIndexes := make(map[int64]*model.SegmentIndex, len(tasks))
|
segIndexes := make(map[int64]*model.SegmentIndex, len(tasks))
|
||||||
for buildID, segIndex := range tasks {
|
for buildID, segIndex := range tasks {
|
||||||
segIndexes[buildID] = model.CloneSegmentIndex(segIndex)
|
segIndexes[buildID] = segIndex
|
||||||
}
|
}
|
||||||
return segIndexes
|
return segIndexes
|
||||||
}
|
}
|
||||||
@ -971,22 +979,6 @@ func (m *indexMeta) CheckCleanSegmentIndex(buildID UniqueID) (bool, *model.Segme
|
|||||||
return true, nil
|
return true, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *indexMeta) GetMetasByNodeID(nodeID UniqueID) []*model.SegmentIndex {
|
|
||||||
m.RLock()
|
|
||||||
defer m.RUnlock()
|
|
||||||
|
|
||||||
metas := make([]*model.SegmentIndex, 0)
|
|
||||||
for _, segIndex := range m.segmentBuildInfo.List() {
|
|
||||||
if segIndex.IsDeleted {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if nodeID == segIndex.NodeID {
|
|
||||||
metas = append(metas, model.CloneSegmentIndex(segIndex))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return metas
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *indexMeta) getSegmentsIndexStates(collectionID UniqueID, segmentIDs []UniqueID) map[int64]map[int64]*indexpb.SegmentIndexState {
|
func (m *indexMeta) getSegmentsIndexStates(collectionID UniqueID, segmentIDs []UniqueID) map[int64]map[int64]*indexpb.SegmentIndexState {
|
||||||
m.RLock()
|
m.RLock()
|
||||||
defer m.RUnlock()
|
defer m.RUnlock()
|
||||||
|
|||||||
@ -170,7 +170,9 @@ type taskScheduler struct {
|
|||||||
channelTasks map[replicaChannelIndex]Task
|
channelTasks map[replicaChannelIndex]Task
|
||||||
processQueue *taskQueue
|
processQueue *taskQueue
|
||||||
waitQueue *taskQueue
|
waitQueue *taskQueue
|
||||||
taskStats *expirable.LRU[UniqueID, Task]
|
|
||||||
|
taskStats *expirable.LRU[UniqueID, Task]
|
||||||
|
lastUpdateMetricTime atomic.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewScheduler(ctx context.Context,
|
func NewScheduler(ctx context.Context,
|
||||||
@ -292,6 +294,9 @@ func (scheduler *taskScheduler) Add(task Task) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (scheduler *taskScheduler) updateTaskMetrics() {
|
func (scheduler *taskScheduler) updateTaskMetrics() {
|
||||||
|
if time.Since(scheduler.lastUpdateMetricTime.Load()) < 30*time.Second {
|
||||||
|
return
|
||||||
|
}
|
||||||
segmentGrowNum, segmentReduceNum, segmentMoveNum := 0, 0, 0
|
segmentGrowNum, segmentReduceNum, segmentMoveNum := 0, 0, 0
|
||||||
channelGrowNum, channelReduceNum, channelMoveNum := 0, 0, 0
|
channelGrowNum, channelReduceNum, channelMoveNum := 0, 0, 0
|
||||||
for _, task := range scheduler.segmentTasks {
|
for _, task := range scheduler.segmentTasks {
|
||||||
@ -324,6 +329,7 @@ func (scheduler *taskScheduler) updateTaskMetrics() {
|
|||||||
metrics.QueryCoordTaskNum.WithLabelValues(metrics.ChannelGrowTaskLabel).Set(float64(channelGrowNum))
|
metrics.QueryCoordTaskNum.WithLabelValues(metrics.ChannelGrowTaskLabel).Set(float64(channelGrowNum))
|
||||||
metrics.QueryCoordTaskNum.WithLabelValues(metrics.ChannelReduceTaskLabel).Set(float64(channelReduceNum))
|
metrics.QueryCoordTaskNum.WithLabelValues(metrics.ChannelReduceTaskLabel).Set(float64(channelReduceNum))
|
||||||
metrics.QueryCoordTaskNum.WithLabelValues(metrics.ChannelMoveTaskLabel).Set(float64(channelMoveNum))
|
metrics.QueryCoordTaskNum.WithLabelValues(metrics.ChannelMoveTaskLabel).Set(float64(channelMoveNum))
|
||||||
|
scheduler.lastUpdateMetricTime.Store(time.Now())
|
||||||
}
|
}
|
||||||
|
|
||||||
// check whether the task is valid to add,
|
// check whether the task is valid to add,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user