mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
fix: executor/scheduler should use the latest replica meta, not a replica copy (#45877)
issue: #45865 --------- Signed-off-by: chyezh <chyezh@outlook.com>
This commit is contained in:
parent
b69cd23c7c
commit
31976d8adb
@ -228,7 +228,7 @@ func (ex *Executor) loadSegment(task *SegmentTask, step int) error {
|
||||
)
|
||||
|
||||
// get segment's replica first, then get shard leader by replica
|
||||
replica := ex.meta.ReplicaManager.GetByCollectionAndNode(ctx, task.CollectionID(), action.Node())
|
||||
replica := ex.meta.ReplicaManager.Get(ctx, task.ReplicaID())
|
||||
if replica == nil {
|
||||
msg := "node doesn't belong to any replica"
|
||||
err := merr.WrapErrNodeNotAvailable(action.Node())
|
||||
@ -331,7 +331,7 @@ func (ex *Executor) releaseSegment(task *SegmentTask, step int) {
|
||||
|
||||
if ex.meta.CollectionManager.Exist(ctx, task.CollectionID()) {
|
||||
// get segment's replica first, then get shard leader by replica
|
||||
replica := ex.meta.ReplicaManager.GetByCollectionAndNode(ctx, task.CollectionID(), action.Node())
|
||||
replica := ex.meta.ReplicaManager.Get(ctx, task.ReplicaID())
|
||||
if replica == nil {
|
||||
msg := "node doesn't belong to any replica, try to send release to worker"
|
||||
err := merr.WrapErrNodeNotAvailable(action.Node())
|
||||
@ -577,7 +577,13 @@ func (ex *Executor) executeDropIndexAction(task *DropIndexTask, step int) {
|
||||
ex.removeTask(task, step)
|
||||
}()
|
||||
|
||||
view := ex.dist.ChannelDistManager.GetShardLeader(task.Shard(), task.replica)
|
||||
replica := ex.meta.ReplicaManager.Get(ctx, task.ReplicaID())
|
||||
if replica == nil {
|
||||
err = merr.WrapErrNodeNotAvailable(action.Node())
|
||||
log.Warn("node doesn't belong to any replica", zap.Error(err))
|
||||
return
|
||||
}
|
||||
view := ex.dist.ChannelDistManager.GetShardLeader(task.Shard(), replica)
|
||||
if view == nil {
|
||||
err = merr.WrapErrChannelNotFound(task.Shard(), "shard delegator not found")
|
||||
log.Warn("failed to get shard leader", zap.Error(err))
|
||||
|
||||
@ -507,7 +507,7 @@ func (scheduler *taskScheduler) preAdd(task Task) error {
|
||||
taskType := GetTaskType(task)
|
||||
|
||||
if taskType == TaskTypeMove {
|
||||
leader := scheduler.distMgr.ChannelDistManager.GetShardLeader(task.Shard(), task.replica)
|
||||
leader := scheduler.getReplicaShardLeader(task.Shard(), task.ReplicaID())
|
||||
if leader == nil {
|
||||
return merr.WrapErrServiceInternal("segment's delegator leader not found, stop balancing")
|
||||
}
|
||||
@ -590,6 +590,14 @@ func (scheduler *taskScheduler) preAdd(task Task) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (scheduler *taskScheduler) getReplicaShardLeader(channelName string, replicaID int64) *meta.DmChannel {
|
||||
replica := scheduler.meta.ReplicaManager.Get(scheduler.ctx, replicaID)
|
||||
if replica == nil {
|
||||
return nil
|
||||
}
|
||||
return scheduler.distMgr.ChannelDistManager.GetShardLeader(channelName, replica)
|
||||
}
|
||||
|
||||
func (scheduler *taskScheduler) tryPromoteAll() {
|
||||
// Promote waiting tasks
|
||||
toPromote := make([]Task, 0, scheduler.waitQueue.Len())
|
||||
@ -854,7 +862,7 @@ func (scheduler *taskScheduler) isRelated(task Task, node int64) bool {
|
||||
if task.replica == nil {
|
||||
continue
|
||||
}
|
||||
leader := scheduler.distMgr.ChannelDistManager.GetShardLeader(task.Shard(), task.replica)
|
||||
leader := scheduler.getReplicaShardLeader(task.Shard(), task.ReplicaID())
|
||||
if leader == nil {
|
||||
continue
|
||||
}
|
||||
@ -883,7 +891,7 @@ func (scheduler *taskScheduler) preProcess(task Task) bool {
|
||||
case *ChannelAction:
|
||||
// wait for the new delegator to become leader, then try to remove the old leader
|
||||
task := task.(*ChannelTask)
|
||||
delegator := scheduler.distMgr.ChannelDistManager.GetShardLeader(task.Shard(), task.replica)
|
||||
delegator := scheduler.getReplicaShardLeader(task.Shard(), task.ReplicaID())
|
||||
log.Ctx(scheduler.ctx).Debug("process channelAction", zap.Bool("delegator is Nil", delegator == nil))
|
||||
if delegator != nil {
|
||||
log.Ctx(scheduler.ctx).Debug("process channelAction", zap.Int64("delegator node", delegator.Node),
|
||||
@ -1144,7 +1152,7 @@ func (scheduler *taskScheduler) checkSegmentTaskStale(task *SegmentTask) error {
|
||||
return merr.WrapErrSegmentReduplicate(task.SegmentID(), "target doesn't contain this segment")
|
||||
}
|
||||
|
||||
leader := scheduler.distMgr.ChannelDistManager.GetShardLeader(task.Shard(), task.replica)
|
||||
leader := scheduler.getReplicaShardLeader(task.Shard(), task.ReplicaID())
|
||||
if leader == nil {
|
||||
log.Ctx(task.Context()).Warn("task stale due to leader not found", WrapTaskLog(task)...)
|
||||
return merr.WrapErrChannelNotFound(segment.GetInsertChannel(), "failed to get shard delegator")
|
||||
@ -1198,14 +1206,14 @@ func (scheduler *taskScheduler) checkLeaderTaskStale(task *LeaderTask) error {
|
||||
return merr.WrapErrSegmentReduplicate(task.SegmentID(), "target doesn't contain this segment")
|
||||
}
|
||||
|
||||
leader := scheduler.distMgr.ChannelDistManager.GetShardLeader(task.Shard(), task.replica)
|
||||
leader := scheduler.getReplicaShardLeader(task.Shard(), task.ReplicaID())
|
||||
if leader == nil {
|
||||
log.Ctx(task.Context()).Warn("task stale due to leader not found", WrapTaskLog(task, zap.Int64("leaderID", task.leaderID))...)
|
||||
return merr.WrapErrChannelNotFound(task.Shard(), "failed to get shard delegator")
|
||||
}
|
||||
|
||||
case ActionTypeReduce:
|
||||
leader := scheduler.distMgr.ChannelDistManager.GetShardLeader(task.Shard(), task.replica)
|
||||
leader := scheduler.getReplicaShardLeader(task.Shard(), task.ReplicaID())
|
||||
if leader == nil {
|
||||
log.Ctx(task.Context()).Warn("task stale due to leader not found", WrapTaskLog(task, zap.Int64("leaderID", task.leaderID))...)
|
||||
return merr.WrapErrChannelNotFound(task.Shard(), "failed to get shard delegator")
|
||||
|
||||
@ -160,6 +160,7 @@ func (suite *TaskSuite) SetupTest() {
|
||||
suite.kv = etcdkv.NewEtcdKV(cli, config.MetaRootPath.GetValue())
|
||||
suite.store = querycoord.NewCatalog(suite.kv)
|
||||
suite.meta = meta.NewMeta(RandomIncrementIDAllocator(), suite.store, session.NewNodeManager())
|
||||
suite.meta.ReplicaManager.Put(suite.ctx, suite.replica)
|
||||
suite.nodeMgr = session.NewNodeManager()
|
||||
suite.dist = meta.NewDistributionManager(suite.nodeMgr)
|
||||
suite.broker = meta.NewMockBroker(suite.T())
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user