fix: Leader task stuck and retry again and again (#38202) (#38349)

issue: #38201
pr: #38202
A leader task needs to update the delegator's distribution, and it only succeeds
after the distribution change has been applied to the delegator. However, the
delegator will reject the distribution change if its version is older
than the current version in the delegator, which causes the leader task to get
stuck and retry forever.

This PR removes the leader task finish check.

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
This commit is contained in:
wei liu 2024-12-11 10:10:43 +08:00 committed by GitHub
parent 25249fd26e
commit f4696a1993
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -24,7 +24,6 @@ import (
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/funcutil"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
@ -248,21 +247,5 @@ func (action *LeaderAction) GetLeaderID() typeutil.UniqueID {
}
// IsFinished reports whether this leader action is considered finished, using
// the action's rpcReturned flag and the leader views held by distMgr.
//
// NOTE(review): this span looks like a diff hunk rendered without +/- markers.
// The final `return action.rpcReturned.Load()` follows an unconditional
// `return false`, so as written here it is unreachable; presumably it is the
// added line that replaces the checks above it — confirm against the commit.
func (action *LeaderAction) IsFinished(distMgr *meta.DistributionManager) bool {
// Fetch the leader views filtered by action.leaderID and action.Shard().
views := distMgr.LeaderViewManager.GetByFilter(meta.WithNodeID2LeaderView(action.leaderID), meta.WithChannelName2LeaderView(action.Shard()))
if len(views) == 0 {
// No matching leader view: report not finished.
return false
}
// Pick one view via lo.MaxBy with a comparator on Version
// (presumably the view with the highest Version — confirm lo.MaxBy semantics).
view := lo.MaxBy(views, func(v1 *meta.LeaderView, v2 *meta.LeaderView) bool {
return v1.Version > v2.Version
})
// Entry for this action's segment in the chosen view; the cases below handle a nil entry.
dist := view.Segments[action.SegmentID()]
switch action.Type() {
case ActionTypeGrow:
// Grow: the RPC has returned and the view records the segment with NodeID equal to action.Node().
return action.rpcReturned.Load() && dist != nil && dist.NodeID == action.Node()
case ActionTypeReduce:
// Reduce: the RPC has returned and the view has no entry for the segment, or records a different NodeID.
return action.rpcReturned.Load() && (dist == nil || dist.NodeID != action.Node())
case ActionTypeUpdate:
// Update: the RPC has returned and common.MapEquals reports the action's partStatsVersions
// equal to the view's PartitionStatsVersions.
return action.rpcReturned.Load() && common.MapEquals(action.partStatsVersions, view.PartitionStatsVersions)
}
return false
return action.rpcReturned.Load()
}