mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
fix: Clean offline node from replica after qc recover (#33213)
issue: #33200 #33207. PR #33104 removed this logic by mistake, which causes offline nodes to be kept in replicas after QueryCoord recovers, so requests sent to an offline QueryNode fail with a NodeNotFound error. Signed-off-by: Wei Liu <wei.liu@zilliz.com>
This commit is contained in:
parent
cb480d17c8
commit
33bd6eed28
@ -456,6 +456,7 @@ func (s *Server) startQueryCoord() error {
|
||||
s.nodeMgr.Stopping(node.ServerID)
|
||||
}
|
||||
}
|
||||
s.checkReplicas()
|
||||
for _, node := range sessions {
|
||||
s.handleNodeUp(node.ServerID)
|
||||
}
|
||||
@ -777,6 +778,33 @@ func (s *Server) handleNodeDown(node int64) {
|
||||
s.meta.ResourceManager.HandleNodeDown(node)
|
||||
}
|
||||
|
||||
// checkReplicas checks whether replica contains offline node, and remove those nodes
|
||||
func (s *Server) checkReplicas() {
|
||||
for _, collection := range s.meta.CollectionManager.GetAll() {
|
||||
log := log.With(zap.Int64("collectionID", collection))
|
||||
replicas := s.meta.ReplicaManager.GetByCollection(collection)
|
||||
for _, replica := range replicas {
|
||||
toRemove := make([]int64, 0)
|
||||
for _, node := range replica.GetNodes() {
|
||||
if s.nodeMgr.Get(node) == nil {
|
||||
toRemove = append(toRemove, node)
|
||||
}
|
||||
}
|
||||
|
||||
if len(toRemove) > 0 {
|
||||
log := log.With(
|
||||
zap.Int64("replicaID", replica.GetID()),
|
||||
zap.Int64s("offlineNodes", toRemove),
|
||||
)
|
||||
log.Info("some nodes are offline, remove them from replica", zap.Any("toRemove", toRemove))
|
||||
if err := s.meta.ReplicaManager.RemoveNode(replica.GetID(), toRemove...); err != nil {
|
||||
log.Warn("failed to remove offline nodes from replica")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) updateBalanceConfigLoop(ctx context.Context) {
|
||||
success := s.updateBalanceConfig()
|
||||
if success {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user