mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 09:38:39 +08:00
fix: Clean offline node from replica after qc recover (#33213)
issue: #33200 #33207 PR #33104 removed this logic by mistake, which causes offline nodes to be kept in replicas after qc recovers, so requests sent to an offline qn fail with a NodeNotFound error. Signed-off-by: Wei Liu <wei.liu@zilliz.com>
This commit is contained in:
parent
cb480d17c8
commit
33bd6eed28
@ -456,6 +456,7 @@ func (s *Server) startQueryCoord() error {
|
|||||||
s.nodeMgr.Stopping(node.ServerID)
|
s.nodeMgr.Stopping(node.ServerID)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
s.checkReplicas()
|
||||||
for _, node := range sessions {
|
for _, node := range sessions {
|
||||||
s.handleNodeUp(node.ServerID)
|
s.handleNodeUp(node.ServerID)
|
||||||
}
|
}
|
||||||
@ -777,6 +778,33 @@ func (s *Server) handleNodeDown(node int64) {
|
|||||||
s.meta.ResourceManager.HandleNodeDown(node)
|
s.meta.ResourceManager.HandleNodeDown(node)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// checkReplicas checks whether replica contains offline node, and remove those nodes
|
||||||
|
func (s *Server) checkReplicas() {
|
||||||
|
for _, collection := range s.meta.CollectionManager.GetAll() {
|
||||||
|
log := log.With(zap.Int64("collectionID", collection))
|
||||||
|
replicas := s.meta.ReplicaManager.GetByCollection(collection)
|
||||||
|
for _, replica := range replicas {
|
||||||
|
toRemove := make([]int64, 0)
|
||||||
|
for _, node := range replica.GetNodes() {
|
||||||
|
if s.nodeMgr.Get(node) == nil {
|
||||||
|
toRemove = append(toRemove, node)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(toRemove) > 0 {
|
||||||
|
log := log.With(
|
||||||
|
zap.Int64("replicaID", replica.GetID()),
|
||||||
|
zap.Int64s("offlineNodes", toRemove),
|
||||||
|
)
|
||||||
|
log.Info("some nodes are offline, remove them from replica", zap.Any("toRemove", toRemove))
|
||||||
|
if err := s.meta.ReplicaManager.RemoveNode(replica.GetID(), toRemove...); err != nil {
|
||||||
|
log.Warn("failed to remove offline nodes from replica")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (s *Server) updateBalanceConfigLoop(ctx context.Context) {
|
func (s *Server) updateBalanceConfigLoop(ctx context.Context) {
|
||||||
success := s.updateBalanceConfig()
|
success := s.updateBalanceConfig()
|
||||||
if success {
|
if success {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user