From bb5088e605ed2a2673301b439b7857ec6de61e02 Mon Sep 17 00:00:00 2001 From: wei liu Date: Thu, 16 Mar 2023 14:27:55 +0800 Subject: [PATCH] fix unassign from rg (#22747) Signed-off-by: Wei Liu --- internal/querycoordv2/meta/resource_manager.go | 18 +++++++++++++----- .../querycoordv2/meta/resource_manager_test.go | 17 +++++++++++++++++ 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/internal/querycoordv2/meta/resource_manager.go b/internal/querycoordv2/meta/resource_manager.go index 1c75762f6d..c122f0277f 100644 --- a/internal/querycoordv2/meta/resource_manager.go +++ b/internal/querycoordv2/meta/resource_manager.go @@ -273,7 +273,7 @@ func (rm *ResourceManager) unassignNode(rgName string, node int64) error { return ErrRGNotExist } - if rm.nodeMgr.Get(node) == nil || !rm.groups[rgName].containsNode(node) { + if !rm.groups[rgName].containsNode(node) { // remove non exist node should be tolerable return nil } @@ -477,10 +477,6 @@ func (rm *ResourceManager) HandleNodeDown(node int64) (string, error) { rm.rwmutex.Lock() defer rm.rwmutex.Unlock() - if rm.nodeMgr.Get(node) == nil { - return "", ErrNodeNotExist - } - rgName, err := rm.findResourceGroupByNode(node) if err != nil { return "", ErrNodeNotAssignToRG @@ -555,6 +551,12 @@ func (rm *ResourceManager) TransferNode(from string, to string, numNode int) ([] // interrupt transfer, unreachable logic path return nil, err } + + log.Info("transfer node", + zap.String("sourceRG", from), + zap.String("targetRG", to), + zap.Int64("nodeID", node), + ) } return movedNodes, nil @@ -616,6 +618,7 @@ func (rm *ResourceManager) AutoRecoverResourceGroup(rgName string) ([]int64, err ret := make([]int64, 0) + rm.checkRGNodeStatus(DefaultResourceGroupName) rm.checkRGNodeStatus(rgName) lackNodesNum := rm.groups[rgName].LackOfNodes() nodesInDefault := rm.groups[DefaultResourceGroupName].GetNodes() @@ -635,6 +638,11 @@ func (rm *ResourceManager) AutoRecoverResourceGroup(rgName string) ([]int64, err return ret, err } + log.Info("move node from default rg to recover", + zap.String("targetRG", rgName), + zap.Int64("nodeID", node), + ) + ret = append(ret, node) } diff --git a/internal/querycoordv2/meta/resource_manager_test.go b/internal/querycoordv2/meta/resource_manager_test.go index cb2645bbf7..d0b8702c7f 100644 --- a/internal/querycoordv2/meta/resource_manager_test.go +++ b/internal/querycoordv2/meta/resource_manager_test.go @@ -321,6 +321,23 @@ func (suite *ResourceManagerSuite) TestAutoRecover() { suite.manager.AutoRecoverResourceGroup("rg") lackNodes = suite.manager.CheckLackOfNode("rg") suite.Equal(lackNodes, 0) + + // test auto recover behavior when all node down + suite.manager.nodeMgr.Remove(1) + suite.manager.nodeMgr.Remove(2) + suite.manager.AutoRecoverResourceGroup("rg") + nodes, _ := suite.manager.GetNodes("rg") + suite.Len(nodes, 0) + nodes, _ = suite.manager.GetNodes(DefaultResourceGroupName) + suite.Len(nodes, 0) + + suite.manager.nodeMgr.Add(session.NewNodeInfo(1, "localhost")) + suite.manager.HandleNodeUp(1) + suite.manager.AutoRecoverResourceGroup("rg") + nodes, _ = suite.manager.GetNodes("rg") + suite.Len(nodes, 1) + nodes, _ = suite.manager.GetNodes(DefaultResourceGroupName) + suite.Len(nodes, 0) } func (suite *ResourceManagerSuite) TestDefaultResourceGroup() {