fix: query node may get stuck in stopping progress (#33104)

issue: #33103 
When stopping balance is attempted for a stopping query node, the balancer
fetches the node list from replica.GetNodes and checks whether each node is
stopping; if any node is, stopping balance is triggered for that replica.

After the replica refactor, replica.GetNodes only returns rwNodes, while
stopping nodes are kept in roNodes. As a result, the balancer can no longer
find a replica that contains the stopping node, stopping balance is never
triggered, and the query node gets stuck forever because its segments and
channels are never moved out.
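For illustration only, a minimal standalone sketch of the decision change follows; the Replica type and needsStoppingBalance helper are simplified stand-ins for this note, not the real meta.Replica API touched by this commit:

package main

import "fmt"

// Replica is a simplified stand-in: rwNodes serve traffic, roNodes hold
// nodes that were moved out of the rw set (e.g. stopping nodes).
type Replica struct {
    rwNodes []int64
    roNodes []int64
}

func (r *Replica) GetRWNodes() []int64 { return r.rwNodes }
func (r *Replica) GetRONodes() []int64 { return r.roNodes }

// needsStoppingBalance mirrors the post-fix idea: a replica needs stopping
// balance as soon as it has any read-only node and at least one rw node to
// receive its segments/channels, instead of scanning GetNodes() for
// stopping nodes (which no longer contains them after the refactor).
func needsStoppingBalance(r *Replica) bool {
    return len(r.GetRONodes()) > 0 && len(r.GetRWNodes()) > 0
}

func main() {
    replica := &Replica{rwNodes: []int64{1, 2}, roNodes: []int64{3}}
    fmt.Println(needsStoppingBalance(replica)) // true: node 3 must be drained
}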

---------

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
wei liu 2024-05-20 10:21:38 +08:00 committed by GitHub
parent c6e2dd05fc
commit a7f6193bfc
30 changed files with 182 additions and 352 deletions


@@ -77,68 +77,51 @@ func (b *ChannelLevelScoreBalancer) BalanceReplica(replica *meta.Replica) ([]Seg
         return nil, nil
     }
-    onlineNodes := make([]int64, 0)
-    offlineNodes := make([]int64, 0)
-    // read only nodes is offline in current replica.
-    if replica.RONodesCount() > 0 {
-        // if node is stop or transfer to other rg
-        log.RatedInfo(10, "meet read only node, try to move out all segment/channel", zap.Int64s("node", replica.GetRONodes()))
-        offlineNodes = append(offlineNodes, replica.GetRONodes()...)
-    }
+    rwNodes := replica.GetChannelRWNodes(channelName)
+    roNodes := replica.GetRONodes()
     // mark channel's outbound access node as offline
-    channelRWNode := typeutil.NewUniqueSet(replica.GetChannelRWNodes(channelName)...)
+    channelRWNode := typeutil.NewUniqueSet(rwNodes...)
     channelDist := b.dist.ChannelDistManager.GetByFilter(meta.WithChannelName2Channel(channelName), meta.WithReplica2Channel(replica))
     for _, channel := range channelDist {
         if !channelRWNode.Contain(channel.Node) {
-            offlineNodes = append(offlineNodes, channel.Node)
+            roNodes = append(roNodes, channel.Node)
         }
     }
     segmentDist := b.dist.SegmentDistManager.GetByFilter(meta.WithChannel(channelName), meta.WithReplica(replica))
     for _, segment := range segmentDist {
         if !channelRWNode.Contain(segment.Node) {
-            offlineNodes = append(offlineNodes, segment.Node)
+            roNodes = append(roNodes, segment.Node)
         }
     }
-    for nid := range channelRWNode {
-        if isStopping, err := b.nodeManager.IsStoppingNode(nid); err != nil {
-            log.Info("not existed node", zap.Int64("nid", nid), zap.Error(err))
-            continue
-        } else if isStopping {
-            offlineNodes = append(offlineNodes, nid)
-        } else {
-            onlineNodes = append(onlineNodes, nid)
-        }
-    }
-    if len(onlineNodes) == 0 {
+    if len(rwNodes) == 0 {
         // no available nodes to balance
         return nil, nil
     }
-    if len(offlineNodes) != 0 {
+    if len(roNodes) != 0 {
         if !paramtable.Get().QueryCoordCfg.EnableStoppingBalance.GetAsBool() {
-            log.RatedInfo(10, "stopping balance is disabled!", zap.Int64s("stoppingNode", offlineNodes))
+            log.RatedInfo(10, "stopping balance is disabled!", zap.Int64s("stoppingNode", roNodes))
            return nil, nil
        }
        log.Info("Handle stopping nodes",
-            zap.Any("stopping nodes", offlineNodes),
-            zap.Any("available nodes", onlineNodes),
+            zap.Any("stopping nodes", roNodes),
+            zap.Any("available nodes", rwNodes),
        )
        // handle stopped nodes here, have to assign segments on stopping nodes to nodes with the smallest score
-        channelPlans = append(channelPlans, b.genStoppingChannelPlan(replica, channelName, onlineNodes, offlineNodes)...)
+        channelPlans = append(channelPlans, b.genStoppingChannelPlan(replica, channelName, rwNodes, roNodes)...)
        if len(channelPlans) == 0 {
-            segmentPlans = append(segmentPlans, b.genStoppingSegmentPlan(replica, channelName, onlineNodes, offlineNodes)...)
+            segmentPlans = append(segmentPlans, b.genStoppingSegmentPlan(replica, channelName, rwNodes, roNodes)...)
        }
    } else {
        if paramtable.Get().QueryCoordCfg.AutoBalanceChannel.GetAsBool() {
-            channelPlans = append(channelPlans, b.genChannelPlan(replica, channelName, onlineNodes)...)
+            channelPlans = append(channelPlans, b.genChannelPlan(replica, channelName, rwNodes)...)
        }
        if len(channelPlans) == 0 {
-            segmentPlans = append(segmentPlans, b.genSegmentPlan(replica, channelName, onlineNodes)...)
+            segmentPlans = append(segmentPlans, b.genSegmentPlan(replica, channelName, rwNodes)...)
        }
    }
 }


@@ -1162,8 +1162,13 @@ func (suite *ChannelLevelScoreBalancerTestSuite) TestExclusiveChannelBalance_Nod
     },
 }...)
-suite.balancer.nodeManager.Stopping(ch1Nodes[0])
-suite.balancer.nodeManager.Stopping(ch2Nodes[0])
+balancer.nodeManager.Stopping(ch1Nodes[0])
+balancer.nodeManager.Stopping(ch2Nodes[0])
+suite.balancer.meta.ResourceManager.HandleNodeStopping(ch1Nodes[0])
+suite.balancer.meta.ResourceManager.HandleNodeStopping(ch2Nodes[0])
+utils.RecoverAllCollection(balancer.meta)
+replica = balancer.meta.ReplicaManager.Get(replica.GetID())
 sPlans, cPlans := balancer.BalanceReplica(replica)
 suite.Len(sPlans, 0)
 suite.Len(cPlans, 2)


@@ -466,67 +466,49 @@ func (b *MultiTargetBalancer) BalanceReplica(replica *meta.Replica) ([]SegmentAs
         return nil, nil
     }
-    onlineNodes := make([]int64, 0)
-    offlineNodes := make([]int64, 0)
-    // read only nodes is offline in current replica.
-    if replica.RONodesCount() > 0 {
-        // if node is stop or transfer to other rg
-        log.RatedInfo(10, "meet read only node, try to move out all segment/channel", zap.Int64s("node", replica.GetRONodes()))
-        offlineNodes = append(offlineNodes, replica.GetRONodes()...)
-    }
-    for _, nid := range replica.GetNodes() {
-        if isStopping, err := b.nodeManager.IsStoppingNode(nid); err != nil {
-            log.Info("not existed node", zap.Int64("nid", nid), zap.Error(err))
-            continue
-        } else if isStopping {
-            offlineNodes = append(offlineNodes, nid)
-        } else {
-            onlineNodes = append(onlineNodes, nid)
-        }
-    }
-    if len(onlineNodes) == 0 {
+    rwNodes := replica.GetRWNodes()
+    roNodes := replica.GetRONodes()
+    if len(rwNodes) == 0 {
         // no available nodes to balance
         return nil, nil
     }
     // print current distribution before generating plans
     segmentPlans, channelPlans := make([]SegmentAssignPlan, 0), make([]ChannelAssignPlan, 0)
-    if len(offlineNodes) != 0 {
+    if len(roNodes) != 0 {
        if !paramtable.Get().QueryCoordCfg.EnableStoppingBalance.GetAsBool() {
-            log.RatedInfo(10, "stopping balance is disabled!", zap.Int64s("stoppingNode", offlineNodes))
+            log.RatedInfo(10, "stopping balance is disabled!", zap.Int64s("stoppingNode", roNodes))
            return nil, nil
        }
        log.Info("Handle stopping nodes",
-            zap.Any("stopping nodes", offlineNodes),
-            zap.Any("available nodes", onlineNodes),
+            zap.Any("stopping nodes", roNodes),
+            zap.Any("available nodes", rwNodes),
        )
        // handle stopped nodes here, have to assign segments on stopping nodes to nodes with the smallest score
-        channelPlans = append(channelPlans, b.genStoppingChannelPlan(replica, onlineNodes, offlineNodes)...)
+        channelPlans = append(channelPlans, b.genStoppingChannelPlan(replica, rwNodes, roNodes)...)
        if len(channelPlans) == 0 {
-            segmentPlans = append(segmentPlans, b.genStoppingSegmentPlan(replica, onlineNodes, offlineNodes)...)
+            segmentPlans = append(segmentPlans, b.genStoppingSegmentPlan(replica, rwNodes, roNodes)...)
        }
    } else {
        if paramtable.Get().QueryCoordCfg.AutoBalanceChannel.GetAsBool() {
-            channelPlans = append(channelPlans, b.genChannelPlan(replica, onlineNodes)...)
+            channelPlans = append(channelPlans, b.genChannelPlan(replica, rwNodes)...)
        }
        if len(channelPlans) == 0 {
-            segmentPlans = b.genSegmentPlan(replica)
+            segmentPlans = b.genSegmentPlan(replica, rwNodes)
        }
    }
    return segmentPlans, channelPlans
 }
-func (b *MultiTargetBalancer) genSegmentPlan(replica *meta.Replica) []SegmentAssignPlan {
+func (b *MultiTargetBalancer) genSegmentPlan(replica *meta.Replica, rwNodes []int64) []SegmentAssignPlan {
     // get segments distribution on replica level and global level
     nodeSegments := make(map[int64][]*meta.Segment)
     globalNodeSegments := make(map[int64][]*meta.Segment)
-    for _, node := range replica.GetNodes() {
+    for _, node := range rwNodes {
         dist := b.dist.SegmentDistManager.GetByFilter(meta.WithCollectionID(replica.GetCollectionID()), meta.WithNodeID(node))
         segments := lo.Filter(dist, func(segment *meta.Segment, _ int) bool {
             return b.targetMgr.GetSealedSegment(segment.GetCollectionID(), segment.GetID(), meta.CurrentTarget) != nil &&


@@ -126,9 +126,7 @@ func (b *RowCountBasedBalancer) AssignChannel(channels []*meta.DmChannel, nodes
 func (b *RowCountBasedBalancer) convertToNodeItemsBySegment(nodeIDs []int64) []*nodeItem {
     ret := make([]*nodeItem, 0, len(nodeIDs))
-    for _, nodeInfo := range b.getNodes(nodeIDs) {
-        node := nodeInfo.ID()
+    for _, node := range nodeIDs {
         // calculate sealed segment row count on node
         segments := b.dist.SegmentDistManager.GetByFilter(meta.WithNodeID(node))
         rowcnt := 0
@@ -151,8 +149,7 @@ func (b *RowCountBasedBalancer) convertToNodeItemsBySegment(nodeIDs []int64) []*
 func (b *RowCountBasedBalancer) convertToNodeItemsByChannel(nodeIDs []int64) []*nodeItem {
     ret := make([]*nodeItem, 0, len(nodeIDs))
-    for _, nodeInfo := range b.getNodes(nodeIDs) {
-        node := nodeInfo.ID()
+    for _, node := range nodeIDs {
         channels := b.dist.ChannelDistManager.GetByFilter(meta.WithNodeID2Channel(node))
         // more channel num, less priority
@@ -172,71 +169,52 @@ func (b *RowCountBasedBalancer) BalanceReplica(replica *meta.Replica) ([]Segment
         return nil, nil
     }
-    onlineNodes := make([]int64, 0)
-    offlineNodes := make([]int64, 0)
-    // read only nodes is offline in current replica.
-    if replica.RONodesCount() > 0 {
-        // if node is stop or transfer to other rg
-        log.RatedInfo(10, "meet read only node, try to move out all segment/channel", zap.Int64s("node", replica.GetRONodes()))
-        offlineNodes = append(offlineNodes, replica.GetRONodes()...)
-    }
-    for _, nid := range replica.GetNodes() {
-        if isStopping, err := b.nodeManager.IsStoppingNode(nid); err != nil {
-            log.Info("not existed node", zap.Int64("nid", nid), zap.Error(err))
-            continue
-        } else if isStopping {
-            offlineNodes = append(offlineNodes, nid)
-        } else {
-            onlineNodes = append(onlineNodes, nid)
-        }
-    }
-    if len(onlineNodes) == 0 {
+    rwNodes := replica.GetRWNodes()
+    roNodes := replica.GetRONodes()
+    if len(rwNodes) == 0 {
         // no available nodes to balance
         return nil, nil
     }
     segmentPlans, channelPlans := make([]SegmentAssignPlan, 0), make([]ChannelAssignPlan, 0)
-    if len(offlineNodes) != 0 {
+    if len(roNodes) != 0 {
        if !paramtable.Get().QueryCoordCfg.EnableStoppingBalance.GetAsBool() {
-            log.RatedInfo(10, "stopping balance is disabled!", zap.Int64s("stoppingNode", offlineNodes))
+            log.RatedInfo(10, "stopping balance is disabled!", zap.Int64s("stoppingNode", roNodes))
            return nil, nil
        }
        log.Info("Handle stopping nodes",
-            zap.Any("stopping nodes", offlineNodes),
-            zap.Any("available nodes", onlineNodes),
+            zap.Any("stopping nodes", roNodes),
+            zap.Any("available nodes", rwNodes),
        )
        // handle stopped nodes here, have to assign segments on stopping nodes to nodes with the smallest score
-        channelPlans = append(channelPlans, b.genStoppingChannelPlan(replica, onlineNodes, offlineNodes)...)
+        channelPlans = append(channelPlans, b.genStoppingChannelPlan(replica, rwNodes, roNodes)...)
        if len(channelPlans) == 0 {
-            segmentPlans = append(segmentPlans, b.genStoppingSegmentPlan(replica, onlineNodes, offlineNodes)...)
+            segmentPlans = append(segmentPlans, b.genStoppingSegmentPlan(replica, rwNodes, roNodes)...)
        }
    } else {
        if paramtable.Get().QueryCoordCfg.AutoBalanceChannel.GetAsBool() {
-            channelPlans = append(channelPlans, b.genChannelPlan(replica, onlineNodes)...)
+            channelPlans = append(channelPlans, b.genChannelPlan(replica, rwNodes)...)
        }
        if len(channelPlans) == 0 {
-            segmentPlans = append(segmentPlans, b.genSegmentPlan(replica, onlineNodes)...)
+            segmentPlans = append(segmentPlans, b.genSegmentPlan(replica, rwNodes)...)
        }
    }
    return segmentPlans, channelPlans
 }
-func (b *RowCountBasedBalancer) genStoppingSegmentPlan(replica *meta.Replica, onlineNodes []int64, offlineNodes []int64) []SegmentAssignPlan {
+func (b *RowCountBasedBalancer) genStoppingSegmentPlan(replica *meta.Replica, rwNodes []int64, roNodes []int64) []SegmentAssignPlan {
     segmentPlans := make([]SegmentAssignPlan, 0)
-    for _, nodeID := range offlineNodes {
+    for _, nodeID := range roNodes {
         dist := b.dist.SegmentDistManager.GetByFilter(meta.WithCollectionID(replica.GetCollectionID()), meta.WithNodeID(nodeID))
         segments := lo.Filter(dist, func(segment *meta.Segment, _ int) bool {
             return b.targetMgr.GetSealedSegment(segment.GetCollectionID(), segment.GetID(), meta.CurrentTarget) != nil &&
                 b.targetMgr.GetSealedSegment(segment.GetCollectionID(), segment.GetID(), meta.NextTarget) != nil &&
                 segment.GetLevel() != datapb.SegmentLevel_L0
         })
-        plans := b.AssignSegment(replica.GetCollectionID(), segments, onlineNodes, false)
+        plans := b.AssignSegment(replica.GetCollectionID(), segments, rwNodes, false)
         for i := range plans {
             plans[i].From = nodeID
             plans[i].Replica = replica
@@ -246,13 +224,13 @@ func (b *RowCountBasedBalancer) genStoppingSegmentPlan(replica *meta.Replica, on
     return segmentPlans
 }
-func (b *RowCountBasedBalancer) genSegmentPlan(replica *meta.Replica, onlineNodes []int64) []SegmentAssignPlan {
+func (b *RowCountBasedBalancer) genSegmentPlan(replica *meta.Replica, rwNodes []int64) []SegmentAssignPlan {
     segmentsToMove := make([]*meta.Segment, 0)
     nodeRowCount := make(map[int64]int, 0)
     segmentDist := make(map[int64][]*meta.Segment)
     totalRowCount := 0
-    for _, node := range onlineNodes {
+    for _, node := range rwNodes {
         dist := b.dist.SegmentDistManager.GetByFilter(meta.WithCollectionID(replica.GetCollectionID()), meta.WithNodeID(node))
         segments := lo.Filter(dist, func(segment *meta.Segment, _ int) bool {
             return b.targetMgr.GetSealedSegment(segment.GetCollectionID(), segment.GetID(), meta.CurrentTarget) != nil &&
@@ -273,7 +251,7 @@ func (b *RowCountBasedBalancer) genSegmentPlan(replica *meta.Replica, onlineNode
     }
     // find nodes with less row count than average
-    average := totalRowCount / len(onlineNodes)
+    average := totalRowCount / len(rwNodes)
     nodesWithLessRow := make([]int64, 0)
     for node, segments := range segmentDist {
         sort.Slice(segments, func(i, j int) bool {
@@ -313,11 +291,11 @@ func (b *RowCountBasedBalancer) genSegmentPlan(replica *meta.Replica, onlineNode
     return segmentPlans
 }
-func (b *RowCountBasedBalancer) genStoppingChannelPlan(replica *meta.Replica, onlineNodes []int64, offlineNodes []int64) []ChannelAssignPlan {
+func (b *RowCountBasedBalancer) genStoppingChannelPlan(replica *meta.Replica, rwNodes []int64, roNodes []int64) []ChannelAssignPlan {
     channelPlans := make([]ChannelAssignPlan, 0)
-    for _, nodeID := range offlineNodes {
+    for _, nodeID := range roNodes {
         dmChannels := b.dist.ChannelDistManager.GetByCollectionAndFilter(replica.GetCollectionID(), meta.WithNodeID2Channel(nodeID))
-        plans := b.AssignChannel(dmChannels, onlineNodes, false)
+        plans := b.AssignChannel(dmChannels, rwNodes, false)
         for i := range plans {
             plans[i].From = nodeID
             plans[i].Replica = replica
@@ -327,20 +305,20 @@ func (b *RowCountBasedBalancer) genStoppingChannelPlan(replica *meta.Replica, on
     return channelPlans
 }
-func (b *RowCountBasedBalancer) genChannelPlan(replica *meta.Replica, onlineNodes []int64) []ChannelAssignPlan {
+func (b *RowCountBasedBalancer) genChannelPlan(replica *meta.Replica, rwNodes []int64) []ChannelAssignPlan {
     channelPlans := make([]ChannelAssignPlan, 0)
-    if len(onlineNodes) > 1 {
+    if len(rwNodes) > 1 {
         // start to balance channels on all available nodes
         channelDist := b.dist.ChannelDistManager.GetByFilter(meta.WithReplica2Channel(replica))
         if len(channelDist) == 0 {
             return nil
         }
-        average := int(math.Ceil(float64(len(channelDist)) / float64(len(onlineNodes))))
+        average := int(math.Ceil(float64(len(channelDist)) / float64(len(rwNodes))))
         // find nodes with less channel count than average
         nodeWithLessChannel := make([]int64, 0)
         channelsToMove := make([]*meta.DmChannel, 0)
-        for _, node := range onlineNodes {
+        for _, node := range rwNodes {
             channels := b.dist.ChannelDistManager.GetByCollectionAndFilter(replica.GetCollectionID(), meta.WithNodeID2Channel(node))
             if len(channels) <= average {


@@ -409,8 +409,8 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalance() {
     segmentCnts: []int{1, 2},
     states: []session.State{session.NodeStateNormal, session.NodeStateNormal},
     distributions: map[int64][]*meta.Segment{
-        1: {{SegmentInfo: &datapb.SegmentInfo{ID: 1, CollectionID: 1, NumOfRows: 30}, Node: 11}},
-        2: {
+        11: {{SegmentInfo: &datapb.SegmentInfo{ID: 1, CollectionID: 1, NumOfRows: 30}, Node: 11}},
+        22: {
             {SegmentInfo: &datapb.SegmentInfo{ID: 2, CollectionID: 1, NumOfRows: 20}, Node: 22},
             {SegmentInfo: &datapb.SegmentInfo{ID: 3, CollectionID: 1, NumOfRows: 30}, Node: 22},
         },
@@ -455,7 +455,7 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalance() {
     collection.LoadType = querypb.LoadType_LoadCollection
     balancer.meta.CollectionManager.PutCollection(collection)
     balancer.meta.CollectionManager.PutPartition(utils.CreateTestPartition(1, 1))
-    balancer.meta.ReplicaManager.Put(utils.CreateTestReplica(1, 1, append(c.nodes, c.notExistedNodes...)))
+    balancer.meta.ReplicaManager.Put(utils.CreateTestReplica(1, 1, c.nodes))
     suite.broker.ExpectedCalls = nil
     suite.broker.EXPECT().GetRecoveryInfoV2(mock.Anything, int64(1)).Return(nil, segments, nil)
     balancer.targetMgr.UpdateCollectionNextTarget(int64(1))
@@ -481,6 +481,7 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalance() {
         suite.balancer.nodeManager.Add(nodeInfo)
         suite.balancer.meta.ResourceManager.HandleNodeUp(c.nodes[i])
     }
+    utils.RecoverAllCollection(balancer.meta)
     segmentPlans, channelPlans := suite.getCollectionBalancePlans(balancer, 1)
     if !c.multiple {
@@ -492,10 +493,11 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalance() {
     }
     // clear distribution
-    for node := range c.distributions {
+    for _, node := range c.nodes {
+        balancer.meta.ResourceManager.HandleNodeDown(node)
+        balancer.nodeManager.Remove(node)
         balancer.dist.SegmentDistManager.Update(node)
-    }
-    for node := range c.distributionChannels {
         balancer.dist.ChannelDistManager.Update(node)
     }
 })
@@ -693,6 +695,8 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalanceOnPartStopping() {
         suite.balancer.nodeManager.Add(nodeInfo)
         suite.balancer.meta.ResourceManager.HandleNodeUp(c.nodes[i])
     }
+    utils.RecoverAllCollection(balancer.meta)
     segmentPlans, channelPlans := suite.getCollectionBalancePlans(balancer, 1)
     assertSegmentAssignPlanElementMatch(&suite.Suite, c.expectPlans, segmentPlans)
     assertChannelAssignPlanElementMatch(&suite.Suite, c.expectChannelPlans, channelPlans)


@@ -141,8 +141,7 @@ func (b *ScoreBasedBalancer) hasEnoughBenefit(sourceNode *nodeItem, targetNode *
 func (b *ScoreBasedBalancer) convertToNodeItems(collectionID int64, nodeIDs []int64) []*nodeItem {
     ret := make([]*nodeItem, 0, len(nodeIDs))
-    for _, nodeInfo := range b.getNodes(nodeIDs) {
-        node := nodeInfo.ID()
+    for _, node := range nodeIDs {
         priority := b.calculateScore(collectionID, node)
         nodeItem := newNodeItem(priority, node)
         ret = append(ret, &nodeItem)
@@ -195,56 +194,38 @@ func (b *ScoreBasedBalancer) BalanceReplica(replica *meta.Replica) ([]SegmentAss
         return nil, nil
     }
-    onlineNodes := make([]int64, 0)
-    offlineNodes := make([]int64, 0)
-    // read only nodes is offline in current replica.
-    if replica.RONodesCount() > 0 {
-        // if node is stop or transfer to other rg
-        log.RatedInfo(10, "meet read only node, try to move out all segment/channel", zap.Int64s("node", replica.GetRONodes()))
-        offlineNodes = append(offlineNodes, replica.GetRONodes()...)
-    }
-    for _, nid := range replica.GetNodes() {
-        if isStopping, err := b.nodeManager.IsStoppingNode(nid); err != nil {
-            log.Info("not existed node", zap.Int64("nid", nid), zap.Error(err))
-            continue
-        } else if isStopping {
-            offlineNodes = append(offlineNodes, nid)
-        } else {
-            onlineNodes = append(onlineNodes, nid)
-        }
-    }
-    if len(onlineNodes) == 0 {
+    rwNodes := replica.GetRWNodes()
+    roNodes := replica.GetRONodes()
+    if len(rwNodes) == 0 {
         // no available nodes to balance
         return nil, nil
     }
     // print current distribution before generating plans
     segmentPlans, channelPlans := make([]SegmentAssignPlan, 0), make([]ChannelAssignPlan, 0)
-    if len(offlineNodes) != 0 {
+    if len(roNodes) != 0 {
        if !paramtable.Get().QueryCoordCfg.EnableStoppingBalance.GetAsBool() {
-            log.RatedInfo(10, "stopping balance is disabled!", zap.Int64s("stoppingNode", offlineNodes))
+            log.RatedInfo(10, "stopping balance is disabled!", zap.Int64s("stoppingNode", roNodes))
            return nil, nil
        }
        log.Info("Handle stopping nodes",
-            zap.Any("stopping nodes", offlineNodes),
-            zap.Any("available nodes", onlineNodes),
+            zap.Any("stopping nodes", roNodes),
+            zap.Any("available nodes", rwNodes),
        )
        // handle stopped nodes here, have to assign segments on stopping nodes to nodes with the smallest score
-        channelPlans = append(channelPlans, b.genStoppingChannelPlan(replica, onlineNodes, offlineNodes)...)
+        channelPlans = append(channelPlans, b.genStoppingChannelPlan(replica, rwNodes, roNodes)...)
        if len(channelPlans) == 0 {
-            segmentPlans = append(segmentPlans, b.genStoppingSegmentPlan(replica, onlineNodes, offlineNodes)...)
+            segmentPlans = append(segmentPlans, b.genStoppingSegmentPlan(replica, rwNodes, roNodes)...)
        }
    } else {
        if paramtable.Get().QueryCoordCfg.AutoBalanceChannel.GetAsBool() {
-            channelPlans = append(channelPlans, b.genChannelPlan(replica, onlineNodes)...)
+            channelPlans = append(channelPlans, b.genChannelPlan(replica, rwNodes)...)
        }
        if len(channelPlans) == 0 {
-            segmentPlans = append(segmentPlans, b.genSegmentPlan(replica, onlineNodes)...)
+            segmentPlans = append(segmentPlans, b.genSegmentPlan(replica, rwNodes)...)
        }
    }


@@ -439,6 +439,7 @@ func (suite *ScoreBasedBalancerTestSuite) TestBalanceOneRound() {
         suite.balancer.nodeManager.Add(nodeInfo)
         suite.balancer.meta.ResourceManager.HandleNodeUp(c.nodes[i])
     }
+    utils.RecoverAllCollection(balancer.meta)
     // 4. balance and verify result
     segmentPlans, channelPlans := suite.getCollectionBalancePlans(balancer, c.collectionID)


@@ -101,12 +101,8 @@ func (b *BalanceChecker) replicasToBalance() []int64 {
     }
     replicas := b.meta.ReplicaManager.GetByCollection(cid)
     for _, replica := range replicas {
-        for _, nodeID := range replica.GetNodes() {
-            isStopping, _ := b.nodeManager.IsStoppingNode(nodeID)
-            if isStopping {
+        if replica.RONodesCount() > 0 {
             stoppingReplicas = append(stoppingReplicas, replica.GetID())
-                break
-            }
         }
     }
 }


@@ -278,6 +278,14 @@ func (suite *BalanceCheckerTestSuite) TestStoppingBalance() {
     suite.targetMgr.UpdateCollectionNextTarget(int64(cid2))
     suite.targetMgr.UpdateCollectionCurrentTarget(int64(cid2))
+    mr1 := replica1.CopyForWrite()
+    mr1.AddRONode(1)
+    suite.checker.meta.ReplicaManager.Put(mr1.IntoReplica())
+    mr2 := replica2.CopyForWrite()
+    mr2.AddRONode(1)
+    suite.checker.meta.ReplicaManager.Put(mr2.IntoReplica())
     // test stopping balance
     idsToBalance := []int64{int64(replicaID1), int64(replicaID2)}
     replicasToBalance := suite.checker.replicasToBalance()
@@ -348,6 +356,14 @@ func (suite *BalanceCheckerTestSuite) TestTargetNotReady() {
     suite.checker.meta.CollectionManager.PutCollection(collection2, partition2)
     suite.checker.meta.ReplicaManager.Put(replica2)
+    mr1 := replica1.CopyForWrite()
+    mr1.AddRONode(1)
+    suite.checker.meta.ReplicaManager.Put(mr1.IntoReplica())
+    mr2 := replica2.CopyForWrite()
+    mr2.AddRONode(1)
+    suite.checker.meta.ReplicaManager.Put(mr2.IntoReplica())
     // test stopping balance
     idsToBalance := []int64{int64(replicaID1)}
     replicasToBalance := suite.checker.replicasToBalance()


@@ -130,7 +130,7 @@ func (c *ChannelChecker) getDmChannelDiff(collectionID int64,
         return
     }
-    dist := c.getChannelDist(replica)
+    dist := c.dist.ChannelDistManager.GetByCollectionAndFilter(replica.GetCollectionID(), meta.WithReplica2Channel(replica))
     distMap := typeutil.NewSet[string]()
     for _, ch := range dist {
         distMap.Insert(ch.GetChannelName())
@@ -159,14 +159,6 @@ func (c *ChannelChecker) getDmChannelDiff(collectionID int64,
     return
 }
-func (c *ChannelChecker) getChannelDist(replica *meta.Replica) []*meta.DmChannel {
-    dist := make([]*meta.DmChannel, 0)
-    for _, nodeID := range replica.GetNodes() {
-        dist = append(dist, c.dist.ChannelDistManager.GetByCollectionAndFilter(replica.GetCollectionID(), meta.WithNodeID2Channel(nodeID))...)
-    }
-    return dist
-}
 func (c *ChannelChecker) findRepeatedChannels(ctx context.Context, replicaID int64) []*meta.DmChannel {
     log := log.Ctx(ctx).WithRateGroup("ChannelChecker.findRepeatedChannels", 1, 60)
     replica := c.meta.Get(replicaID)
@@ -176,7 +168,7 @@ func (c *ChannelChecker) findRepeatedChannels(ctx context.Context, replicaID int
         log.Info("replica does not exist, skip it")
         return ret
     }
-    dist := c.getChannelDist(replica)
+    dist := c.dist.ChannelDistManager.GetByCollectionAndFilter(replica.GetCollectionID(), meta.WithReplica2Channel(replica))
     targets := c.targetMgr.GetSealedSegmentsByCollection(replica.GetCollectionID(), meta.CurrentTarget)
     versionsMap := make(map[string]*meta.DmChannel)
@@ -221,7 +213,7 @@ func (c *ChannelChecker) createChannelLoadTask(ctx context.Context, channels []*
     for _, ch := range channels {
         rwNodes := replica.GetChannelRWNodes(ch.GetChannelName())
         if len(rwNodes) == 0 {
-            rwNodes = replica.GetNodes()
+            rwNodes = replica.GetRWNodes()
         }
         plan := c.balancer.AssignChannel([]*meta.DmChannel{ch}, rwNodes, false)
         plans = append(plans, plan...)


@@ -102,16 +102,17 @@ func (c *IndexChecker) checkReplica(ctx context.Context, collection *meta.Collec
     )
     var tasks []task.Task
-    segments := c.getSealedSegmentsDist(replica)
+    segments := c.dist.SegmentDistManager.GetByFilter(meta.WithCollectionID(replica.GetCollectionID()), meta.WithReplica(replica))
     idSegments := make(map[int64]*meta.Segment)
+    roNodeSet := typeutil.NewUniqueSet(replica.GetRONodes()...)
     targets := make(map[int64][]int64) // segmentID => FieldID
     for _, segment := range segments {
-        // skip update index in stopping node
-        if ok, _ := c.nodeMgr.IsStoppingNode(segment.Node); ok {
+        // skip update index in read only node
+        if roNodeSet.Contain(segment.Node) {
             continue
         }
-        missing := c.checkSegment(ctx, segment, indexInfos)
+        missing := c.checkSegment(segment, indexInfos)
         if len(missing) > 0 {
             targets[segment.GetID()] = missing
             idSegments[segment.GetID()] = segment
@@ -142,7 +143,7 @@ func (c *IndexChecker) checkReplica(ctx context.Context, collection *meta.Collec
     return tasks
 }
-func (c *IndexChecker) checkSegment(ctx context.Context, segment *meta.Segment, indexInfos []*indexpb.IndexInfo) (fieldIDs []int64) {
+func (c *IndexChecker) checkSegment(segment *meta.Segment, indexInfos []*indexpb.IndexInfo) (fieldIDs []int64) {
     var result []int64
     for _, indexInfo := range indexInfos {
         fieldID, indexID := indexInfo.FieldID, indexInfo.IndexID
@@ -158,14 +159,6 @@ func (c *IndexChecker) checkSegment(ctx context.Context, segment *meta.Segment,
     return result
 }
-func (c *IndexChecker) getSealedSegmentsDist(replica *meta.Replica) []*meta.Segment {
-    var ret []*meta.Segment
-    for _, node := range replica.GetNodes() {
-        ret = append(ret, c.dist.SegmentDistManager.GetByFilter(meta.WithCollectionID(replica.GetCollectionID()), meta.WithNodeID(node))...)
-    }
-    return ret
-}
 func (c *IndexChecker) createSegmentUpdateTask(ctx context.Context, segment *meta.Segment, replica *meta.Replica) (task.Task, bool) {
     action := task.NewSegmentActionWithScope(segment.Node, task.ActionTypeUpdate, segment.GetInsertChannel(), segment.GetID(), querypb.DataScope_Historical)
     t, err := task.NewSegmentTask(


@@ -134,9 +134,12 @@ func (suite *IndexCheckerSuite) TestLoadIndex() {
     suite.Equal(task.ActionTypeUpdate, action.Type())
     suite.EqualValues(2, action.SegmentID())
-    // test skip load index for stopping node
+    // test skip load index for read only node
     suite.nodeMgr.Stopping(1)
     suite.nodeMgr.Stopping(2)
+    suite.meta.ResourceManager.HandleNodeStopping(1)
+    suite.meta.ResourceManager.HandleNodeStopping(2)
+    utils.RecoverAllCollection(suite.meta)
     tasks = checker.Check(context.Background())
     suite.Require().Len(tasks, 0)
 }


@@ -93,12 +93,7 @@ func (c *LeaderChecker) Check(ctx context.Context) []task.Task {
     replicas := c.meta.ReplicaManager.GetByCollection(collectionID)
     for _, replica := range replicas {
-        for _, node := range replica.GetNodes() {
-            if ok, _ := c.nodeMgr.IsStoppingNode(node); ok {
-                // no need to correct leader's view which is loaded on stopping node
-                continue
-            }
+        for _, node := range replica.GetRWNodes() {
             leaderViews := c.dist.LeaderViewManager.GetByFilter(meta.WithCollectionID2LeaderView(replica.GetCollectionID()), meta.WithNodeID2LeaderView(node))
             for _, leaderView := range leaderViews {
                 dist := c.dist.SegmentDistManager.GetByFilter(meta.WithChannel(leaderView.Channel), meta.WithReplica(replica))


@@ -237,7 +237,8 @@ func (suite *LeaderCheckerTestSuite) TestStoppingNode() {
     observer := suite.checker
     observer.meta.CollectionManager.PutCollection(utils.CreateTestCollection(1, 1))
     observer.meta.CollectionManager.PutPartition(utils.CreateTestPartition(1, 1))
-    observer.meta.ReplicaManager.Put(utils.CreateTestReplica(1, 1, []int64{1, 2}))
+    replica := utils.CreateTestReplica(1, 1, []int64{1, 2})
+    observer.meta.ReplicaManager.Put(replica)
     segments := []*datapb.SegmentInfo{
         {
             ID: 1,
@@ -261,12 +262,9 @@ func (suite *LeaderCheckerTestSuite) TestStoppingNode() {
     view.TargetVersion = observer.target.GetCollectionTargetVersion(1, meta.CurrentTarget)
     observer.dist.LeaderViewManager.Update(2, view)
-    suite.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
-        NodeID: 2,
-        Address: "localhost",
-        Hostname: "localhost",
-    }))
-    suite.nodeMgr.Stopping(2)
+    mutableReplica := replica.CopyForWrite()
+    mutableReplica.AddRONode(2)
+    observer.meta.ReplicaManager.Put(mutableReplica.IntoReplica())
     tasks := suite.checker.Check(context.TODO())
     suite.Len(tasks, 0)


@@ -204,7 +204,7 @@ func (c *SegmentChecker) getSealedSegmentDiff(
         log.Info("replica does not exist, skip it")
         return
     }
-    dist := c.getSealedSegmentsDist(replica)
+    dist := c.dist.SegmentDistManager.GetByFilter(meta.WithCollectionID(replica.GetCollectionID()), meta.WithReplica(replica))
     sort.Slice(dist, func(i, j int) bool {
         return dist[i].Version < dist[j].Version
     })
@@ -293,14 +293,6 @@ func (c *SegmentChecker) getSealedSegmentDiff(
     return
 }
-func (c *SegmentChecker) getSealedSegmentsDist(replica *meta.Replica) []*meta.Segment {
-    ret := make([]*meta.Segment, 0)
-    for _, node := range replica.GetNodes() {
-        ret = append(ret, c.dist.SegmentDistManager.GetByFilter(meta.WithCollectionID(replica.GetCollectionID()), meta.WithNodeID(node))...)
-    }
-    return ret
-}
 func (c *SegmentChecker) findRepeatedSealedSegments(replicaID int64) []*meta.Segment {
     segments := make([]*meta.Segment, 0)
     replica := c.meta.Get(replicaID)
@@ -308,7 +300,7 @@ func (c *SegmentChecker) findRepeatedSealedSegments(replicaID int64) []*meta.Seg
         log.Info("replica does not exist, skip it")
         return segments
     }
-    dist := c.getSealedSegmentsDist(replica)
+    dist := c.dist.SegmentDistManager.GetByFilter(meta.WithCollectionID(replica.GetCollectionID()), meta.WithReplica(replica))
     versions := make(map[int64]*meta.Segment)
     for _, s := range dist {
         // l0 segment should be release with channel together
@@ -398,25 +390,12 @@ func (c *SegmentChecker) createSegmentLoadTasks(ctx context.Context, segments []
     rwNodes := replica.GetChannelRWNodes(shard)
     if len(rwNodes) == 0 {
-        rwNodes = replica.GetNodes()
+        rwNodes = replica.GetRWNodes()
     }
-    // filter out stopping nodes.
-    availableNodes := lo.Filter(rwNodes, func(node int64, _ int) bool {
-        stop, err := c.nodeMgr.IsStoppingNode(node)
-        if err != nil {
-            return false
-        }
-        return !stop
-    })
-    if len(availableNodes) == 0 {
-        return nil
-    }
     // L0 segment can only be assign to shard leader's node
     if isLevel0 {
-        availableNodes = []int64{leader.ID}
+        rwNodes = []int64{leader.ID}
     }
     segmentInfos := lo.Map(segments, func(s *datapb.SegmentInfo, _ int) *meta.Segment {
@@ -424,7 +403,7 @@ func (c *SegmentChecker) createSegmentLoadTasks(ctx context.Context, segments []
         SegmentInfo: s,
         }
     })
-    shardPlans := c.balancer.AssignSegment(replica.GetCollectionID(), segmentInfos, availableNodes, false)
+    shardPlans := c.balancer.AssignSegment(replica.GetCollectionID(), segmentInfos, rwNodes, false)
     for i := range shardPlans {
         shardPlans[i].Replica = replica
     }


@@ -46,7 +46,7 @@ import (
 func (s *Server) checkAnyReplicaAvailable(collectionID int64) bool {
     for _, replica := range s.meta.ReplicaManager.GetByCollection(collectionID) {
         isAvailable := true
-        for _, node := range replica.GetNodes() {
+        for _, node := range replica.GetRONodes() {
             if s.nodeMgr.Get(node) == nil {
                 isAvailable = false
                 break


@@ -159,16 +159,12 @@ func (job *LoadCollectionJob) Execute() error {
     // API of LoadCollection is wired, we should use map[resourceGroupNames]replicaNumber as input, to keep consistency with `TransferReplica` API.
     // Then we can implement dynamic replica changed in different resource group independently.
-    replicas, err = utils.SpawnReplicasWithRG(job.meta, req.GetCollectionID(), req.GetResourceGroups(), req.GetReplicaNumber(), collectionInfo.GetVirtualChannelNames())
+    _, err = utils.SpawnReplicasWithRG(job.meta, req.GetCollectionID(), req.GetResourceGroups(), req.GetReplicaNumber(), collectionInfo.GetVirtualChannelNames())
     if err != nil {
         msg := "failed to spawn replica for collection"
         log.Warn(msg, zap.Error(err))
         return errors.Wrap(err, msg)
     }
-    for _, replica := range replicas {
-        log.Info("replica created", zap.Int64("replicaID", replica.GetID()),
-            zap.Int64s("nodes", replica.GetNodes()), zap.String("resourceGroup", replica.GetResourceGroup()))
-    }
     job.undo.IsReplicaCreated = true
 }
@@ -346,16 +342,12 @@ func (job *LoadPartitionJob) Execute() error {
     if err != nil {
         return err
     }
-    replicas, err = utils.SpawnReplicasWithRG(job.meta, req.GetCollectionID(), req.GetResourceGroups(), req.GetReplicaNumber(), collectionInfo.GetVirtualChannelNames())
+    _, err = utils.SpawnReplicasWithRG(job.meta, req.GetCollectionID(), req.GetResourceGroups(), req.GetReplicaNumber(), collectionInfo.GetVirtualChannelNames())
     if err != nil {
         msg := "failed to spawn replica for collection"
         log.Warn(msg, zap.Error(err))
         return errors.Wrap(err, msg)
     }
-    for _, replica := range replicas {
-        log.Info("replica created", zap.Int64("replicaID", replica.GetID()),
-            zap.Int64s("nodes", replica.GetNodes()), zap.String("resourceGroup", replica.GetResourceGroup()))
-    }
     job.undo.IsReplicaCreated = true
 }


@@ -39,7 +39,7 @@ func NewReplica(replica *querypb.Replica, nodes ...typeutil.UniqueSet) *Replica
 }
 // newReplica creates a new replica from pb.
-func newReplica(replica *querypb.Replica, channels ...string) *Replica {
+func newReplica(replica *querypb.Replica) *Replica {
     return &Replica{
         replicaPB: proto.Clone(replica).(*querypb.Replica),
         rwNodes: typeutil.NewUniqueSet(replica.Nodes...),
@@ -65,7 +65,10 @@ func (replica *Replica) GetResourceGroup() string {
 // GetNodes returns the rw nodes of the replica.
 // readonly, don't modify the returned slice.
 func (replica *Replica) GetNodes() []int64 {
-    return replica.replicaPB.GetNodes()
+    nodes := make([]int64, 0)
+    nodes = append(nodes, replica.replicaPB.GetRoNodes()...)
+    nodes = append(nodes, replica.replicaPB.GetNodes()...)
+    return nodes
 }
 // GetRONodes returns the ro nodes of the replica.
@@ -74,6 +77,12 @@ func (replica *Replica) GetRONodes() []int64 {
     return replica.replicaPB.GetRoNodes()
 }
+// GetRONodes returns the rw nodes of the replica.
+// readonly, don't modify the returned slice.
+func (replica *Replica) GetRWNodes() []int64 {
+    return replica.replicaPB.GetNodes()
+}
 // RangeOverRWNodes iterates over the read and write nodes of the replica.
 func (replica *Replica) RangeOverRWNodes(f func(node int64) bool) {
     replica.rwNodes.Range(f)
@@ -131,8 +140,8 @@ func (replica *Replica) GetChannelRWNodes(channelName string) []int64 {
     return replica.replicaPB.ChannelNodeInfos[channelName].GetRwNodes()
 }
-// copyForWrite returns a mutable replica for write operations.
-func (replica *Replica) copyForWrite() *mutableReplica {
+// CopyForWrite returns a mutable replica for write operations.
+func (replica *Replica) CopyForWrite() *mutableReplica {
     exclusiveRWNodeToChannel := make(map[int64]string)
     for name, channelNodeInfo := range replica.replicaPB.GetChannelNodeInfos() {
         for _, nodeID := range channelNodeInfo.GetRwNodes() {


@@ -195,7 +195,7 @@ func (m *ReplicaManager) TransferReplica(collectionID typeutil.UniqueID, srcRGNa
     // Node Change will be executed by replica_observer in background.
     replicas := make([]*Replica, 0, replicaNum)
     for i := 0; i < replicaNum; i++ {
-        mutableReplica := srcReplicas[i].copyForWrite()
+        mutableReplica := srcReplicas[i].CopyForWrite()
         mutableReplica.SetResourceGroup(dstRGName)
         replicas = append(replicas, mutableReplica.IntoReplica())
     }
@@ -350,7 +350,7 @@ func (m *ReplicaManager) RecoverNodesInCollection(collectionID typeutil.UniqueID
         // nothing to do.
         return
     }
-    mutableReplica := m.replicas[assignment.GetReplicaID()].copyForWrite()
+    mutableReplica := m.replicas[assignment.GetReplicaID()].CopyForWrite()
     mutableReplica.AddRONode(roNodes...) // rw -> ro
     mutableReplica.AddRWNode(recoverableNodes...) // ro -> rw
     mutableReplica.AddRWNode(incomingNode...) // unused -> rw
@@ -414,7 +414,7 @@ func (m *ReplicaManager) RemoveNode(replicaID typeutil.UniqueID, nodes ...typeut
         return merr.WrapErrReplicaNotFound(replicaID)
     }
-    mutableReplica := replica.copyForWrite()
+    mutableReplica := replica.CopyForWrite()
     mutableReplica.RemoveNode(nodes...) // ro -> unused
     return m.put(mutableReplica.IntoReplica())
 }


@@ -30,13 +30,13 @@ func (suite *ReplicaSuite) TestReadOperations() {
     r := newReplica(suite.replicaPB)
     suite.testRead(r)
     // keep same after clone.
-    mutableReplica := r.copyForWrite()
+    mutableReplica := r.CopyForWrite()
     suite.testRead(mutableReplica.IntoReplica())
 }
 func (suite *ReplicaSuite) TestClone() {
     r := newReplica(suite.replicaPB)
-    r2 := r.copyForWrite()
+    r2 := r.CopyForWrite()
     suite.testRead(r)
     // after apply write operation on copy, the original should not be affected.
@@ -68,7 +68,7 @@ func (suite *ReplicaSuite) TestRange() {
     })
     suite.Equal(1, count)
-    mr := r.copyForWrite()
+    mr := r.CopyForWrite()
     mr.AddRONode(1)
     count = 0
@@ -81,7 +81,7 @@ func (suite *ReplicaSuite) TestRange() {
 func (suite *ReplicaSuite) TestWriteOperation() {
     r := newReplica(suite.replicaPB)
-    mr := r.copyForWrite()
+    mr := r.CopyForWrite()
     // test add available node.
     suite.False(mr.Contains(5))
@@ -158,7 +158,7 @@ func (suite *ReplicaSuite) testRead(r *Replica) {
     suite.Equal(suite.replicaPB.GetResourceGroup(), r.GetResourceGroup())
     // Test GetNodes()
-    suite.ElementsMatch(suite.replicaPB.GetNodes(), r.GetNodes())
+    suite.ElementsMatch(suite.replicaPB.GetNodes(), r.GetRWNodes())
     // Test GetRONodes()
     suite.ElementsMatch(suite.replicaPB.GetRoNodes(), r.GetRONodes())
@@ -195,7 +195,7 @@ func (suite *ReplicaSuite) TestChannelExclusiveMode() {
     },
     })
-    mutableReplica := r.copyForWrite()
+    mutableReplica := r.CopyForWrite()
     // add 10 rw nodes, exclusive mode is false.
     for i := 0; i < 10; i++ {
         mutableReplica.AddRWNode(int64(i))
@@ -205,7 +205,7 @@ func (suite *ReplicaSuite) TestChannelExclusiveMode() {
         suite.Equal(0, len(channelNodeInfo.GetRwNodes()))
     }
-    mutableReplica = r.copyForWrite()
+    mutableReplica = r.CopyForWrite()
     // add 10 rw nodes, exclusive mode is true.
     for i := 10; i < 20; i++ {
         mutableReplica.AddRWNode(int64(i))
@@ -216,7 +216,7 @@ func (suite *ReplicaSuite) TestChannelExclusiveMode() {
     }
     // 4 node become read only, exclusive mode still be true
-    mutableReplica = r.copyForWrite()
+    mutableReplica = r.CopyForWrite()
     for i := 0; i < 4; i++ {
         mutableReplica.AddRONode(int64(i))
     }
@@ -226,7 +226,7 @@ func (suite *ReplicaSuite) TestChannelExclusiveMode() {
     }
     // 4 node has been removed, exclusive mode back to false
-    mutableReplica = r.copyForWrite()
+    mutableReplica = r.CopyForWrite()
     for i := 4; i < 8; i++ {
         mutableReplica.RemoveNode(int64(i))
     }


@@ -453,7 +453,6 @@ func (rm *ResourceManager) HandleNodeDown(node int64) {
     rm.rwmutex.Lock()
     defer rm.rwmutex.Unlock()
-    // failure of node down can be ignored, node down can be done by `RemoveAllDownNode`.
     rm.incomingNode.Remove(node)
     // for stopping query node becomes offline, node change won't be triggered,
@@ -470,6 +469,19 @@ func (rm *ResourceManager) HandleNodeDown(node int64) {
     )
 }
+func (rm *ResourceManager) HandleNodeStopping(node int64) {
+    rm.rwmutex.Lock()
+    defer rm.rwmutex.Unlock()
+    rm.incomingNode.Remove(node)
+    rgName, err := rm.unassignNode(node)
+    log.Info("HandleNodeStopping: remove node from resource group",
+        zap.String("rgName", rgName),
+        zap.Int64("node", node),
+        zap.Error(err),
+    )
+}
 // ListenResourceGroupChanged return a listener for resource group changed.
 func (rm *ResourceManager) ListenResourceGroupChanged() *syncutil.VersionedListener {
     return rm.rgChangedNotifier.Listen(syncutil.VersionedListenAtEarliest)
@@ -495,25 +507,6 @@ func (rm *ResourceManager) AssignPendingIncomingNode() {
     }
 }
-// RemoveAllDownNode remove all down node from resource group.
-func (rm *ResourceManager) RemoveAllDownNode() {
-    rm.rwmutex.Lock()
-    defer rm.rwmutex.Unlock()
-    for nodeID := range rm.nodeIDMap {
-        if node := rm.nodeMgr.Get(nodeID); node == nil || node.IsStoppingState() {
-            // unassignNode failure can be skip.
-            rgName, err := rm.unassignNode(nodeID)
-            log.Info("remove down node from resource group",
-                zap.Bool("nodeExist", node != nil),
-                zap.Int64("nodeID", nodeID),
-                zap.String("rgName", rgName),
-                zap.Error(err),
-            )
-        }
-    }
-}
 // AutoRecoverResourceGroup auto recover rg, return recover used node num
 func (rm *ResourceManager) AutoRecoverResourceGroup(rgName string) error {
     rm.rwmutex.Lock()
@@ -847,7 +840,8 @@ func (rm *ResourceManager) unassignNode(node int64) (string, error) {
         rm.nodeChangedNotifier.NotifyAll()
         return rg.GetName(), nil
     }
-    return "", nil
+    return "", errors.Errorf("node %d not found in any resource group", node)
 }
// validateResourceGroupConfig validate resource group config. // validateResourceGroupConfig validate resource group config.


@@ -524,16 +524,6 @@ func (suite *ResourceManagerSuite) TestAutoRecover() {
    suite.Equal(80, suite.manager.GetResourceGroup("rg2").NodeNum())
    suite.Equal(5, suite.manager.GetResourceGroup("rg3").NodeNum())
    suite.Equal(5, suite.manager.GetResourceGroup(DefaultResourceGroupName).NodeNum())
-
-   // Test down all nodes.
-   for i := 1; i <= 100; i++ {
-       suite.manager.nodeMgr.Remove(int64(i))
-   }
-   suite.manager.RemoveAllDownNode()
-   suite.Zero(suite.manager.GetResourceGroup("rg1").NodeNum())
-   suite.Zero(suite.manager.GetResourceGroup("rg2").NodeNum())
-   suite.Zero(suite.manager.GetResourceGroup("rg3").NodeNum())
-   suite.Zero(suite.manager.GetResourceGroup(DefaultResourceGroupName).NodeNum())
}

func (suite *ResourceManagerSuite) testTransferNode() {


@@ -100,6 +100,7 @@ func (ob *ReplicaObserver) checkNodesInReplica() {
        replicas := ob.meta.ReplicaManager.GetByCollection(collectionID)
        for _, replica := range replicas {
            roNodes := replica.GetRONodes()
+           rwNodes := replica.GetRWNodes()
            if len(roNodes) == 0 {
                continue
            }
@@ -124,7 +125,7 @@ func (ob *ReplicaObserver) checkNodesInReplica() {
                zap.Int64("replicaID", replica.GetID()),
                zap.Int64s("removedNodes", removeNodes),
                zap.Int64s("roNodes", roNodes),
-               zap.Int64s("availableNodes", replica.GetNodes()),
+               zap.Int64s("rwNodes", rwNodes),
            )
            if err := ob.meta.ReplicaManager.RemoveNode(replica.GetID(), removeNodes...); err != nil {
                logger.Warn("fail to remove node from replica", zap.Error(err))
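
The observer hunk above only swaps the logged field from availableNodes to rwNodes; the surrounding loop, not fully visible here, is what eventually drops a read-only node from a replica once nothing of that replica remains on it. A minimal sketch of that kind of check, using hypothetical helper types:

package main

import "fmt"

// distView is a hypothetical, simplified view of distribution for one replica:
// node ID -> number of channels and segments that node still serves.
type distView map[int64]int

// removableRONodes returns the read-only nodes that can be dropped from a
// replica because nothing of that replica remains on them.
func removableRONodes(roNodes []int64, dist distView) []int64 {
    removable := make([]int64, 0, len(roNodes))
    for _, node := range roNodes {
        if dist[node] == 0 {
            removable = append(removable, node)
        }
    }
    return removable
}

func main() {
    roNodes := []int64{7, 8}
    dist := distView{7: 0, 8: 3}                 // node 7 is drained, node 8 still holds data
    fmt.Println(removableRONodes(roNodes, dist)) // [7]
}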


@@ -98,10 +98,6 @@ func (ob *ResourceObserver) checkAndRecoverResourceGroup() {
        manager.AssignPendingIncomingNode()
    }

-   // Remove all down nodes in resource group manager.
-   log.Debug("remove all down nodes in resource group manager...")
-   ob.meta.RemoveAllDownNode()
-
    log.Debug("recover resource groups...")
    // Recover all resource group into expected configuration.
    for _, rgName := range rgNames {


@@ -136,6 +136,7 @@ func (suite *ResourceObserverSuite) TestObserverRecoverOperation() {
    suite.NoError(suite.meta.ResourceManager.MeetRequirement("rg2"))
    suite.NoError(suite.meta.ResourceManager.MeetRequirement("rg3"))
    // new node is down, rg3 cannot use that node anymore.
+   suite.meta.ResourceManager.HandleNodeDown(10)
    suite.observer.checkAndRecoverResourceGroup()
    suite.NoError(suite.meta.ResourceManager.MeetRequirement("rg1"))
    suite.NoError(suite.meta.ResourceManager.MeetRequirement("rg2"))


@@ -276,7 +276,7 @@ func (s *Server) TransferSegment(ctx context.Context, req *querypb.TransferSegme
    // when no dst node specified, default to use all other nodes in same
    dstNodeSet := typeutil.NewUniqueSet()
    if req.GetToAllNodes() {
-       dstNodeSet.Insert(replica.GetNodes()...)
+       dstNodeSet.Insert(replica.GetRWNodes()...)
    } else {
        // check whether dstNode is healthy
        if err := s.isStoppingNode(req.GetTargetNodeID()); err != nil {
@@ -348,7 +348,7 @@ func (s *Server) TransferChannel(ctx context.Context, req *querypb.TransferChann
    // when no dst node specified, default to use all other nodes in same
    dstNodeSet := typeutil.NewUniqueSet()
    if req.GetToAllNodes() {
-       dstNodeSet.Insert(replica.GetNodes()...)
+       dstNodeSet.Insert(replica.GetRWNodes()...)
    } else {
        // check whether dstNode is healthy
        if err := s.isStoppingNode(req.GetTargetNodeID()); err != nil {


@@ -441,7 +441,6 @@ func (s *Server) startQueryCoord() error {
            s.nodeMgr.Stopping(node.ServerID)
        }
    }
-   s.checkReplicas()
    for _, node := range sessions {
        s.handleNodeUp(node.ServerID)
    }
@@ -685,6 +684,7 @@ func (s *Server) watchNodes(revision int64) {
            )
            s.nodeMgr.Stopping(nodeID)
            s.checkerController.Check()
+           s.meta.ResourceManager.HandleNodeStopping(nodeID)

        case sessionutil.SessionDelEvent:
            nodeID := event.Session.ServerID
@@ -748,7 +748,6 @@ func (s *Server) handleNodeUp(node int64) {
}

func (s *Server) handleNodeDown(node int64) {
-   log := log.With(zap.Int64("nodeID", node))
    s.taskScheduler.RemoveExecutor(node)
    s.distController.Remove(node)
@@ -757,57 +756,12 @@ func (s *Server) handleNodeDown(node int64) {
    s.dist.ChannelDistManager.Update(node)
    s.dist.SegmentDistManager.Update(node)

-   // Clear meta
-   for _, collection := range s.meta.CollectionManager.GetAll() {
-       log := log.With(zap.Int64("collectionID", collection))
-       replica := s.meta.ReplicaManager.GetByCollectionAndNode(collection, node)
-       if replica == nil {
-           continue
-       }
-       err := s.meta.ReplicaManager.RemoveNode(replica.GetID(), node)
-       if err != nil {
-           log.Warn("failed to remove node from collection's replicas",
-               zap.Int64("replicaID", replica.GetID()),
-               zap.Error(err),
-           )
-       }
-       log.Info("remove node from replica",
-           zap.Int64("replicaID", replica.GetID()))
-   }
-
    // Clear tasks
    s.taskScheduler.RemoveByNode(node)

    s.meta.ResourceManager.HandleNodeDown(node)
}

-// checkReplicas checks whether replica contains offline node, and remove those nodes
-func (s *Server) checkReplicas() {
-   for _, collection := range s.meta.CollectionManager.GetAll() {
-       log := log.With(zap.Int64("collectionID", collection))
-       replicas := s.meta.ReplicaManager.GetByCollection(collection)
-       for _, replica := range replicas {
-           toRemove := make([]int64, 0)
-           for _, node := range replica.GetNodes() {
-               if s.nodeMgr.Get(node) == nil {
-                   toRemove = append(toRemove, node)
-               }
-           }
-
-           if len(toRemove) > 0 {
-               log := log.With(
-                   zap.Int64("replicaID", replica.GetID()),
-                   zap.Int64s("offlineNodes", toRemove),
-               )
-               log.Info("some nodes are offline, remove them from replica", zap.Any("toRemove", toRemove))
-               if err := s.meta.ReplicaManager.RemoveNode(replica.GetID(), toRemove...); err != nil {
-                   log.Warn("failed to remove offline nodes from replica")
-               }
-           }
-       }
-   }
-}
-
func (s *Server) updateBalanceConfigLoop(ctx context.Context) {
    success := s.updateBalanceConfig()
    if success {
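
Taken together, the server changes split the node lifecycle in two: a session entering the stopping state now also notifies the resource manager, while a deleted session still goes through handleNodeDown, which no longer edits replica membership itself. A hedged sketch of that dispatch with stand-in types (the real code reacts to sessionutil session events):

package main

import "fmt"

// nodeEvent is a hypothetical stand-in for the session events the coordinator
// watches; event kinds here are illustrative names, not the real constants.
type nodeEvent struct {
    kind   string // "stopping" or "down"
    nodeID int64
}

type coordinator struct{}

func (c *coordinator) handleNodeStopping(node int64) {
    // in the diff: nodeMgr.Stopping(node), checkerController.Check(),
    // meta.ResourceManager.HandleNodeStopping(node)
    fmt.Printf("node %d stopping: trigger checkers and unassign from resource group\n", node)
}

func (c *coordinator) handleNodeDown(node int64) {
    // in the diff: drop executor/dist/tasks and call ResourceManager.HandleNodeDown;
    // replica membership is cleaned up later by the replica observer via RO nodes.
    fmt.Printf("node %d down: drop executors, tasks and resource-group membership\n", node)
}

func (c *coordinator) dispatch(ev nodeEvent) {
    switch ev.kind {
    case "stopping":
        c.handleNodeStopping(ev.nodeID)
    case "down":
        c.handleNodeDown(ev.nodeID)
    }
}

func main() {
    c := &coordinator{}
    c.dispatch(nodeEvent{kind: "stopping", nodeID: 3})
    c.dispatch(nodeEvent{kind: "down", nodeID: 3})
}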


@@ -686,7 +686,7 @@ func (s *Server) LoadBalance(ctx context.Context, req *querypb.LoadBalanceReques
    // when no dst node specified, default to use all other nodes in same
    dstNodeSet := typeutil.NewUniqueSet()
    if len(req.GetDstNodeIDs()) == 0 {
-       dstNodeSet.Insert(replica.GetNodes()...)
+       dstNodeSet.Insert(replica.GetRWNodes()...)
    } else {
        for _, dstNode := range req.GetDstNodeIDs() {
            if !replica.Contains(dstNode) {
@@ -1075,7 +1075,7 @@ func (s *Server) DescribeResourceGroup(ctx context.Context, req *querypb.Describ
    replicasInRG := s.meta.GetByResourceGroup(req.GetResourceGroup())
    for _, replica := range replicasInRG {
        loadedReplicas[replica.GetCollectionID()]++
-       for _, node := range replica.GetNodes() {
+       for _, node := range replica.GetRONodes() {
            if !s.meta.ContainsNode(replica.GetResourceGroup(), node) {
                outgoingNodes[replica.GetCollectionID()]++
            }
@@ -1090,7 +1090,7 @@ func (s *Server) DescribeResourceGroup(ctx context.Context, req *querypb.Describ
        if replica.GetResourceGroup() == req.GetResourceGroup() {
            continue
        }
-       for _, node := range replica.GetNodes() {
+       for _, node := range replica.GetRONodes() {
            if s.meta.ContainsNode(req.GetResourceGroup(), node) {
                incomingNodes[collection]++
            }
@@ -1101,8 +1101,7 @@ func (s *Server) DescribeResourceGroup(ctx context.Context, req *querypb.Describ
    nodes := make([]*commonpb.NodeInfo, 0, len(rg.GetNodes()))
    for _, nodeID := range rg.GetNodes() {
        nodeSessionInfo := s.nodeMgr.Get(nodeID)
-       // Filter offline nodes and nodes in stopping state
-       if nodeSessionInfo != nil && !nodeSessionInfo.IsStoppingState() {
+       if nodeSessionInfo != nil {
            nodes = append(nodes, &commonpb.NodeInfo{
                NodeId:  nodeSessionInfo.ID(),
                Address: nodeSessionInfo.Addr(),
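
With the change above, DescribeResourceGroup counts only read-only nodes when computing the outgoing and incoming node numbers: an RO node that a replica of the described group still references but that the group no longer owns is outgoing, and symmetrically for incoming. A small sketch of the outgoing side, under assumed simplified types:

package main

import "fmt"

// replica is a hypothetical, trimmed-down replica descriptor.
type replica struct {
    collectionID int64
    rg           string
    roNodes      []int64
}

// rgMembers maps a resource group name to the set of nodes it currently owns.
type rgMembers map[string]map[int64]bool

// countOutgoing counts, per collection, RO nodes that a replica of the given
// resource group still references but that the group no longer owns.
func countOutgoing(rg string, replicas []replica, members rgMembers) map[int64]int {
    out := make(map[int64]int)
    for _, r := range replicas {
        if r.rg != rg {
            continue
        }
        for _, node := range r.roNodes {
            if !members[rg][node] {
                out[r.collectionID]++
            }
        }
    }
    return out
}

func main() {
    members := rgMembers{"rg1": {1: true}}
    replicas := []replica{{collectionID: 100, rg: "rg1", roNodes: []int64{1, 2}}}
    // node 2 is read-only for the replica but already moved out of rg1 -> outgoing.
    fmt.Println(countOutgoing("rg1", replicas, members)) // map[100:1]
}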


@@ -432,7 +432,8 @@ func (suite *ServiceSuite) TestResourceGroup() {
    server.meta.ReplicaManager.Put(meta.NewReplica(&querypb.Replica{
        ID:            1,
        CollectionID:  1,
-       Nodes:         []int64{1011, 1013},
+       Nodes:         []int64{1011},
+       RoNodes:       []int64{1013},
        ResourceGroup: "rg11",
    },
        typeutil.NewUniqueSet(1011, 1013)),
@@ -440,7 +441,8 @@ func (suite *ServiceSuite) TestResourceGroup() {
    server.meta.ReplicaManager.Put(meta.NewReplica(&querypb.Replica{
        ID:            2,
        CollectionID:  2,
-       Nodes:         []int64{1012, 1014},
+       Nodes:         []int64{1014},
+       RoNodes:       []int64{1012},
        ResourceGroup: "rg12",
    },
        typeutil.NewUniqueSet(1012, 1014)),


@@ -22,7 +22,6 @@ import (
    "go.uber.org/zap"

    "github.com/milvus-io/milvus/internal/querycoordv2/meta"
-   "github.com/milvus-io/milvus/internal/querycoordv2/session"
    "github.com/milvus-io/milvus/pkg/log"
    "github.com/milvus-io/milvus/pkg/util/merr"
    "github.com/milvus-io/milvus/pkg/util/typeutil"
@@ -35,19 +34,6 @@ var (
    ErrUseWrongNumRG = errors.New("resource group num can only be 0, 1 or same as replica number")
)

-func GetReplicaNodesInfo(replicaMgr *meta.ReplicaManager, nodeMgr *session.NodeManager, replicaID int64) []*session.NodeInfo {
-   replica := replicaMgr.Get(replicaID)
-   if replica == nil {
-       return nil
-   }
-
-   nodes := make([]*session.NodeInfo, 0, len(replica.GetNodes()))
-   for _, node := range replica.GetNodes() {
-       nodes = append(nodes, nodeMgr.Get(node))
-   }
-   return nodes
-}
-
func GetPartitions(collectionMgr *meta.CollectionManager, collectionID int64) ([]int64, error) {
    collection := collectionMgr.GetCollection(collectionID)
    if collection != nil {