enhance: Optimize shard serviceable mechanism (#41937)

issue: https://github.com/milvus-io/milvus/issues/41690
- Merge leader view and channel management into ChannelDistManager,
allowing a single channel to have multiple delegators (see the sketch
below).
- Improve shard leader switching so that each replica has exactly one
shard leader per channel. The shard leader handles all resource loading
and query requests for that channel.
- Refine the serviceable mechanism: after QueryCoord (QC) completes
loading, it syncs the query view to the delegator, and the delegator
then derives its serviceable status from that query view.
- When a delegator fails to forward a query or delete request, mark the
corresponding segment as offline and transition the delegator to an
unserviceable state.

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
wei liu 2025-05-22 11:38:24 +08:00 committed by GitHub
parent f021b3f26a
commit 78010262f0
55 changed files with 4710 additions and 3355 deletions
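
To make the new layout concrete, here is a minimal, self-contained Go sketch of the shape the updated tests below rely on: the LeaderView is embedded in each DmChannel held by ChannelDistManager, the shard leader is the serviceable delegator with the highest version, and a forwarding failure marks a segment offline and drops serviceability. The type and method names mirror meta.DmChannel, meta.LeaderView, and querypb.LeaderViewStatus from the diff, but they are illustrative stand-ins, not the actual internal/querycoordv2 implementation.

package main

import "fmt"

// Minimal stand-ins for the shapes used by the updated tests in this diff
// (meta.DmChannel, meta.LeaderView, querypb.LeaderViewStatus). The names and
// fields below are illustrative only, not the real internal/querycoordv2 code.

type LeaderViewStatus struct {
	Serviceable bool
}

type LeaderView struct {
	ID       int64 // delegator node ID
	Channel  string
	Version  int64
	Segments map[int64]int64 // segment ID -> worker node serving it
	Status   *LeaderViewStatus
}

// MarkSegmentOffline mimics the failure path described above: when forwarding
// a query or delete for a segment fails, the segment is dropped from the view
// and the delegator stops reporting itself as serviceable.
func (v *LeaderView) MarkSegmentOffline(segmentID int64) {
	delete(v.Segments, segmentID)
	v.Status = &LeaderViewStatus{Serviceable: false}
}

// DmChannel carries the leader view directly, so channel distribution and
// leader views live in one structure.
type DmChannel struct {
	CollectionID int64
	ChannelName  string
	Node         int64
	Version      int64
	View         *LeaderView
}

// ChannelDistManager tracks delegators per node; a channel may have several
// delegators during a handoff, but only one of them is the shard leader.
type ChannelDistManager struct {
	channels map[int64][]*DmChannel // node ID -> delegators on that node
}

func NewChannelDistManager() *ChannelDistManager {
	return &ChannelDistManager{channels: make(map[int64][]*DmChannel)}
}

func (m *ChannelDistManager) Update(node int64, channels ...*DmChannel) {
	m.channels[node] = channels
}

// GetShardLeader picks the serviceable delegator with the highest version,
// so each replica ends up with a single shard leader per channel.
func (m *ChannelDistManager) GetShardLeader(channelName string) *DmChannel {
	var leader *DmChannel
	for _, list := range m.channels {
		for _, ch := range list {
			if ch.ChannelName != channelName || ch.View == nil ||
				ch.View.Status == nil || !ch.View.Status.Serviceable {
				continue
			}
			if leader == nil || ch.Version > leader.Version {
				leader = ch
			}
		}
	}
	return leader
}

func main() {
	dist := NewChannelDistManager()
	// Old delegator on node 1: serviceable, lower version.
	dist.Update(1, &DmChannel{CollectionID: 1, ChannelName: "v1", Node: 1, Version: 1,
		View: &LeaderView{ID: 1, Channel: "v1", Version: 1, Segments: map[int64]int64{100: 1},
			Status: &LeaderViewStatus{Serviceable: true}}})
	// New delegator on node 2: higher version, but query view not synced yet.
	newView := &LeaderView{ID: 2, Channel: "v1", Version: 2, Segments: map[int64]int64{100: 2},
		Status: &LeaderViewStatus{Serviceable: false}}
	dist.Update(2, &DmChannel{CollectionID: 1, ChannelName: "v1", Node: 2, Version: 2, View: newView})

	fmt.Println("leader before sync:", dist.GetShardLeader("v1").Node) // node 1

	// After QueryCoord syncs the query view, node 2 becomes serviceable and
	// takes over shard leadership because it has the higher version.
	newView.Status = &LeaderViewStatus{Serviceable: true}
	fmt.Println("leader after sync:", dist.GetShardLeader("v1").Node) // node 2

	// A forwarding failure marks a segment offline and drops serviceability,
	// so leadership falls back to the remaining serviceable delegator.
	newView.MarkSegmentOffline(100)
	fmt.Println("leader after failure:", dist.GetShardLeader("v1").Node) // node 1
}

Running the sketch prints node 1, then node 2, then node 1 again, mirroring the handoff and fallback behavior described in the commit message.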


@ -291,11 +291,18 @@ func (suite *ChannelLevelScoreBalancerTestSuite) TestAssignSegmentWithGrowing()
defer paramtable.Get().Reset(paramtable.Get().QueryCoordCfg.DelegatorMemoryOverloadFactor.Key)
// mock 50 growing row count in node 1, which is delegator, expect all segment assign to node 2
leaderView := &meta.LeaderView{
ID: 1,
CollectionID: 1,
}
suite.balancer.dist.LeaderViewManager.Update(1, leaderView)
suite.balancer.dist.ChannelDistManager.Update(1, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "v1",
},
Node: 1,
View: &meta.LeaderView{
ID: 1,
CollectionID: 1,
NumOfGrowingRows: 50,
},
})
plans := balancer.AssignSegment(ctx, 1, toAssign, lo.Keys(distributions), false)
for _, p := range plans {
suite.Equal(int64(2), p.To)
@ -752,12 +759,12 @@ func (suite *ChannelLevelScoreBalancerTestSuite) TestMultiReplicaBalance() {
},
channelDist: map[int64][]*meta.DmChannel{
1: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel1"}, Node: 1},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel2"}, Node: 1},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel1"}, Node: 1, View: &meta.LeaderView{ID: 1, CollectionID: 1}},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel2"}, Node: 1, View: &meta.LeaderView{ID: 2, CollectionID: 1}},
},
3: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel3"}, Node: 3},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel4"}, Node: 3},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel3"}, Node: 3, View: &meta.LeaderView{ID: 3, CollectionID: 1}},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel4"}, Node: 3, View: &meta.LeaderView{ID: 4, CollectionID: 1}},
},
},
expectPlans: []SegmentAssignPlan{},
@ -815,10 +822,10 @@ func (suite *ChannelLevelScoreBalancerTestSuite) TestMultiReplicaBalance() {
suite.Len(channelPlans, 2)
// mock new distribution after channel balance
balancer.dist.ChannelDistManager.Update(1, &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel1"}, Node: 1})
balancer.dist.ChannelDistManager.Update(2, &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel2"}, Node: 2})
balancer.dist.ChannelDistManager.Update(3, &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel3"}, Node: 3})
balancer.dist.ChannelDistManager.Update(4, &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel4"}, Node: 4})
balancer.dist.ChannelDistManager.Update(1, &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel1"}, Node: 1, View: &meta.LeaderView{ID: 1, CollectionID: 1}})
balancer.dist.ChannelDistManager.Update(2, &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel2"}, Node: 2, View: &meta.LeaderView{ID: 2, CollectionID: 1}})
balancer.dist.ChannelDistManager.Update(3, &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel3"}, Node: 3, View: &meta.LeaderView{ID: 3, CollectionID: 1}})
balancer.dist.ChannelDistManager.Update(4, &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel4"}, Node: 4, View: &meta.LeaderView{ID: 4, CollectionID: 1}})
// expected to balance segment
segmentPlans, channelPlans = suite.getCollectionBalancePlans(balancer, c.collectionID)
@ -909,6 +916,7 @@ func (suite *ChannelLevelScoreBalancerTestSuite) TestExclusiveChannelBalance_Cha
ChannelName: "channel2",
},
Node: ch1Nodes[0],
View: &meta.LeaderView{ID: 2, CollectionID: collectionID},
},
}...)
@ -984,6 +992,7 @@ func (suite *ChannelLevelScoreBalancerTestSuite) TestExclusiveChannelBalance_Seg
ChannelName: "channel1",
},
Node: ch1Nodes[0],
View: &meta.LeaderView{ID: ch1Nodes[0], CollectionID: collectionID},
},
}...)
@ -994,6 +1003,7 @@ func (suite *ChannelLevelScoreBalancerTestSuite) TestExclusiveChannelBalance_Seg
ChannelName: "channel2",
},
Node: ch2Nodes[0],
View: &meta.LeaderView{ID: ch2Nodes[0], CollectionID: collectionID},
},
}...)
@ -1082,6 +1092,7 @@ func (suite *ChannelLevelScoreBalancerTestSuite) TestExclusiveChannelBalance_Nod
ChannelName: "channel1",
},
Node: ch1Nodes[0],
View: &meta.LeaderView{ID: ch1Nodes[0], CollectionID: collectionID},
},
}...)
@ -1092,6 +1103,7 @@ func (suite *ChannelLevelScoreBalancerTestSuite) TestExclusiveChannelBalance_Nod
ChannelName: "channel2",
},
Node: ch2Nodes[0],
View: &meta.LeaderView{ID: ch2Nodes[0], CollectionID: collectionID},
},
}...)
@ -1207,6 +1219,7 @@ func (suite *ChannelLevelScoreBalancerTestSuite) TestExclusiveChannelBalance_Seg
ChannelName: "channel1",
},
Node: ch1Nodes[0],
View: &meta.LeaderView{ID: ch1Nodes[0], CollectionID: collectionID},
},
}...)
@ -1217,6 +1230,7 @@ func (suite *ChannelLevelScoreBalancerTestSuite) TestExclusiveChannelBalance_Seg
ChannelName: "channel2",
},
Node: ch2Nodes[0],
View: &meta.LeaderView{ID: ch2Nodes[0], CollectionID: collectionID},
},
}...)


@ -55,7 +55,7 @@ func (br *balanceReport) SetMemoryFactor(node int64, memoryFactor float64) {
}
}
func (br *balanceReport) SetDeletagorScore(node int64, delegatorScore float64) {
func (br *balanceReport) SetDelegatorScore(node int64, delegatorScore float64) {
nodeItem, ok := br.nodeItems[node]
if ok {
nodeItem.delegatorScore = delegatorScore


@ -150,9 +150,9 @@ func (b *RowCountBasedBalancer) convertToNodeItemsBySegment(nodeIDs []int64) []*
}
// calculate growing segment row count on node
views := b.dist.LeaderViewManager.GetByFilter(meta.WithNodeID2LeaderView(node))
for _, view := range views {
rowcnt += int(view.NumOfGrowingRows)
channels := b.dist.ChannelDistManager.GetByFilter(meta.WithNodeID2Channel(node))
for _, channel := range channels {
rowcnt += int(channel.View.NumOfGrowingRows)
}
// calculate executing task cost in scheduler


@ -250,10 +250,10 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalance() {
},
distributionChannels: map[int64][]*meta.DmChannel{
2: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 2},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 2, View: &meta.LeaderView{ID: 2, CollectionID: 1}},
},
3: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 3},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 3, View: &meta.LeaderView{ID: 3, CollectionID: 1}},
},
},
expectPlans: []SegmentAssignPlan{},
@ -280,10 +280,10 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalance() {
},
distributionChannels: map[int64][]*meta.DmChannel{
2: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 2},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 2, View: &meta.LeaderView{ID: 2, CollectionID: 1}},
},
1: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 1},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 1, View: &meta.LeaderView{ID: 3, CollectionID: 1}},
},
},
expectPlans: []SegmentAssignPlan{
@ -301,8 +301,8 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalance() {
distributions: map[int64][]*meta.Segment{},
distributionChannels: map[int64][]*meta.DmChannel{
2: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 2},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 2},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 2, View: &meta.LeaderView{ID: 2, CollectionID: 1}},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 2, View: &meta.LeaderView{ID: 3, CollectionID: 1}},
},
3: {},
},
@ -320,14 +320,14 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalance() {
distributions: map[int64][]*meta.Segment{},
distributionChannels: map[int64][]*meta.DmChannel{
1: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v1"}, Node: 1},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 1},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v1"}, Node: 1, View: &meta.LeaderView{ID: 1, CollectionID: 1, Channel: "v1", Status: &querypb.LeaderViewStatus{Serviceable: true}}},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 1, View: &meta.LeaderView{ID: 2, CollectionID: 1, Channel: "v2", Status: &querypb.LeaderViewStatus{Serviceable: true}}},
},
2: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 2},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 2, View: &meta.LeaderView{ID: 3, CollectionID: 1}},
},
3: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v4"}, Node: 3},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v4"}, Node: 3, View: &meta.LeaderView{ID: 4, CollectionID: 1}},
},
},
expectPlans: []SegmentAssignPlan{},
@ -342,8 +342,8 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalance() {
distributions: map[int64][]*meta.Segment{},
distributionChannels: map[int64][]*meta.DmChannel{
1: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v1"}, Node: 1},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 1},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v1"}, Node: 1, View: &meta.LeaderView{ID: 1, CollectionID: 1, Channel: "v1", Status: &querypb.LeaderViewStatus{Serviceable: true}}},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 1, View: &meta.LeaderView{ID: 2, CollectionID: 1, Channel: "v2", Status: &querypb.LeaderViewStatus{Serviceable: true}}},
},
2: {},
3: {},
@ -443,8 +443,6 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalance() {
assertChannelAssignPlanElementMatch(&suite.Suite, c.expectChannelPlans, channelPlans, true)
}
// clear distribution
for _, node := range c.nodes {
balancer.meta.ResourceManager.HandleNodeDown(ctx, node)
balancer.nodeManager.Remove(node)
@ -535,7 +533,7 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalanceOnPartStopping() {
},
distributionChannels: map[int64][]*meta.DmChannel{
2: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 2},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 2, View: &meta.LeaderView{ID: 2, CollectionID: 1}},
},
},
expectPlans: []SegmentAssignPlan{
@ -595,10 +593,10 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalanceOnPartStopping() {
},
distributionChannels: map[int64][]*meta.DmChannel{
2: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 2},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 2, View: &meta.LeaderView{ID: 2, CollectionID: 1}},
},
3: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 3},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 3, View: &meta.LeaderView{ID: 3, CollectionID: 1}},
},
},
expectPlans: []SegmentAssignPlan{},
@ -688,10 +686,10 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalanceOutboundNodes() {
},
distributionChannels: map[int64][]*meta.DmChannel{
2: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 2},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 2, View: &meta.LeaderView{ID: 2, CollectionID: 1}},
},
3: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 3},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 3, View: &meta.LeaderView{ID: 3, CollectionID: 1}},
},
},
expectPlans: []SegmentAssignPlan{},
@ -718,10 +716,10 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalanceOutboundNodes() {
},
distributionChannels: map[int64][]*meta.DmChannel{
2: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 2},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 2, View: &meta.LeaderView{ID: 2, CollectionID: 1}},
},
1: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 1},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 1, View: &meta.LeaderView{ID: 3, CollectionID: 1}},
},
},
expectPlans: []SegmentAssignPlan{
@ -897,12 +895,18 @@ func (suite *RowCountBasedBalancerTestSuite) TestAssignSegmentWithGrowing() {
}
// mock 50 growing row count in node 1, which is delegator, expect all segment assign to node 2
leaderView := &meta.LeaderView{
ID: 1,
CollectionID: 1,
NumOfGrowingRows: 50,
}
suite.balancer.dist.LeaderViewManager.Update(1, leaderView)
suite.balancer.dist.ChannelDistManager.Update(1, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "v1",
},
Node: 1,
View: &meta.LeaderView{
ID: 1,
CollectionID: 1,
NumOfGrowingRows: 50,
},
})
plans := balancer.AssignSegment(ctx, 1, toAssign, lo.Keys(distributions), false)
for _, p := range plans {
suite.Equal(int64(2), p.To)
@ -934,14 +938,18 @@ func (suite *RowCountBasedBalancerTestSuite) TestDisableBalanceChannel() {
distributions: map[int64][]*meta.Segment{},
distributionChannels: map[int64][]*meta.DmChannel{
2: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 2},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 2},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 2, View: &meta.LeaderView{ID: 2, CollectionID: 1}},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 2, View: &meta.LeaderView{ID: 2, CollectionID: 1}},
},
3: {},
},
expectPlans: []SegmentAssignPlan{},
expectChannelPlans: []ChannelAssignPlan{
{Channel: &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 2}, From: 2, To: 3, Replica: newReplicaDefaultRG(1)},
{Channel: &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"},
Node: 2,
View: &meta.LeaderView{ID: 3, CollectionID: 1},
}, From: 2, To: 3, Replica: newReplicaDefaultRG(1)},
},
enableBalanceChannel: true,
},
@ -955,8 +963,8 @@ func (suite *RowCountBasedBalancerTestSuite) TestDisableBalanceChannel() {
distributions: map[int64][]*meta.Segment{},
distributionChannels: map[int64][]*meta.DmChannel{
2: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 2},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 2},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 2, View: &meta.LeaderView{ID: 2, CollectionID: 1}},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 2, View: &meta.LeaderView{ID: 2, CollectionID: 1}},
},
3: {},
},
@ -1099,12 +1107,12 @@ func (suite *RowCountBasedBalancerTestSuite) TestMultiReplicaBalance() {
},
channelDist: map[int64][]*meta.DmChannel{
1: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel1"}, Node: 1},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel2"}, Node: 1},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel1"}, Node: 1, View: &meta.LeaderView{ID: 1, CollectionID: 1}},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel2"}, Node: 1, View: &meta.LeaderView{ID: 1, CollectionID: 1}},
},
3: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel3"}, Node: 3},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel4"}, Node: 3},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel3"}, Node: 3, View: &meta.LeaderView{ID: 3, CollectionID: 1}},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel4"}, Node: 3, View: &meta.LeaderView{ID: 3, CollectionID: 1}},
},
},
expectPlans: []SegmentAssignPlan{},
@ -1162,10 +1170,26 @@ func (suite *RowCountBasedBalancerTestSuite) TestMultiReplicaBalance() {
suite.Len(channelPlans, 2)
// mock new distribution after channel balance
balancer.dist.ChannelDistManager.Update(1, &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel1"}, Node: 1})
balancer.dist.ChannelDistManager.Update(2, &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel2"}, Node: 2})
balancer.dist.ChannelDistManager.Update(3, &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel3"}, Node: 3})
balancer.dist.ChannelDistManager.Update(4, &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel4"}, Node: 4})
balancer.dist.ChannelDistManager.Update(1, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel1"},
Node: 1,
View: &meta.LeaderView{ID: 1, CollectionID: 1},
})
balancer.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel2"},
Node: 2,
View: &meta.LeaderView{ID: 2, CollectionID: 1},
})
balancer.dist.ChannelDistManager.Update(3, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel3"},
Node: 3,
View: &meta.LeaderView{ID: 3, CollectionID: 1},
})
balancer.dist.ChannelDistManager.Update(4, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel4"},
Node: 4,
View: &meta.LeaderView{ID: 4, CollectionID: 1},
})
// expected to balance segment
segmentPlans, channelPlans = suite.getCollectionBalancePlans(balancer, c.collectionID)


@ -311,11 +311,11 @@ func (b *ScoreBasedBalancer) convertToNodeItemsBySegment(br *balanceReport, coll
nodeScoreMap[node].setAssignedScore(average)
}
// use assignedScore * delegatorOverloadFactor * delegator_num, to preserve fixed memory size for delegator
collectionViews := b.dist.LeaderViewManager.GetByFilter(meta.WithCollectionID2LeaderView(collectionID), meta.WithNodeID2LeaderView(node))
if len(collectionViews) > 0 {
delegatorDelta := nodeScoreMap[node].getAssignedScore() * delegatorOverloadFactor * float64(len(collectionViews))
collDelegator := b.dist.ChannelDistManager.GetByFilter(meta.WithCollectionID2Channel(collectionID), meta.WithNodeID2Channel(node))
if len(collDelegator) > 0 {
delegatorDelta := nodeScoreMap[node].getAssignedScore() * delegatorOverloadFactor * float64(len(collDelegator))
nodeScoreMap[node].AddCurrentScoreDelta(delegatorDelta)
br.SetDeletagorScore(node, delegatorDelta)
br.SetDelegatorScore(node, delegatorDelta)
}
}
return nodeScoreMap
@ -376,9 +376,9 @@ func (b *ScoreBasedBalancer) calculateScoreBySegment(br *balanceReport, collecti
}
// calculate global growing segment row count
views := b.dist.LeaderViewManager.GetByFilter(meta.WithNodeID2LeaderView(nodeID))
for _, view := range views {
nodeRowCount += int(float64(view.NumOfGrowingRows))
delegatorList := b.dist.ChannelDistManager.GetByFilter(meta.WithNodeID2Channel(nodeID))
for _, d := range delegatorList {
nodeRowCount += int(float64(d.View.NumOfGrowingRows))
}
// calculate executing task cost in scheduler
@ -392,9 +392,9 @@ func (b *ScoreBasedBalancer) calculateScoreBySegment(br *balanceReport, collecti
}
// calculate collection growing segment row count
collectionViews := b.dist.LeaderViewManager.GetByFilter(meta.WithCollectionID2LeaderView(collectionID), meta.WithNodeID2LeaderView(nodeID))
for _, view := range collectionViews {
collectionRowCount += int(float64(view.NumOfGrowingRows))
collDelegatorList := b.dist.ChannelDistManager.GetByFilter(meta.WithCollectionID2Channel(collectionID), meta.WithNodeID2Channel(nodeID))
for _, d := range collDelegatorList {
collectionRowCount += int(float64(d.View.NumOfGrowingRows))
}
// calculate executing task cost in scheduler


@ -300,11 +300,18 @@ func (suite *ScoreBasedBalancerTestSuite) TestAssignSegmentWithGrowing() {
}
// mock 50 growing row count in node 1, which is delegator, expect all segment assign to node 2
leaderView := &meta.LeaderView{
ID: 1,
CollectionID: 1,
}
suite.balancer.dist.LeaderViewManager.Update(1, leaderView)
suite.balancer.dist.ChannelDistManager.Update(1, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "v1",
},
Node: 1,
View: &meta.LeaderView{
ID: 1,
CollectionID: 1,
NumOfGrowingRows: 50,
},
})
plans := balancer.AssignSegment(ctx, 1, toAssign, lo.Keys(distributions), false)
for _, p := range plans {
suite.Equal(int64(2), p.To)
@ -452,6 +459,19 @@ func (suite *ScoreBasedBalancerTestSuite) TestDelegatorPreserveMemory() {
{SegmentInfo: &datapb.SegmentInfo{ID: 5, CollectionID: 1, NumOfRows: 10}, Node: 2},
},
},
distributionChannels: map[int64][]*meta.DmChannel{
1: {
{
VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v1"},
Node: 1,
View: &meta.LeaderView{
ID: 1,
CollectionID: 1,
NumOfGrowingRows: 10,
},
},
},
},
expectPlans: []SegmentAssignPlan{},
expectChannelPlans: []ChannelAssignPlan{},
},
@ -485,12 +505,6 @@ func (suite *ScoreBasedBalancerTestSuite) TestDelegatorPreserveMemory() {
balancer.dist.ChannelDistManager.Update(node, v...)
}
leaderView := &meta.LeaderView{
ID: 1,
CollectionID: 1,
}
suite.balancer.dist.LeaderViewManager.Update(1, leaderView)
// 3. set up nodes info and resourceManager for balancer
for i := range c.nodes {
nodeInfo := session.NewNodeInfo(session.ImmutableNodeInfo{
@ -940,12 +954,12 @@ func (suite *ScoreBasedBalancerTestSuite) TestMultiReplicaBalance() {
},
channelDist: map[int64][]*meta.DmChannel{
1: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel1"}, Node: 1},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel2"}, Node: 1},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel1"}, Node: 1, View: &meta.LeaderView{ID: 1, CollectionID: 1}},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel2"}, Node: 1, View: &meta.LeaderView{ID: 1, CollectionID: 1}},
},
3: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel3"}, Node: 3},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel4"}, Node: 3},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel3"}, Node: 3, View: &meta.LeaderView{ID: 3, CollectionID: 1}},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel4"}, Node: 3, View: &meta.LeaderView{ID: 3, CollectionID: 1}},
},
},
expectPlans: []SegmentAssignPlan{},
@ -1003,10 +1017,10 @@ func (suite *ScoreBasedBalancerTestSuite) TestMultiReplicaBalance() {
suite.Len(channelPlans, 2)
// mock new distribution after channel balance
balancer.dist.ChannelDistManager.Update(1, &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel1"}, Node: 1})
balancer.dist.ChannelDistManager.Update(2, &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel2"}, Node: 2})
balancer.dist.ChannelDistManager.Update(3, &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel3"}, Node: 3})
balancer.dist.ChannelDistManager.Update(4, &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel4"}, Node: 4})
balancer.dist.ChannelDistManager.Update(1, &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel1"}, Node: 1, View: &meta.LeaderView{ID: 1, CollectionID: 1}})
balancer.dist.ChannelDistManager.Update(2, &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel2"}, Node: 2, View: &meta.LeaderView{ID: 2, CollectionID: 1}})
balancer.dist.ChannelDistManager.Update(3, &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel3"}, Node: 3, View: &meta.LeaderView{ID: 3, CollectionID: 1}})
balancer.dist.ChannelDistManager.Update(4, &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel4"}, Node: 4, View: &meta.LeaderView{ID: 4, CollectionID: 1}})
// expected to balance segment
segmentPlans, channelPlans = suite.getCollectionBalancePlans(balancer, c.collectionID)
@ -1200,9 +1214,9 @@ func (suite *ScoreBasedBalancerTestSuite) TestBalanceSegmentAndChannel() {
// set unbalance channel distribution
balancer.dist.ChannelDistManager.Update(1, []*meta.DmChannel{
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel1"}, Node: 1},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel2"}, Node: 1},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel3"}, Node: 1},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel1"}, Node: 1, View: &meta.LeaderView{ID: 1, CollectionID: 1}},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel2"}, Node: 1, View: &meta.LeaderView{ID: 1, CollectionID: 1}},
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "channel3"}, Node: 1, View: &meta.LeaderView{ID: 1, CollectionID: 1}},
}...)
// expect to generate 2 balance segment task
@ -1269,7 +1283,9 @@ func (suite *ScoreBasedBalancerTestSuite) TestBalanceChannelOnMultiCollections()
collectionID := int64(i)
for i := 0; i < channelNum; i++ {
channelDist = append(channelDist, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{CollectionID: collectionID, ChannelName: fmt.Sprintf("channel-%d-%d", collectionID, i)}, Node: 1,
VchannelInfo: &datapb.VchannelInfo{CollectionID: collectionID, ChannelName: fmt.Sprintf("channel-%d-%d", collectionID, i)},
Node: 1,
View: &meta.LeaderView{ID: 1, CollectionID: collectionID},
})
}
}
@ -1351,7 +1367,9 @@ func (suite *ScoreBasedBalancerTestSuite) TestBalanceChannelOnDifferentQN() {
channelDist := make([]*meta.DmChannel, 0)
for i := 0; i < channelNum; i++ {
channelDist = append(channelDist, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{CollectionID: collectionID, ChannelName: fmt.Sprintf("channel-%d-%d", collectionID, i)}, Node: 1,
VchannelInfo: &datapb.VchannelInfo{CollectionID: collectionID, ChannelName: fmt.Sprintf("channel-%d-%d", collectionID, i)},
Node: 1,
View: &meta.LeaderView{ID: 1, CollectionID: collectionID},
})
}
balancer.dist.ChannelDistManager.Update(1, channelDist...)
@ -1433,7 +1451,9 @@ func (suite *ScoreBasedBalancerTestSuite) TestBalanceChannelOnChannelExclusive()
channelDist1 := make([]*meta.DmChannel, 0)
for i := 0; i < channelNum; i++ {
channelDist1 = append(channelDist1, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{CollectionID: collectionID, ChannelName: fmt.Sprintf("channel-%d-%d", collectionID, i)}, Node: 1,
VchannelInfo: &datapb.VchannelInfo{CollectionID: collectionID, ChannelName: fmt.Sprintf("channel-%d-%d", collectionID, i)},
Node: 1,
View: &meta.LeaderView{ID: 1, CollectionID: collectionID},
})
}
balancer.dist.ChannelDistManager.Update(1, channelDist1...)
@ -1442,7 +1462,9 @@ func (suite *ScoreBasedBalancerTestSuite) TestBalanceChannelOnChannelExclusive()
channelDist2 := make([]*meta.DmChannel, 0)
for i := 0; i < channelNum; i++ {
channelDist2 = append(channelDist2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{CollectionID: collectionID, ChannelName: fmt.Sprintf("channel-%d-%d", collectionID, i)}, Node: 1,
VchannelInfo: &datapb.VchannelInfo{CollectionID: collectionID, ChannelName: fmt.Sprintf("channel-%d-%d", collectionID, i)},
Node: 2,
View: &meta.LeaderView{ID: 2, CollectionID: collectionID},
})
}
balancer.dist.ChannelDistManager.Update(2, channelDist2...)
@ -1451,7 +1473,9 @@ func (suite *ScoreBasedBalancerTestSuite) TestBalanceChannelOnChannelExclusive()
channelDist3 := make([]*meta.DmChannel, 0)
for i := 0; i < channelNum; i++ {
channelDist3 = append(channelDist3, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{CollectionID: collectionID, ChannelName: fmt.Sprintf("channel-%d-%d", collectionID, i)}, Node: 1,
VchannelInfo: &datapb.VchannelInfo{CollectionID: collectionID, ChannelName: fmt.Sprintf("channel-%d-%d", collectionID, i)},
Node: 3,
View: &meta.LeaderView{ID: 3, CollectionID: collectionID},
})
}
balancer.dist.ChannelDistManager.Update(3, channelDist3...)
@ -1517,7 +1541,9 @@ func (suite *ScoreBasedBalancerTestSuite) TestBalanceChannelOnStoppingNode() {
collectionID := int64(i)
for i := 0; i < channelNum; i++ {
channelDist = append(channelDist, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{CollectionID: collectionID, ChannelName: fmt.Sprintf("channel-%d-%d", collectionID, i)}, Node: 1,
VchannelInfo: &datapb.VchannelInfo{CollectionID: collectionID, ChannelName: fmt.Sprintf("channel-%d-%d", collectionID, i)},
Node: 1,
View: &meta.LeaderView{ID: 1, CollectionID: collectionID},
})
}
}


@ -239,10 +239,8 @@ func (b *BalanceChecker) Check(ctx context.Context) []task.Task {
// if balance on multiple collections is disabled, and there are already some tasks, break
break
}
if len(channelTasks) < channelBatchSize {
replicasToBalance := b.getReplicaForStoppingBalance(ctx)
generateBalanceTaskForReplicas(replicasToBalance)
}
replicasToBalance := b.getReplicaForStoppingBalance(ctx)
generateBalanceTaskForReplicas(replicasToBalance)
}
} else {
// then check for auto balance


@ -181,49 +181,23 @@ func (c *ChannelChecker) getDmChannelDiff(ctx context.Context, collectionID int6
func (c *ChannelChecker) findRepeatedChannels(ctx context.Context, replicaID int64) []*meta.DmChannel {
log := log.Ctx(ctx).WithRateGroup("ChannelChecker.findRepeatedChannels", 1, 60)
replica := c.meta.Get(ctx, replicaID)
ret := make([]*meta.DmChannel, 0)
dupChannels := make([]*meta.DmChannel, 0)
if replica == nil {
log.Info("replica does not exist, skip it")
return ret
return dupChannels
}
dist := c.dist.ChannelDistManager.GetByCollectionAndFilter(replica.GetCollectionID(), meta.WithReplica2Channel(replica))
versionsMap := make(map[string]*meta.DmChannel)
for _, ch := range dist {
leaderView := c.dist.LeaderViewManager.GetLeaderShardView(ch.Node, ch.GetChannelName())
if leaderView == nil {
log.Info("shard leader view is not ready, skip",
zap.Int64("collectionID", replica.GetCollectionID()),
zap.Int64("replicaID", replicaID),
zap.Int64("leaderID", ch.Node),
zap.String("channel", ch.GetChannelName()))
continue
}
if leaderView.UnServiceableError != nil {
log.RatedInfo(10, "replica has unavailable shard leader",
zap.Int64("collectionID", replica.GetCollectionID()),
zap.Int64("replicaID", replicaID),
zap.Int64("leaderID", ch.Node),
zap.String("channel", ch.GetChannelName()),
zap.Error(leaderView.UnServiceableError))
continue
}
maxVer, ok := versionsMap[ch.GetChannelName()]
if !ok {
versionsMap[ch.GetChannelName()] = ch
continue
}
if maxVer.Version <= ch.Version {
ret = append(ret, maxVer)
versionsMap[ch.GetChannelName()] = ch
} else {
ret = append(ret, ch)
delegatorList := c.dist.ChannelDistManager.GetByCollectionAndFilter(replica.GetCollectionID(), meta.WithReplica2Channel(replica))
for _, delegator := range delegatorList {
leader := c.dist.ChannelDistManager.GetShardLeader(delegator.GetChannelName(), replica)
// if channel's version is smaller than shard leader's version, it means that the channel is not up to date
if delegator.Version < leader.Version && delegator.Node != leader.Node {
dupChannels = append(dupChannels, delegator)
}
}
return ret
return dupChannels
}
func (c *ChannelChecker) createChannelLoadTask(ctx context.Context, channels []*meta.DmChannel, replica *meta.Replica) []task.Task {


@ -34,6 +34,7 @@ import (
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
"github.com/milvus-io/milvus/pkg/v2/kv"
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
"github.com/milvus-io/milvus/pkg/v2/util/etcd"
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
)
@ -169,10 +170,38 @@ func (suite *ChannelCheckerTestSuite) TestReduceChannel() {
checker.targetMgr.UpdateCollectionNextTarget(ctx, int64(1))
checker.targetMgr.UpdateCollectionCurrentTarget(ctx, int64(1))
checker.dist.ChannelDistManager.Update(1, utils.CreateTestChannel(1, 1, 1, "test-insert-channel1"))
checker.dist.LeaderViewManager.Update(1, &meta.LeaderView{ID: 1, Channel: "test-insert-channel1"})
checker.dist.ChannelDistManager.Update(1, utils.CreateTestChannel(1, 1, 1, "test-insert-channel2"))
checker.dist.LeaderViewManager.Update(1, &meta.LeaderView{ID: 1, Channel: "test-insert-channel2"})
checker.dist.ChannelDistManager.Update(1, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel1",
},
Node: 1,
Version: 1,
View: &meta.LeaderView{
ID: 1,
Channel: "test-insert-channel1",
Version: 1,
Status: &querypb.LeaderViewStatus{
Serviceable: true,
},
},
}, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel2",
},
Node: 1,
Version: 1,
View: &meta.LeaderView{
ID: 1,
Channel: "test-insert-channel2",
Version: 1,
Status: &querypb.LeaderViewStatus{
Serviceable: true,
},
},
})
suite.setNodeAvailable(1)
tasks := checker.Check(context.TODO())
suite.Len(tasks, 1)
@ -210,15 +239,59 @@ func (suite *ChannelCheckerTestSuite) TestRepeatedChannels() {
suite.broker.EXPECT().GetRecoveryInfoV2(mock.Anything, int64(1)).Return(
channels, segments, nil)
checker.targetMgr.UpdateCollectionNextTarget(ctx, int64(1))
checker.dist.ChannelDistManager.Update(1, utils.CreateTestChannel(1, 1, 1, "test-insert-channel"))
checker.dist.ChannelDistManager.Update(2, utils.CreateTestChannel(1, 2, 2, "test-insert-channel"))
checker.dist.ChannelDistManager.Update(1, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 1,
Version: 1,
View: &meta.LeaderView{
ID: 1,
Channel: "test-insert-channel",
Version: 1,
Status: &querypb.LeaderViewStatus{Serviceable: true},
},
})
checker.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 2,
Version: 2,
View: &meta.LeaderView{
ID: 2,
Channel: "test-insert-channel",
Version: 2,
Status: &querypb.LeaderViewStatus{
Serviceable: false,
},
},
})
tasks := checker.Check(context.TODO())
suite.Len(tasks, 0)
suite.setNodeAvailable(1, 2)
checker.dist.LeaderViewManager.Update(1, &meta.LeaderView{ID: 1, Channel: "test-insert-channel"})
checker.dist.LeaderViewManager.Update(2, &meta.LeaderView{ID: 2, Channel: "test-insert-channel"})
checker.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 2,
Version: 2,
View: &meta.LeaderView{
ID: 2,
Channel: "test-insert-channel",
Version: 2,
Status: &querypb.LeaderViewStatus{
Serviceable: true,
},
},
})
tasks = checker.Check(context.TODO())
suite.Len(tasks, 1)
suite.EqualValues(1, tasks[0].ReplicaID())
@ -239,19 +312,6 @@ func (suite *ChannelCheckerTestSuite) TestReleaseDirtyChannels() {
err = checker.meta.ReplicaManager.Put(ctx, utils.CreateTestReplica(1, 1, []int64{1}))
suite.NoError(err)
segments := []*datapb.SegmentInfo{
{
ID: 1,
InsertChannel: "test-insert-channel",
},
}
channels := []*datapb.VchannelInfo{
{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
}
suite.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
NodeID: 1,
Address: "localhost",
@ -263,13 +323,38 @@ func (suite *ChannelCheckerTestSuite) TestReleaseDirtyChannels() {
Hostname: "localhost",
}))
suite.broker.EXPECT().GetRecoveryInfoV2(mock.Anything, int64(1)).Return(
channels, segments, nil)
checker.targetMgr.UpdateCollectionNextTarget(ctx, int64(1))
checker.dist.ChannelDistManager.Update(1, utils.CreateTestChannel(1, 1, 2, "test-insert-channel"))
checker.dist.ChannelDistManager.Update(2, utils.CreateTestChannel(1, 2, 2, "test-insert-channel"))
checker.dist.LeaderViewManager.Update(1, &meta.LeaderView{ID: 1, Channel: "test-insert-channel"})
checker.dist.LeaderViewManager.Update(2, &meta.LeaderView{ID: 2, Channel: "test-insert-channel"})
checker.dist.ChannelDistManager.Update(1, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 1,
Version: 1,
View: &meta.LeaderView{
ID: 1,
Channel: "test-insert-channel",
Version: 1,
Status: &querypb.LeaderViewStatus{
Serviceable: true,
},
},
})
checker.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 2,
Version: 2,
View: &meta.LeaderView{
ID: 2,
Channel: "test-insert-channel",
Version: 2,
Status: &querypb.LeaderViewStatus{
Serviceable: true,
},
},
})
tasks := checker.Check(context.TODO())
suite.Len(tasks, 1)


@ -35,6 +35,7 @@ import (
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
"github.com/milvus-io/milvus/pkg/v2/kv"
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
"github.com/milvus-io/milvus/pkg/v2/util/etcd"
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
)
@ -124,8 +125,23 @@ func (suite *CheckerControllerSuite) TestBasic() {
suite.targetManager.UpdateCollectionNextTarget(ctx, int64(1))
// set dist
suite.dist.ChannelDistManager.Update(2, utils.CreateTestChannel(1, 2, 1, "test-insert-channel"))
suite.dist.LeaderViewManager.Update(2, utils.CreateTestLeaderView(2, 1, "test-insert-channel", map[int64]int64{1: 2}, map[int64]*meta.Segment{}))
suite.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 2,
Version: 1,
// View: utils.CreateTestLeaderView(2, 1, "test-insert-channel", map[int64]int64{1: 2}, map[int64]*meta.Segment{}),
View: &meta.LeaderView{
ID: 2,
Channel: "test-insert-channel",
Version: 1,
Status: &querypb.LeaderViewStatus{
Serviceable: true,
},
},
})
counter := atomic.NewInt64(0)
suite.scheduler.EXPECT().Add(mock.Anything).Run(func(task task.Task) {
@ -154,8 +170,22 @@ func (suite *CheckerControllerSuite) TestBasic() {
}, 3*time.Second, 1*time.Millisecond)
// until new channel has been subscribed
suite.dist.ChannelDistManager.Update(1, utils.CreateTestChannel(1, 1, 1, "test-insert-channel2"))
suite.dist.LeaderViewManager.Update(1, utils.CreateTestLeaderView(1, 1, "test-insert-channel2", map[int64]int64{}, map[int64]*meta.Segment{}))
suite.dist.ChannelDistManager.Update(1, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel2",
},
Node: 1,
Version: 1,
View: &meta.LeaderView{
ID: 1,
Channel: "test-insert-channel2",
Version: 1,
Status: &querypb.LeaderViewStatus{
Serviceable: true,
},
},
})
// expect assign segment after channel has been subscribed
suite.Eventually(func() bool {


@ -97,12 +97,12 @@ func (c *LeaderChecker) Check(ctx context.Context) []task.Task {
nodes = replica.GetRWSQNodes()
}
for _, node := range nodes {
leaderViews := c.dist.LeaderViewManager.GetByFilter(meta.WithCollectionID2LeaderView(replica.GetCollectionID()), meta.WithNodeID2LeaderView(node))
for _, leaderView := range leaderViews {
dist := c.dist.SegmentDistManager.GetByFilter(meta.WithChannel(leaderView.Channel), meta.WithReplica(replica))
tasks = append(tasks, c.findNeedLoadedSegments(ctx, replica, leaderView, dist)...)
tasks = append(tasks, c.findNeedRemovedSegments(ctx, replica, leaderView, dist)...)
tasks = append(tasks, c.findNeedSyncPartitionStats(ctx, replica, leaderView, node)...)
delegatorList := c.dist.ChannelDistManager.GetByFilter(meta.WithCollectionID2Channel(replica.GetCollectionID()), meta.WithNodeID2Channel(node))
for _, d := range delegatorList {
dist := c.dist.SegmentDistManager.GetByFilter(meta.WithChannel(d.View.Channel), meta.WithReplica(replica))
tasks = append(tasks, c.findNeedLoadedSegments(ctx, replica, d.View, dist)...)
tasks = append(tasks, c.findNeedRemovedSegments(ctx, replica, d.View, dist)...)
tasks = append(tasks, c.findNeedSyncPartitionStats(ctx, replica, d.View, node)...)
}
}
}


@ -124,10 +124,20 @@ func (suite *LeaderCheckerTestSuite) TestSyncLoadedSegments() {
observer.target.UpdateCollectionCurrentTarget(ctx, 1)
loadVersion := time.Now().UnixMilli()
observer.dist.SegmentDistManager.Update(1, utils.CreateTestSegment(1, 1, 1, 2, loadVersion, "test-insert-channel"))
observer.dist.ChannelDistManager.Update(2, utils.CreateTestChannel(1, 2, 1, "test-insert-channel"))
view := utils.CreateTestLeaderView(2, 1, "test-insert-channel", map[int64]int64{}, map[int64]*meta.Segment{})
view.TargetVersion = observer.target.GetCollectionTargetVersion(ctx, 1, meta.CurrentTarget)
observer.dist.LeaderViewManager.Update(2, view)
observer.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 2,
Version: 1,
View: &meta.LeaderView{
ID: 2,
CollectionID: 1,
Channel: "test-insert-channel",
TargetVersion: observer.target.GetCollectionTargetVersion(ctx, 1, meta.CurrentTarget),
},
})
tasks = suite.checker.Check(context.TODO())
suite.Len(tasks, 1)
@ -145,13 +155,21 @@ func (suite *LeaderCheckerTestSuite) TestSyncLoadedSegments() {
version1, version2 := int64(1), int64(2)
observer.dist.SegmentDistManager.Update(node1)
observer.dist.SegmentDistManager.Update(node2, utils.CreateTestSegment(1, 1, 1, node2, version2, "test-insert-channel"))
view = utils.CreateTestLeaderView(node2, 1, "test-insert-channel", map[int64]int64{}, map[int64]*meta.Segment{})
view.TargetVersion = observer.target.GetCollectionTargetVersion(ctx, 1, meta.CurrentTarget)
view.Segments[1] = &querypb.SegmentDist{
NodeID: node1,
Version: version1,
}
observer.dist.LeaderViewManager.Update(node2, view)
observer.dist.ChannelDistManager.Update(node2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: node2,
Version: 1,
View: &meta.LeaderView{
ID: 2,
CollectionID: 1,
Channel: "test-insert-channel",
TargetVersion: observer.target.GetCollectionTargetVersion(ctx, 1, meta.CurrentTarget),
Segments: map[int64]*querypb.SegmentDist{1: {NodeID: node1, Version: version1}},
},
})
tasks = suite.checker.Check(context.TODO())
suite.Len(tasks, 1)
@ -199,10 +217,20 @@ func (suite *LeaderCheckerTestSuite) TestActivation() {
observer.target.UpdateCollectionNextTarget(ctx, int64(1))
observer.target.UpdateCollectionCurrentTarget(ctx, 1)
observer.dist.SegmentDistManager.Update(1, utils.CreateTestSegment(1, 1, 1, 2, 1, "test-insert-channel"))
observer.dist.ChannelDistManager.Update(2, utils.CreateTestChannel(1, 2, 1, "test-insert-channel"))
view := utils.CreateTestLeaderView(2, 1, "test-insert-channel", map[int64]int64{}, map[int64]*meta.Segment{})
view.TargetVersion = observer.target.GetCollectionTargetVersion(ctx, 1, meta.CurrentTarget)
observer.dist.LeaderViewManager.Update(2, view)
observer.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 2,
Version: 1,
View: &meta.LeaderView{
ID: 2,
CollectionID: 1,
Channel: "test-insert-channel",
TargetVersion: observer.target.GetCollectionTargetVersion(ctx, 1, meta.CurrentTarget),
},
})
suite.checker.Deactivate()
tasks := suite.checker.Check(context.TODO())
@ -243,10 +271,20 @@ func (suite *LeaderCheckerTestSuite) TestStoppingNode() {
observer.target.UpdateCollectionNextTarget(ctx, int64(1))
observer.target.UpdateCollectionCurrentTarget(ctx, 1)
observer.dist.SegmentDistManager.Update(1, utils.CreateTestSegment(1, 1, 1, 2, 1, "test-insert-channel"))
observer.dist.ChannelDistManager.Update(2, utils.CreateTestChannel(1, 2, 1, "test-insert-channel"))
view := utils.CreateTestLeaderView(2, 1, "test-insert-channel", map[int64]int64{}, map[int64]*meta.Segment{})
view.TargetVersion = observer.target.GetCollectionTargetVersion(ctx, 1, meta.CurrentTarget)
observer.dist.LeaderViewManager.Update(2, view)
observer.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 2,
Version: 1,
View: &meta.LeaderView{
ID: 2,
CollectionID: 1,
Channel: "test-insert-channel",
TargetVersion: observer.target.GetCollectionTargetVersion(ctx, 1, meta.CurrentTarget),
},
})
mutableReplica := replica.CopyForWrite()
mutableReplica.AddRONode(2)
@ -293,11 +331,20 @@ func (suite *LeaderCheckerTestSuite) TestIgnoreSyncLoadedSegments() {
observer.target.UpdateCollectionNextTarget(ctx, int64(1))
observer.dist.SegmentDistManager.Update(1, utils.CreateTestSegment(1, 1, 1, 2, 1, "test-insert-channel"),
utils.CreateTestSegment(1, 1, 2, 2, 1, "test-insert-channel"))
observer.dist.ChannelDistManager.Update(2, utils.CreateTestChannel(1, 2, 1, "test-insert-channel"))
view := utils.CreateTestLeaderView(2, 1, "test-insert-channel", map[int64]int64{}, map[int64]*meta.Segment{})
view.TargetVersion = observer.target.GetCollectionTargetVersion(ctx, 1, meta.CurrentTarget)
observer.dist.LeaderViewManager.Update(2, view)
observer.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 2,
Version: 1,
View: &meta.LeaderView{
ID: 2,
CollectionID: 1,
Channel: "test-insert-channel",
TargetVersion: observer.target.GetCollectionTargetVersion(ctx, 1, meta.CurrentTarget),
},
})
tasks := suite.checker.Check(context.TODO())
suite.Len(tasks, 1)
suite.Equal(tasks[0].Source(), utils.LeaderChecker)
@ -346,14 +393,35 @@ func (suite *LeaderCheckerTestSuite) TestSyncLoadedSegmentsWithReplicas() {
observer.target.UpdateCollectionCurrentTarget(ctx, 1)
observer.dist.SegmentDistManager.Update(1, utils.CreateTestSegment(1, 1, 1, 1, 0, "test-insert-channel"))
observer.dist.SegmentDistManager.Update(4, utils.CreateTestSegment(1, 1, 1, 4, 0, "test-insert-channel"))
observer.dist.ChannelDistManager.Update(2, utils.CreateTestChannel(1, 2, 1, "test-insert-channel"))
observer.dist.ChannelDistManager.Update(4, utils.CreateTestChannel(1, 4, 2, "test-insert-channel"))
view := utils.CreateTestLeaderView(2, 1, "test-insert-channel", map[int64]int64{}, map[int64]*meta.Segment{})
view.TargetVersion = observer.target.GetCollectionTargetVersion(ctx, 1, meta.CurrentTarget)
observer.dist.LeaderViewManager.Update(2, view)
view2 := utils.CreateTestLeaderView(4, 1, "test-insert-channel", map[int64]int64{1: 4}, map[int64]*meta.Segment{})
view.TargetVersion = observer.target.GetCollectionTargetVersion(ctx, 1, meta.CurrentTarget)
observer.dist.LeaderViewManager.Update(4, view2)
observer.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 2,
Version: 1,
View: &meta.LeaderView{
ID: 2,
CollectionID: 1,
Channel: "test-insert-channel",
TargetVersion: observer.target.GetCollectionTargetVersion(ctx, 1, meta.CurrentTarget),
},
})
observer.dist.ChannelDistManager.Update(4, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 4,
Version: 2,
View: &meta.LeaderView{
ID: 4,
CollectionID: 1,
Channel: "test-insert-channel",
TargetVersion: observer.target.GetCollectionTargetVersion(ctx, 1, meta.CurrentTarget),
Segments: map[int64]*querypb.SegmentDist{1: {NodeID: 4}},
},
})
tasks := suite.checker.Check(context.TODO())
suite.Len(tasks, 1)
@ -385,10 +453,21 @@ func (suite *LeaderCheckerTestSuite) TestSyncRemovedSegments() {
observer.target.UpdateCollectionNextTarget(ctx, int64(1))
observer.target.UpdateCollectionCurrentTarget(ctx, 1)
observer.dist.ChannelDistManager.Update(2, utils.CreateTestChannel(1, 2, 1, "test-insert-channel"))
view := utils.CreateTestLeaderView(2, 1, "test-insert-channel", map[int64]int64{3: 1}, map[int64]*meta.Segment{})
view.TargetVersion = observer.target.GetCollectionTargetVersion(ctx, 1, meta.CurrentTarget)
observer.dist.LeaderViewManager.Update(2, view)
observer.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 2,
Version: 1,
View: &meta.LeaderView{
ID: 2,
CollectionID: 1,
Channel: "test-insert-channel",
TargetVersion: observer.target.GetCollectionTargetVersion(ctx, 1, meta.CurrentTarget),
Segments: map[int64]*querypb.SegmentDist{3: {NodeID: 1}},
},
})
tasks := suite.checker.Check(context.TODO())
suite.Len(tasks, 1)
@ -426,8 +505,21 @@ func (suite *LeaderCheckerTestSuite) TestIgnoreSyncRemovedSegments() {
channels, segments, nil)
observer.target.UpdateCollectionNextTarget(ctx, int64(1))
observer.dist.ChannelDistManager.Update(2, utils.CreateTestChannel(1, 2, 1, "test-insert-channel"))
observer.dist.LeaderViewManager.Update(2, utils.CreateTestLeaderView(2, 1, "test-insert-channel", map[int64]int64{3: 2, 2: 2}, map[int64]*meta.Segment{}))
observer.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 2,
Version: 1,
View: &meta.LeaderView{
ID: 2,
CollectionID: 1,
Channel: "test-insert-channel",
TargetVersion: observer.target.GetCollectionTargetVersion(ctx, 1, meta.CurrentTarget),
Segments: map[int64]*querypb.SegmentDist{3: {NodeID: 2}, 2: {NodeID: 2}},
},
})
tasks := suite.checker.Check(context.TODO())
suite.Len(tasks, 1)
@ -443,7 +535,7 @@ func (suite *LeaderCheckerTestSuite) TestIgnoreSyncRemovedSegments() {
func (suite *LeaderCheckerTestSuite) TestUpdatePartitionStats() {
ctx := context.Background()
testChannel := "test-insert-channel"
leaderID := int64(2)
// leaderID := int64(2)
observer := suite.checker
observer.meta.CollectionManager.PutCollection(ctx, utils.CreateTestCollection(1, 1))
observer.meta.CollectionManager.PutPartition(ctx, utils.CreateTestPartition(1, 1))
@ -477,14 +569,31 @@ func (suite *LeaderCheckerTestSuite) TestUpdatePartitionStats() {
observer.target.UpdateCollectionCurrentTarget(ctx, 1)
loadVersion := time.Now().UnixMilli()
observer.dist.SegmentDistManager.Update(1, utils.CreateTestSegment(1, 1, 2, 1, loadVersion, testChannel))
observer.dist.ChannelDistManager.Update(2, utils.CreateTestChannel(1, 2, 1, testChannel))
view := utils.CreateTestLeaderView(2, 1, testChannel, map[int64]int64{2: 1}, map[int64]*meta.Segment{})
view.PartitionStatsVersions = map[int64]int64{
1: 100,
}
// observer.dist.ChannelDistManager.Update(2, utils.CreateTestChannel(1, 2, 1, testChannel))
// view := utils.CreateTestLeaderView(2, 1, testChannel, map[int64]int64{2: 1}, map[int64]*meta.Segment{})
// view.PartitionStatsVersions = map[int64]int64{
// 1: 100,
// }
// current partition stat version in leader view is version100 for partition1
view.TargetVersion = observer.target.GetCollectionTargetVersion(ctx, 1, meta.CurrentTarget)
observer.dist.LeaderViewManager.Update(leaderID, view)
// view.TargetVersion = observer.target.GetCollectionTargetVersion(ctx, 1, meta.CurrentTarget)
// observer.dist.ShardLeaderManager.Update(leaderID, view)
observer.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 2,
Version: 1,
View: &meta.LeaderView{
ID: 2,
CollectionID: 1,
Channel: "test-insert-channel",
TargetVersion: observer.target.GetCollectionTargetVersion(ctx, 1, meta.CurrentTarget),
PartitionStatsVersions: map[int64]int64{
1: 100,
},
},
})
tasks = suite.checker.Check(context.TODO())
suite.Len(tasks, 1)


@ -128,13 +128,11 @@ func (c *SegmentChecker) checkReplica(ctx context.Context, replica *meta.Replica
// compare with targets to find the lack and redundancy of segments
lacks, redundancies := c.getSealedSegmentDiff(ctx, replica.GetCollectionID(), replica.GetID())
// loadCtx := trace.ContextWithSpan(context.Background(), c.meta.GetCollection(replica.CollectionID).LoadSpan)
tasks := c.createSegmentLoadTasks(c.getTraceCtx(ctx, replica.GetCollectionID()), lacks, replica)
task.SetReason("lacks of segment", tasks...)
task.SetPriority(task.TaskPriorityNormal, tasks...)
ret = append(ret, tasks...)
redundancies = c.filterSegmentInUse(ctx, replica, redundancies)
tasks = c.createSegmentReduceTasks(c.getTraceCtx(ctx, replica.GetCollectionID()), redundancies, replica, querypb.DataScope_Historical)
task.SetReason("segment not exists in target", tasks...)
task.SetPriority(task.TaskPriorityNormal, tasks...)
@ -142,7 +140,7 @@ func (c *SegmentChecker) checkReplica(ctx context.Context, replica *meta.Replica
// compare inner dists to find repeated loaded segments
redundancies = c.findRepeatedSealedSegments(ctx, replica.GetID())
redundancies = c.filterExistedOnLeader(replica, redundancies)
redundancies = c.filterInUsedByDelegator(replica, redundancies)
tasks = c.createSegmentReduceTasks(c.getTraceCtx(ctx, replica.GetCollectionID()), redundancies, replica, querypb.DataScope_Historical)
task.SetReason("redundancies of segment", tasks...)
// set deduplicate task priority to low, to avoid deduplicate task cancel balance task
@ -173,19 +171,15 @@ func (c *SegmentChecker) getGrowingSegmentDiff(ctx context.Context, collectionID
zap.Int64("collectionID", collectionID),
zap.Int64("replicaID", replica.GetID()))
leaders := c.dist.ChannelDistManager.GetShardLeadersByReplica(replica)
for channelName, node := range leaders {
view := c.dist.LeaderViewManager.GetLeaderShardView(node, channelName)
if view == nil {
log.Info("leaderView is not ready, skip", zap.String("channelName", channelName), zap.Int64("node", node))
continue
}
delegatorList := c.dist.ChannelDistManager.GetByFilter(meta.WithReplica2Channel(replica))
for _, d := range delegatorList {
view := d.View
targetVersion := c.targetMgr.GetCollectionTargetVersion(ctx, collectionID, meta.CurrentTarget)
if view.TargetVersion != targetVersion {
// before shard delegator update it's readable version, skip release segment
log.RatedInfo(20, "before shard delegator update it's readable version, skip release segment",
zap.String("channelName", channelName),
zap.Int64("nodeID", node),
zap.String("channelName", view.Channel),
zap.Int64("nodeID", view.ID),
zap.Int64("leaderVersion", view.TargetVersion),
zap.Int64("currentVersion", targetVersion),
)
@ -300,51 +294,32 @@ func (c *SegmentChecker) findRepeatedSealedSegments(ctx context.Context, replica
return segments
}
func (c *SegmentChecker) filterExistedOnLeader(replica *meta.Replica, segments []*meta.Segment) []*meta.Segment {
func (c *SegmentChecker) filterInUsedByDelegator(replica *meta.Replica, segments []*meta.Segment) []*meta.Segment {
filtered := make([]*meta.Segment, 0, len(segments))
delegatorList := c.dist.ChannelDistManager.GetByFilter(meta.WithReplica2Channel(replica))
ch2DelegatorList := lo.GroupBy(delegatorList, func(d *meta.DmChannel) string {
return d.View.Channel
})
for _, s := range segments {
leaderID, ok := c.dist.ChannelDistManager.GetShardLeader(replica, s.GetInsertChannel())
if !ok {
delegatorList := ch2DelegatorList[s.GetInsertChannel()]
if len(delegatorList) == 0 {
// skip deduplication if delegator is not found
continue
}
view := c.dist.LeaderViewManager.GetLeaderShardView(leaderID, s.GetInsertChannel())
if view == nil {
continue
usedByDelegator := false
for _, delegator := range delegatorList {
seg, ok := delegator.View.Segments[s.GetID()]
if ok && seg.NodeID == s.Node {
// if this segment is serving on leader, do not remove it for search available
usedByDelegator = true
break
}
}
seg, ok := view.Segments[s.GetID()]
if ok && seg.NodeID == s.Node {
// if this segment is serving on leader, do not remove it for search available
continue
if !usedByDelegator {
filtered = append(filtered, s)
}
filtered = append(filtered, s)
}
return filtered
}
func (c *SegmentChecker) filterSegmentInUse(ctx context.Context, replica *meta.Replica, segments []*meta.Segment) []*meta.Segment {
filtered := make([]*meta.Segment, 0, len(segments))
for _, s := range segments {
leaderID, ok := c.dist.ChannelDistManager.GetShardLeader(replica, s.GetInsertChannel())
if !ok {
continue
}
view := c.dist.LeaderViewManager.GetLeaderShardView(leaderID, s.GetInsertChannel())
if view == nil {
continue
}
currentTargetVersion := c.targetMgr.GetCollectionTargetVersion(ctx, s.CollectionID, meta.CurrentTarget)
partition := c.meta.CollectionManager.GetPartition(ctx, s.PartitionID)
// if the delegator has a valid target version but has not yet updated to the latest readable version, skip releasing its sealed segments
// Notice: if syncTargetVersion gets stuck, segments on the delegator won't be released
readableVersionNotUpdate := view.TargetVersion != initialTargetVersion && view.TargetVersion < currentTargetVersion
if partition != nil && readableVersionNotUpdate {
// leader view version hasn't been updated, the segment may still be in use
continue
}
filtered = append(filtered, s)
}
return filtered
}
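
For readers skimming the checker change, here is a minimal standalone sketch of the release-skip guard used in filterSegmentInUse above; the constant value and function name are illustrative stand-ins, not the package's own.

package main

import "fmt"

// local stand-in for the package's initialTargetVersion constant
const initialTargetVersion int64 = 0

// skipRelease mirrors the guard above: keep a sealed segment while the delegator
// holds a valid target version that is still behind the current collection target.
func skipRelease(viewTargetVersion, currentTargetVersion int64, partitionExists bool) bool {
	readableVersionNotUpdated := viewTargetVersion != initialTargetVersion && viewTargetVersion < currentTargetVersion
	return partitionExists && readableVersionNotUpdated
}

func main() {
	fmt.Println(skipRelease(3, 5, true)) // true: delegator lags behind, keep the segment
	fmt.Println(skipRelease(5, 5, true)) // false: delegator caught up, the segment may be released
	fmt.Println(skipRelease(0, 5, true)) // false: initial target version, release is allowed
}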
@ -361,7 +336,7 @@ func (c *SegmentChecker) createSegmentLoadTasks(ctx context.Context, segments []
plans := make([]balance.SegmentAssignPlan, 0)
for shard, segments := range shardSegments {
// if channel is not subscribed yet, skip load segments
leader := c.dist.LeaderViewManager.GetLatestShardLeaderByFilter(meta.WithReplica2LeaderView(replica), meta.WithChannelName2LeaderView(shard))
leader := c.dist.ChannelDistManager.GetShardLeader(shard, replica)
if leader == nil {
continue
}


@ -35,6 +35,7 @@ import (
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
"github.com/milvus-io/milvus/pkg/v2/kv"
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
"github.com/milvus-io/milvus/pkg/v2/util/etcd"
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
)
@ -144,8 +145,15 @@ func (suite *SegmentCheckerTestSuite) TestLoadSegments() {
checker.targetMgr.UpdateCollectionNextTarget(ctx, int64(1))
// set dist
checker.dist.ChannelDistManager.Update(2, utils.CreateTestChannel(1, 2, 1, "test-insert-channel"))
checker.dist.LeaderViewManager.Update(2, utils.CreateTestLeaderView(2, 1, "test-insert-channel", map[int64]int64{}, map[int64]*meta.Segment{}))
checker.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 2,
Version: 1,
View: &meta.LeaderView{ID: 2, CollectionID: 1, Channel: "test-insert-channel", Version: 1, Status: &querypb.LeaderViewStatus{Serviceable: true}},
})
tasks := checker.Check(context.TODO())
suite.Len(tasks, 1)
@ -234,9 +242,16 @@ func (suite *SegmentCheckerTestSuite) TestReleaseSegments() {
checker.targetMgr.UpdateCollectionNextTarget(ctx, int64(1))
// set dist
checker.dist.ChannelDistManager.Update(2, utils.CreateTestChannel(1, 2, 1, "test-insert-channel"))
checker.dist.LeaderViewManager.Update(2, utils.CreateTestLeaderView(2, 1, "test-insert-channel", map[int64]int64{}, map[int64]*meta.Segment{}))
checker.dist.SegmentDistManager.Update(1, utils.CreateTestSegment(1, 1, 2, 1, 1, "test-insert-channel"))
checker.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 2,
Version: 1,
View: &meta.LeaderView{ID: 2, CollectionID: 1, Channel: "test-insert-channel", Version: 1, Status: &querypb.LeaderViewStatus{Serviceable: true}},
})
tasks := checker.Check(context.TODO())
suite.Len(tasks, 1)
@ -276,10 +291,17 @@ func (suite *SegmentCheckerTestSuite) TestReleaseRepeatedSegments() {
checker.targetMgr.UpdateCollectionNextTarget(ctx, int64(1))
// set dist
checker.dist.ChannelDistManager.Update(2, utils.CreateTestChannel(1, 2, 1, "test-insert-channel"))
checker.dist.LeaderViewManager.Update(2, utils.CreateTestLeaderView(2, 1, "test-insert-channel", map[int64]int64{1: 2}, map[int64]*meta.Segment{}))
checker.dist.SegmentDistManager.Update(1, utils.CreateTestSegment(1, 1, 1, 1, 1, "test-insert-channel"))
checker.dist.SegmentDistManager.Update(2, utils.CreateTestSegment(1, 1, 1, 1, 2, "test-insert-channel"))
checker.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 2,
Version: 1,
View: utils.CreateTestLeaderView(2, 1, "test-insert-channel", map[int64]int64{1: 2}, map[int64]*meta.Segment{}),
})
tasks := checker.Check(context.TODO())
suite.Len(tasks, 1)
@ -293,7 +315,15 @@ func (suite *SegmentCheckerTestSuite) TestReleaseRepeatedSegments() {
suite.Equal(tasks[0].Priority(), task.TaskPriorityLow)
// test less version exist on leader
checker.dist.LeaderViewManager.Update(2, utils.CreateTestLeaderView(2, 1, "test-insert-channel", map[int64]int64{1: 1}, map[int64]*meta.Segment{}))
checker.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 2,
Version: 1,
View: utils.CreateTestLeaderView(2, 1, "test-insert-channel", map[int64]int64{1: 1}, map[int64]*meta.Segment{}),
})
tasks = checker.Check(context.TODO())
suite.Len(tasks, 0)
}
@ -335,9 +365,16 @@ func (suite *SegmentCheckerTestSuite) TestReleaseDirtySegments() {
checker.targetMgr.UpdateCollectionNextTarget(ctx, int64(1))
// set dist
checker.dist.ChannelDistManager.Update(2, utils.CreateTestChannel(1, 2, 1, "test-insert-channel"))
checker.dist.LeaderViewManager.Update(2, utils.CreateTestLeaderView(2, 1, "test-insert-channel", map[int64]int64{1: 2}, map[int64]*meta.Segment{}))
checker.dist.SegmentDistManager.Update(2, utils.CreateTestSegment(1, 1, 1, 1, 1, "test-insert-channel"))
checker.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 2,
Version: 1,
View: utils.CreateTestLeaderView(2, 1, "test-insert-channel", map[int64]int64{1: 2}, map[int64]*meta.Segment{}),
})
tasks := checker.Check(context.TODO())
suite.Len(tasks, 1)
@ -379,22 +416,27 @@ func (suite *SegmentCheckerTestSuite) TestSkipReleaseSealedSegments() {
checker.targetMgr.UpdateCollectionNextTarget(ctx, collectionID)
readableVersion := checker.targetMgr.GetCollectionTargetVersion(ctx, collectionID, meta.CurrentTarget)
// test a lower target version exists on the leader; the segment doesn't exist in the target, so the segment should be released
// test a lower target version exists on the leader; the segment doesn't exist in the target, so the segment shouldn't be released
nodeID := int64(2)
segmentID := int64(1)
checker.dist.ChannelDistManager.Update(nodeID, utils.CreateTestChannel(collectionID, nodeID, segmentID, "test-insert-channel"))
view := utils.CreateTestLeaderView(nodeID, collectionID, "test-insert-channel", map[int64]int64{segmentID: 2}, map[int64]*meta.Segment{})
view.TargetVersion = readableVersion - 1
checker.dist.LeaderViewManager.Update(nodeID, view)
checker.dist.SegmentDistManager.Update(nodeID, utils.CreateTestSegment(collectionID, partitionID, segmentID, nodeID, 2, "test-insert-channel"))
tasks := checker.Check(context.TODO())
suite.Len(tasks, 0)
checker.dist.ChannelDistManager.Update(nodeID, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
},
Node: 2,
Version: 1,
View: &meta.LeaderView{
ID: nodeID,
CollectionID: collectionID,
Channel: "test-insert-channel",
TargetVersion: readableVersion - 1,
Segments: map[int64]*querypb.SegmentDist{segmentID: {NodeID: nodeID}},
},
})
// test the leader's target version updated to the latest; the segment doesn't exist in the target, so the segment should be released
view = utils.CreateTestLeaderView(nodeID, collectionID, "test-insert-channel", map[int64]int64{1: 3}, map[int64]*meta.Segment{})
view.TargetVersion = readableVersion
checker.dist.LeaderViewManager.Update(2, view)
tasks = checker.Check(context.TODO())
tasks := checker.Check(context.TODO())
suite.Len(tasks, 1)
suite.Len(tasks[0].Actions(), 1)
action, ok := tasks[0].Actions()[0].(*task.SegmentAction)
@ -404,21 +446,6 @@ func (suite *SegmentCheckerTestSuite) TestSkipReleaseSealedSegments() {
suite.EqualValues(segmentID, action.GetSegmentID())
suite.EqualValues(nodeID, action.Node())
suite.Equal(tasks[0].Priority(), task.TaskPriorityNormal)
// test leader with initialTargetVersion; the segment doesn't exist in the target, so the segment should be released
view = utils.CreateTestLeaderView(nodeID, collectionID, "test-insert-channel", map[int64]int64{1: 3}, map[int64]*meta.Segment{})
view.TargetVersion = initialTargetVersion
checker.dist.LeaderViewManager.Update(2, view)
tasks = checker.Check(context.TODO())
suite.Len(tasks, 1)
suite.Len(tasks[0].Actions(), 1)
action, ok = tasks[0].Actions()[0].(*task.SegmentAction)
suite.True(ok)
suite.EqualValues(1, tasks[0].ReplicaID())
suite.Equal(task.ActionTypeReduce, action.Type())
suite.EqualValues(segmentID, action.GetSegmentID())
suite.EqualValues(nodeID, action.Node())
suite.Equal(tasks[0].Priority(), task.TaskPriorityNormal)
}
func (suite *SegmentCheckerTestSuite) TestReleaseGrowingSegments() {
@ -458,13 +485,25 @@ func (suite *SegmentCheckerTestSuite) TestReleaseGrowingSegments() {
growingSegments[4] = utils.CreateTestSegment(1, 1, 4, 2, 1, "test-insert-channel")
growingSegments[4].SegmentInfo.StartPosition = &msgpb.MsgPosition{Timestamp: 11}
dmChannel := utils.CreateTestChannel(1, 2, 1, "test-insert-channel")
dmChannel.UnflushedSegmentIds = []int64{2, 3}
checker.dist.ChannelDistManager.Update(2, dmChannel)
view := utils.CreateTestLeaderView(2, 1, "test-insert-channel", map[int64]int64{3: 2}, growingSegments)
view.TargetVersion = checker.targetMgr.GetCollectionTargetVersion(ctx, int64(1), meta.CurrentTarget)
checker.dist.LeaderViewManager.Update(2, view)
checker.dist.SegmentDistManager.Update(2, utils.CreateTestSegment(1, 1, 3, 2, 2, "test-insert-channel"))
checker.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
UnflushedSegmentIds: []int64{2, 3},
},
Node: 2,
Version: 1,
View: &meta.LeaderView{
ID: 2,
CollectionID: 1,
Channel: "test-insert-channel",
TargetVersion: checker.targetMgr.GetCollectionTargetVersion(ctx, int64(1), meta.CurrentTarget),
Segments: map[int64]*querypb.SegmentDist{3: {NodeID: 2}},
GrowingSegments: growingSegments,
Status: &querypb.LeaderViewStatus{Serviceable: true},
},
})
tasks := checker.Check(context.TODO())
suite.Len(tasks, 2)
@ -524,13 +563,25 @@ func (suite *SegmentCheckerTestSuite) TestReleaseCompactedGrowingSegments() {
growingSegments[4] = utils.CreateTestSegment(1, 1, 4, 2, 1, "test-insert-channel")
growingSegments[4].SegmentInfo.StartPosition = &msgpb.MsgPosition{Timestamp: 11}
dmChannel := utils.CreateTestChannel(1, 2, 1, "test-insert-channel")
dmChannel.UnflushedSegmentIds = []int64{2, 3}
checker.dist.ChannelDistManager.Update(2, dmChannel)
view := utils.CreateTestLeaderView(2, 1, "test-insert-channel", map[int64]int64{3: 2}, growingSegments)
view.TargetVersion = checker.targetMgr.GetCollectionTargetVersion(ctx, int64(1), meta.CurrentTarget)
checker.dist.LeaderViewManager.Update(2, view)
checker.dist.SegmentDistManager.Update(2, utils.CreateTestSegment(1, 1, 3, 2, 2, "test-insert-channel"))
checker.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
UnflushedSegmentIds: []int64{2, 3},
},
Node: 2,
Version: 1,
View: &meta.LeaderView{
ID: 2,
CollectionID: 1,
Channel: "test-insert-channel",
TargetVersion: checker.targetMgr.GetCollectionTargetVersion(ctx, int64(1), meta.CurrentTarget),
Segments: map[int64]*querypb.SegmentDist{3: {NodeID: 2}},
GrowingSegments: growingSegments,
Status: &querypb.LeaderViewStatus{Serviceable: true},
},
})
tasks := checker.Check(context.TODO())
suite.Len(tasks, 1)
@ -572,18 +623,45 @@ func (suite *SegmentCheckerTestSuite) TestSkipReleaseGrowingSegments() {
growingSegments[2] = utils.CreateTestSegment(1, 1, 2, 2, 0, "test-insert-channel")
growingSegments[2].SegmentInfo.StartPosition = &msgpb.MsgPosition{Timestamp: 2}
dmChannel := utils.CreateTestChannel(1, 2, 1, "test-insert-channel")
dmChannel.UnflushedSegmentIds = []int64{2, 3}
checker.dist.ChannelDistManager.Update(2, dmChannel)
view := utils.CreateTestLeaderView(2, 1, "test-insert-channel", map[int64]int64{}, growingSegments)
view.TargetVersion = checker.targetMgr.GetCollectionTargetVersion(ctx, int64(1), meta.CurrentTarget) - 1
checker.dist.LeaderViewManager.Update(2, view)
checker.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
UnflushedSegmentIds: []int64{2, 3},
},
Node: 2,
Version: 1,
View: &meta.LeaderView{
ID: 2,
CollectionID: 1,
Channel: "test-insert-channel",
TargetVersion: checker.targetMgr.GetCollectionTargetVersion(ctx, int64(1), meta.CurrentTarget) - 1,
Segments: map[int64]*querypb.SegmentDist{3: {NodeID: 2}},
GrowingSegments: growingSegments,
Status: &querypb.LeaderViewStatus{Serviceable: true},
},
})
tasks := checker.Check(context.TODO())
suite.Len(tasks, 0)
view.TargetVersion = checker.targetMgr.GetCollectionTargetVersion(ctx, int64(1), meta.CurrentTarget)
checker.dist.LeaderViewManager.Update(2, view)
checker.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 1,
ChannelName: "test-insert-channel",
UnflushedSegmentIds: []int64{2, 3},
},
Node: 2,
Version: 1,
View: &meta.LeaderView{
ID: 2,
CollectionID: 1,
Channel: "test-insert-channel",
TargetVersion: checker.targetMgr.GetCollectionTargetVersion(ctx, int64(1), meta.CurrentTarget),
Segments: map[int64]*querypb.SegmentDist{3: {NodeID: 2}},
GrowingSegments: growingSegments,
},
})
tasks = checker.Check(context.TODO())
suite.Len(tasks, 1)
suite.Len(tasks[0].Actions(), 1)


@ -36,14 +36,15 @@ type Controller interface {
}
type ControllerImpl struct {
mu sync.RWMutex
handlers map[int64]*distHandler
client session.Cluster
nodeManager *session.NodeManager
dist *meta.DistributionManager
targetMgr meta.TargetManagerInterface
scheduler task.Scheduler
syncTargetVersionFn TriggerUpdateTargetVersion
mu sync.RWMutex
handlers map[int64]*distHandler
client session.Cluster
nodeManager *session.NodeManager
dist *meta.DistributionManager
targetMgr meta.TargetManagerInterface
scheduler task.Scheduler
notifyFunc NotifyDelegatorChanges
}
func (dc *ControllerImpl) StartDistInstance(ctx context.Context, nodeID int64) {
@ -53,7 +54,7 @@ func (dc *ControllerImpl) StartDistInstance(ctx context.Context, nodeID int64) {
log.Info("node has started", zap.Int64("nodeID", nodeID))
return
}
h := newDistHandler(ctx, nodeID, dc.client, dc.nodeManager, dc.scheduler, dc.dist, dc.targetMgr, dc.syncTargetVersionFn)
h := newDistHandler(ctx, nodeID, dc.client, dc.nodeManager, dc.scheduler, dc.dist, dc.targetMgr, dc.notifyFunc)
dc.handlers[nodeID] = h
}
@ -101,15 +102,15 @@ func NewDistController(
dist *meta.DistributionManager,
targetMgr meta.TargetManagerInterface,
scheduler task.Scheduler,
syncTargetVersionFn TriggerUpdateTargetVersion,
notifyFunc NotifyDelegatorChanges,
) *ControllerImpl {
return &ControllerImpl{
handlers: make(map[int64]*distHandler),
client: client,
nodeManager: nodeManager,
dist: dist,
targetMgr: targetMgr,
scheduler: scheduler,
syncTargetVersionFn: syncTargetVersionFn,
handlers: make(map[int64]*distHandler),
client: client,
nodeManager: nodeManager,
dist: dist,
targetMgr: targetMgr,
scheduler: scheduler,
notifyFunc: notifyFunc,
}
}
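
A minimal sketch of the new NotifyDelegatorChanges hook as it could be handed to NewDistController; the callback body below is purely illustrative, the real one is wired by QueryCoord to refresh the affected collections.

package main

import "fmt"

// NotifyDelegatorChanges is declared in dist_handler.go as func(collectionID ...int64).
type NotifyDelegatorChanges = func(collectionID ...int64)

func main() {
	// hypothetical callback: react to collections whose delegators changed or became serviceable
	var notify NotifyDelegatorChanges = func(collectionIDs ...int64) {
		for _, id := range collectionIDs {
			fmt.Println("delegator changed, refresh shard leader cache for collection", id)
		}
	}
	// in this patch the callback is the last argument of NewDistController / newDistHandler
	notify(100, 101)
}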


@ -81,8 +81,8 @@ func (suite *DistControllerTestSuite) SetupTest() {
targetManager := meta.NewTargetManager(suite.broker, suite.meta)
suite.mockScheduler = task.NewMockScheduler(suite.T())
suite.mockScheduler.EXPECT().GetExecutedFlag(mock.Anything).Return(nil).Maybe()
syncTargetVersionFn := func(collectionID int64) {}
suite.controller = NewDistController(suite.mockCluster, suite.nodeMgr, distManager, targetManager, suite.mockScheduler, syncTargetVersionFn)
suite.controller = NewDistController(suite.mockCluster, suite.nodeMgr, distManager, targetManager, suite.mockScheduler, func(collectionID ...int64) {})
}
func (suite *DistControllerTestSuite) TearDownSuite() {


@ -27,6 +27,7 @@ import (
"google.golang.org/protobuf/proto"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
@ -44,6 +45,8 @@ import (
type TriggerUpdateTargetVersion = func(collectionID int64)
type NotifyDelegatorChanges = func(collectionID ...int64)
type distHandler struct {
nodeID int64
c chan struct{}
@ -57,7 +60,7 @@ type distHandler struct {
stopOnce sync.Once
lastUpdateTs int64
syncTargetVersionFn TriggerUpdateTargetVersion
notifyFunc NotifyDelegatorChanges
}
func (dh *distHandler) start(ctx context.Context) {
@ -140,7 +143,6 @@ func (dh *distHandler) handleDistResp(ctx context.Context, resp *querypb.GetData
)
dh.updateSegmentsDistribution(ctx, resp)
dh.updateChannelsDistribution(ctx, resp)
dh.updateLeaderView(ctx, resp)
}
if dispatchTask {
@ -148,6 +150,10 @@ func (dh *distHandler) handleDistResp(ctx context.Context, resp *querypb.GetData
}
}
func (dh *distHandler) SetNotifyFunc(notifyFunc NotifyDelegatorChanges) {
dh.notifyFunc = notifyFunc
}
func (dh *distHandler) updateSegmentsDistribution(ctx context.Context, resp *querypb.GetDataDistributionResponse) {
updates := make([]*meta.Segment, 0, len(resp.GetSegments()))
for _, s := range resp.GetSegments() {
@ -181,120 +187,129 @@ func (dh *distHandler) updateSegmentsDistribution(ctx context.Context, resp *que
}
func (dh *distHandler) updateChannelsDistribution(ctx context.Context, resp *querypb.GetDataDistributionResponse) {
updates := make([]*meta.DmChannel, 0, len(resp.GetChannels()))
for _, ch := range resp.GetChannels() {
channelInfo := dh.target.GetDmChannel(ctx, ch.GetCollection(), ch.GetChannel(), meta.CurrentTarget)
var channel *meta.DmChannel
if channelInfo == nil {
channel = &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
ChannelName: ch.GetChannel(),
CollectionID: ch.GetCollection(),
},
Node: resp.GetNodeID(),
Version: ch.GetVersion(),
}
} else {
channel = &meta.DmChannel{
VchannelInfo: channelInfo.VchannelInfo,
Node: resp.GetNodeID(),
Version: ch.GetVersion(),
}
}
updates = append(updates, channel)
}
dh.dist.ChannelDistManager.Update(resp.GetNodeID(), updates...)
}
func (dh *distHandler) updateLeaderView(ctx context.Context, resp *querypb.GetDataDistributionResponse) {
updates := make([]*meta.LeaderView, 0, len(resp.GetLeaderViews()))
channels := lo.SliceToMap(resp.GetChannels(), func(channel *querypb.ChannelVersionInfo) (string, *querypb.ChannelVersionInfo) {
return channel.GetChannel(), channel
channelMap := lo.SliceToMap(resp.GetChannels(), func(ch *querypb.ChannelVersionInfo) (string, *querypb.ChannelVersionInfo) {
return ch.GetChannel(), ch
})
collectionsToSync := typeutil.NewUniqueSet()
updates := make([]*meta.DmChannel, 0, len(resp.GetChannels()))
for _, lview := range resp.GetLeaderViews() {
segments := make(map[int64]*meta.Segment)
for ID, position := range lview.GrowingSegments {
// To maintain compatibility with older versions of QueryNode,
// QueryCoord should neither process nor interact with L0 segments.
segmentInfo := dh.target.GetSealedSegment(ctx, lview.GetCollection(), ID, meta.CurrentTargetFirst)
if segmentInfo != nil && segmentInfo.GetLevel() == datapb.SegmentLevel_L0 {
continue
}
segments[ID] = &meta.Segment{
SegmentInfo: &datapb.SegmentInfo{
ID: ID,
CollectionID: lview.GetCollection(),
StartPosition: position,
InsertChannel: lview.GetChannel(),
},
Node: resp.NodeID,
}
}
var version int64
channel, ok := channels[lview.GetChannel()]
if ok {
version = channel.GetVersion()
}
view := &meta.LeaderView{
ID: resp.GetNodeID(),
CollectionID: lview.GetCollection(),
Channel: lview.GetChannel(),
Version: version,
Segments: lview.GetSegmentDist(),
GrowingSegments: segments,
TargetVersion: lview.TargetVersion,
NumOfGrowingRows: lview.GetNumOfGrowingRows(),
PartitionStatsVersions: lview.PartitionStatsVersions,
}
updates = append(updates, view)
// check leader serviceable
if err := utils.CheckDelegatorDataReady(dh.nodeManager, dh.target, view, meta.CurrentTarget); err != nil {
view.UnServiceableError = err
log.Ctx(ctx).
WithRateGroup(fmt.Sprintf("distHandler.updateLeaderView.%s", view.Channel), 1, 60).
RatedInfo(10, "leader is not available due to distribution not ready",
zap.Int64("collectionID", view.CollectionID),
zap.Int64("nodeID", view.ID),
zap.String("channel", view.Channel),
zap.Error(err))
channel, ok := channelMap[lview.GetChannel()]
if !ok {
// unreachable path: QueryNode should return the leader view and the channel dist at the same time
log.Ctx(ctx).WithRateGroup("distHandler.updateChannelsDistribution", 1, 60).
RatedInfo(30, "channel not found in distribution",
zap.Int64("collectionID", lview.GetCollection()),
zap.String("channel", lview.GetChannel()))
continue
}
delegatorVersion := channel.GetVersion()
// if target version hasn't been synced, delegator will get empty readable segment list
// so shard leader should be unserviceable until target version is synced
currentTargetVersion := dh.target.GetCollectionTargetVersion(ctx, lview.GetCollection(), meta.CurrentTarget)
if lview.TargetVersion <= 0 {
err := merr.WrapErrServiceInternal(fmt.Sprintf("target version mismatch, collection: %d, channel: %s, current target version: %v, leader version: %v",
lview.GetCollection(), lview.GetChannel(), currentTargetVersion, lview.TargetVersion))
// Get or create channel info
collectionID := lview.GetCollection()
channelName := lview.GetChannel()
channelInfo := dh.target.GetDmChannel(ctx, collectionID, channelName, meta.CurrentTarget)
var vChannelInfo *datapb.VchannelInfo
if channelInfo != nil {
vChannelInfo = channelInfo.VchannelInfo
} else {
vChannelInfo = &datapb.VchannelInfo{
ChannelName: channelName,
CollectionID: collectionID,
}
}
view.UnServiceableError = err
// make dist handler pull next distribution until all delegator is serviceable
// build the growing segments map from the reported start positions
growings := lo.MapValues(lview.GetGrowingSegments(), func(position *msgpb.MsgPosition, id int64) *meta.Segment {
return &meta.Segment{
SegmentInfo: &datapb.SegmentInfo{
ID: id,
CollectionID: collectionID,
StartPosition: position,
InsertChannel: channelName,
},
Node: resp.GetNodeID(),
}
})
// Update DmChannel and register shard leader in same loop
dmChannel := &meta.DmChannel{
VchannelInfo: vChannelInfo,
Node: resp.NodeID,
Version: delegatorVersion,
View: &meta.LeaderView{
ID: resp.NodeID,
CollectionID: collectionID,
Channel: channelName,
Version: delegatorVersion,
Segments: lview.GetSegmentDist(),
GrowingSegments: growings,
NumOfGrowingRows: lview.GetNumOfGrowingRows(),
PartitionStatsVersions: lview.PartitionStatsVersions,
TargetVersion: lview.GetTargetVersion(),
Status: lview.GetStatus(),
},
}
updates = append(updates, dmChannel)
serviceable := checkDelegatorServiceable(ctx, dh, dmChannel.View)
// trigger pulling the next target until the shard leader is ready
if !serviceable {
dh.lastUpdateTs = 0
collectionsToSync.Insert(lview.Collection)
log.Ctx(ctx).
WithRateGroup(fmt.Sprintf("distHandler.updateLeaderView.%s", view.Channel), 1, 60).
RatedInfo(10, "leader is not available due to target version not ready",
zap.Int64("collectionID", view.CollectionID),
zap.Int64("nodeID", view.ID),
zap.String("channel", view.Channel),
zap.Error(err))
}
}
dh.dist.LeaderViewManager.Update(resp.GetNodeID(), updates...)
newLeaderOnNode := dh.dist.ChannelDistManager.Update(resp.GetNodeID(), updates...)
if dh.notifyFunc != nil {
collectionIDs := typeutil.NewUniqueSet()
for _, ch := range newLeaderOnNode {
collectionIDs.Insert(ch.VchannelInfo.CollectionID)
}
dh.notifyFunc(collectionIDs.Collect()...)
}
}
// segment and channel already loaded, trigger target observer to update
collectionsToSync.Range(func(collection int64) bool {
dh.syncTargetVersionFn(collection)
func checkDelegatorServiceable(ctx context.Context, dh *distHandler, view *meta.LeaderView) bool {
log := log.Ctx(ctx).
WithRateGroup(fmt.Sprintf("distHandler.updateChannelsDistribution.%s", view.Channel), 1, 60).
With(
zap.Int64("nodeID", view.ID),
zap.String("channel", view.Channel),
)
if status := view.Status; status != nil {
if !status.GetServiceable() {
log.RatedInfo(10, "delegator is not serviceable", zap.Int64("queryViewVersion", view.TargetVersion))
return false
}
return true
})
}
// for QueryNode versions before 2.5.8 (which don't report Status), fall back to checking leader data readiness
if err := utils.CheckDelegatorDataReady(dh.nodeManager, dh.target, view, meta.CurrentTarget); err != nil {
log.RatedInfo(10, "delegator is not serviceable due to distribution not ready", zap.Error(err))
view.Status = &querypb.LeaderViewStatus{
Serviceable: false,
}
return false
}
// if the target version hasn't been synced, the delegator will get an empty readable segment list,
// so the shard leader should be unserviceable until the target version is synced
currentTargetVersion := dh.target.GetCollectionTargetVersion(ctx, view.CollectionID, meta.CurrentTarget)
if view.TargetVersion <= 0 {
log.RatedInfo(10, "delegator is not serviceable due to target version not ready",
zap.Int64("currentTargetVersion", currentTargetVersion),
zap.Int64("leaderTargetVersion", view.TargetVersion))
view.Status = &querypb.LeaderViewStatus{
Serviceable: false,
}
return false
}
view.Status = &querypb.LeaderViewStatus{
Serviceable: true,
}
return true
}
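
As a quick reference, a condensed standalone sketch of the decision order implemented by checkDelegatorServiceable; the boolean inputs are simplified stand-ins for the repo's types.

package main

import "fmt"

// delegatorServiceable mirrors the order above: an explicit status reported by the
// QueryNode wins; otherwise fall back to the legacy data-ready check (pre-2.5.8)
// and require a synced (positive) target version.
func delegatorServiceable(statusReported, statusServiceable, dataReady bool, targetVersion int64) bool {
	if statusReported {
		return statusServiceable
	}
	if !dataReady {
		return false
	}
	return targetVersion > 0
}

func main() {
	fmt.Println(delegatorServiceable(true, true, false, 0))   // true: explicit status takes precedence
	fmt.Println(delegatorServiceable(false, false, true, 0))  // false: target version not synced yet
	fmt.Println(delegatorServiceable(false, false, true, 11)) // true: legacy path, data ready and version synced
}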
func (dh *distHandler) getDistribution(ctx context.Context) (*querypb.GetDataDistributionResponse, error) {
@ -337,17 +352,17 @@ func newDistHandler(
scheduler task.Scheduler,
dist *meta.DistributionManager,
targetMgr meta.TargetManagerInterface,
syncTargetVersionFn TriggerUpdateTargetVersion,
notifyFunc NotifyDelegatorChanges,
) *distHandler {
h := &distHandler{
nodeID: nodeID,
c: make(chan struct{}),
client: client,
nodeManager: nodeManager,
scheduler: scheduler,
dist: dist,
target: targetMgr,
syncTargetVersionFn: syncTargetVersionFn,
nodeID: nodeID,
c: make(chan struct{}),
client: client,
nodeManager: nodeManager,
scheduler: scheduler,
dist: dist,
target: targetMgr,
notifyFunc: notifyFunc,
}
h.wg.Add(1)
go h.start(ctx)


@ -24,6 +24,7 @@ import (
"github.com/cockroachdb/errors"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
"go.uber.org/atomic"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
@ -49,8 +50,7 @@ type DistHandlerSuite struct {
executedFlagChan chan struct{}
dist *meta.DistributionManager
target *meta.MockTargetManager
handler *distHandler
handler *distHandler
}
func (suite *DistHandlerSuite) SetupSuite() {
@ -114,8 +114,7 @@ func (suite *DistHandlerSuite) TestBasic() {
LastModifyTs: 1,
}, nil)
syncTargetVersionFn := func(collectionID int64) {}
suite.handler = newDistHandler(suite.ctx, suite.nodeID, suite.client, suite.nodeManager, suite.scheduler, suite.dist, suite.target, syncTargetVersionFn)
suite.handler = newDistHandler(suite.ctx, suite.nodeID, suite.client, suite.nodeManager, suite.scheduler, suite.dist, suite.target, func(collectionID ...int64) {})
defer suite.handler.stop()
time.Sleep(3 * time.Second)
@ -135,8 +134,7 @@ func (suite *DistHandlerSuite) TestGetDistributionFailed() {
}))
suite.client.EXPECT().GetDataDistribution(mock.Anything, mock.Anything, mock.Anything).Return(nil, errors.New("fake error"))
syncTargetVersionFn := func(collectionID int64) {}
suite.handler = newDistHandler(suite.ctx, suite.nodeID, suite.client, suite.nodeManager, suite.scheduler, suite.dist, suite.target, syncTargetVersionFn)
suite.handler = newDistHandler(suite.ctx, suite.nodeID, suite.client, suite.nodeManager, suite.scheduler, suite.dist, suite.target, func(collectionID ...int64) {})
defer suite.handler.stop()
time.Sleep(3 * time.Second)
@ -184,13 +182,85 @@ func (suite *DistHandlerSuite) TestForcePullDist() {
LastModifyTs: 1,
}, nil)
suite.executedFlagChan <- struct{}{}
syncTargetVersionFn := func(collectionID int64) {}
suite.handler = newDistHandler(suite.ctx, suite.nodeID, suite.client, suite.nodeManager, suite.scheduler, suite.dist, suite.target, syncTargetVersionFn)
suite.handler = newDistHandler(suite.ctx, suite.nodeID, suite.client, suite.nodeManager, suite.scheduler, suite.dist, suite.target, func(collectionID ...int64) {})
defer suite.handler.stop()
time.Sleep(300 * time.Millisecond)
}
func (suite *DistHandlerSuite) TestHandlerWithSyncDelegatorChanges() {
if suite.dispatchMockCall != nil {
suite.dispatchMockCall.Unset()
suite.dispatchMockCall = nil
}
suite.target.EXPECT().GetSealedSegmentsByChannel(mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(map[int64]*datapb.SegmentInfo{}).Maybe()
suite.dispatchMockCall = suite.scheduler.EXPECT().Dispatch(mock.Anything).Maybe()
suite.nodeManager.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
NodeID: 1,
Address: "localhost",
Hostname: "localhost",
}))
// Test scenario: update segments and channels distribution without replicaMgr
suite.client.EXPECT().GetDataDistribution(mock.Anything, mock.Anything, mock.Anything).Return(&querypb.GetDataDistributionResponse{
Status: merr.Success(),
NodeID: 1,
Channels: []*querypb.ChannelVersionInfo{
{
Channel: "test-channel-1",
Collection: 1,
Version: 1,
},
},
Segments: []*querypb.SegmentVersionInfo{
{
ID: 1,
Collection: 1,
Partition: 1,
Channel: "test-channel-1",
Version: 1,
},
},
LeaderViews: []*querypb.LeaderView{
{
Collection: 1,
Channel: "test-channel-1",
TargetVersion: 1011,
Status: &querypb.LeaderViewStatus{
Serviceable: true,
},
},
},
LastModifyTs: 2, // Different from previous test to ensure update happens
}, nil)
notifyCounter := atomic.NewInt32(0)
notifyFunc := func(collectionID ...int64) {
suite.Require().Equal(1, len(collectionID))
suite.Require().Equal(int64(1), collectionID[0])
notifyCounter.Inc()
}
suite.handler = newDistHandler(suite.ctx, suite.nodeID, suite.client, suite.nodeManager, suite.scheduler, suite.dist, suite.target, notifyFunc)
defer suite.handler.stop()
// Wait for distribution to be processed
time.Sleep(1000 * time.Millisecond)
// Verify that the distributions were updated correctly
segments := suite.dist.SegmentDistManager.GetByFilter(meta.WithNodeID(1))
suite.Require().Equal(1, len(segments))
suite.Require().Equal(int64(1), segments[0].SegmentInfo.ID)
channels := suite.dist.ChannelDistManager.GetByFilter(meta.WithNodeID2Channel(1))
suite.Require().Equal(1, len(channels))
suite.Require().Equal("test-channel-1", channels[0].VchannelInfo.ChannelName)
// Verify that the notification was called
suite.Require().Greater(notifyCounter.Load(), int32(0))
}
func TestDistHandlerSuite(t *testing.T) {
suite.Run(t, new(DistHandlerSuite))
}


@ -487,10 +487,10 @@ func (s *Server) fillReplicaInfo(ctx context.Context, replica *meta.Replica, wit
}
for _, channel := range channels {
leader, ok := s.dist.ChannelDistManager.GetShardLeader(replica, channel.GetChannelName())
leader := s.dist.ChannelDistManager.GetShardLeader(channel.ChannelName, replica)
var leaderInfo *session.NodeInfo
if ok {
leaderInfo = s.nodeMgr.Get(leader)
if leader != nil {
leaderInfo = s.nodeMgr.Get(leader.Node)
}
if leaderInfo == nil {
log.Warn("failed to get shard leader for shard",
@ -501,10 +501,10 @@ func (s *Server) fillReplicaInfo(ctx context.Context, replica *meta.Replica, wit
}
shard := &milvuspb.ShardReplica{
LeaderID: leader,
LeaderID: leader.Node,
LeaderAddr: leaderInfo.Addr(),
DmChannelName: channel.GetChannelName(),
NodeIds: []int64{leader},
NodeIds: []int64{leader.Node},
}
if withShardNodes {
shardNodes := lo.FilterMap(segments, func(segment *meta.Segment, _ int) (int64, bool) {


@ -1604,24 +1604,32 @@ func (suite *JobSuite) updateChannelDist(ctx context.Context, collection int64,
segments := lo.Flatten(lo.Values(suite.segments[collection]))
replicas := suite.meta.ReplicaManager.GetByCollection(ctx, collection)
targetVersion := suite.targetMgr.GetCollectionTargetVersion(ctx, collection, meta.CurrentTargetFirst)
for _, replica := range replicas {
if loaded {
i := 0
for _, node := range replica.GetNodes() {
suite.dist.ChannelDistManager.Update(node, meta.DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: collection,
ChannelName: channels[i],
}))
suite.dist.LeaderViewManager.Update(node, &meta.LeaderView{
ID: node,
CollectionID: collection,
Channel: channels[i],
Segments: lo.SliceToMap(segments, func(segment int64) (int64, *querypb.SegmentDist) {
return segment, &querypb.SegmentDist{
NodeID: node,
Version: time.Now().Unix(),
}
}),
suite.dist.ChannelDistManager.Update(node, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: collection,
ChannelName: channels[i],
},
Node: node,
View: &meta.LeaderView{
ID: node,
CollectionID: collection,
Channel: channels[i],
Segments: lo.SliceToMap(segments, func(segment int64) (int64, *querypb.SegmentDist) {
return segment, &querypb.SegmentDist{
NodeID: node,
Version: time.Now().Unix(),
}
}),
TargetVersion: targetVersion,
Status: &querypb.LeaderViewStatus{
Serviceable: true,
},
},
})
i++
if i >= len(channels) {
@ -1631,7 +1639,6 @@ func (suite *JobSuite) updateChannelDist(ctx context.Context, collection int64,
} else {
for _, node := range replica.GetNodes() {
suite.dist.ChannelDistManager.Update(node)
suite.dist.LeaderViewManager.Update(node)
}
}
}


@ -24,10 +24,48 @@ import (
"github.com/milvus-io/milvus/internal/util/metrics"
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
"github.com/milvus-io/milvus/pkg/v2/util/metricsinfo"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
type LeaderView struct {
ID int64
CollectionID int64
Channel string
Version int64
Segments map[int64]*querypb.SegmentDist
GrowingSegments map[int64]*Segment
TargetVersion int64
NumOfGrowingRows int64
PartitionStatsVersions map[int64]int64
Status *querypb.LeaderViewStatus
}
func (view *LeaderView) Clone() *LeaderView {
segments := make(map[int64]*querypb.SegmentDist)
for k, v := range view.Segments {
segments[k] = v
}
growings := make(map[int64]*Segment)
for k, v := range view.GrowingSegments {
growings[k] = v
}
return &LeaderView{
ID: view.ID,
CollectionID: view.CollectionID,
Channel: view.Channel,
Version: view.Version,
Segments: segments,
GrowingSegments: growings,
TargetVersion: view.TargetVersion,
NumOfGrowingRows: view.NumOfGrowingRows,
PartitionStatsVersions: view.PartitionStatsVersions,
}
}
type channelDistCriterion struct {
nodeIDs typeutil.Set[int64]
collectionID int64
@ -116,6 +154,7 @@ type DmChannel struct {
*datapb.VchannelInfo
Node int64
Version int64
View *LeaderView
}
func DmChannelFromVChannel(channel *datapb.VchannelInfo) *DmChannel {
@ -129,9 +168,23 @@ func (channel *DmChannel) Clone() *DmChannel {
VchannelInfo: proto.Clone(channel.VchannelInfo).(*datapb.VchannelInfo),
Node: channel.Node,
Version: channel.Version,
View: &LeaderView{
ID: channel.View.ID,
CollectionID: channel.View.CollectionID,
Channel: channel.View.Channel,
Version: channel.View.Version,
Status: proto.Clone(channel.View.Status).(*querypb.LeaderViewStatus),
},
}
}
func (channel *DmChannel) IsServiceable() bool {
if channel.View == nil {
return false
}
return channel.View.Status.GetServiceable()
}
func newDmChannelMetricsFrom(channel *DmChannel) *metricsinfo.DmChannel {
dmChannel := metrics.NewDMChannelFrom(channel.VchannelInfo)
dmChannel.NodeID = channel.Node
@ -171,6 +224,15 @@ func composeNodeChannels(channels ...*DmChannel) nodeChannels {
}
}
type ChannelDistManagerInterface interface {
GetByFilter(filters ...ChannelDistFilter) []*DmChannel
GetByCollectionAndFilter(collectionID int64, filters ...ChannelDistFilter) []*DmChannel
Update(nodeID typeutil.UniqueID, channels ...*DmChannel) []*DmChannel
GetShardLeader(channelName string, replica *Replica) *DmChannel
GetChannelDist(collectionID int64) []*metricsinfo.DmChannel
GetLeaderView(collectionID int64) []*metricsinfo.LeaderView
}
type ChannelDistManager struct {
rwmutex sync.RWMutex
@ -188,39 +250,6 @@ func NewChannelDistManager() *ChannelDistManager {
}
}
// todo by liuwei: should consider the case of duplicate leader exists
// GetShardLeader returns the node whthin the given replicaNodes and subscribing the given shard,
// returns (0, false) if not found.
func (m *ChannelDistManager) GetShardLeader(replica *Replica, shard string) (int64, bool) {
m.rwmutex.RLock()
defer m.rwmutex.RUnlock()
for _, node := range replica.GetNodes() {
channels := m.channels[node]
_, ok := channels.nameChannel[shard]
if ok {
return node, true
}
}
return 0, false
}
// todo by liuwei: should consider the case of duplicate leader exists
func (m *ChannelDistManager) GetShardLeadersByReplica(replica *Replica) map[string]int64 {
m.rwmutex.RLock()
defer m.rwmutex.RUnlock()
ret := make(map[string]int64)
for _, node := range replica.GetNodes() {
channels := m.channels[node]
for _, dmc := range channels.collChannels[replica.GetCollectionID()] {
ret[dmc.GetChannelName()] = node
}
}
return ret
}
// return all channels in list which match all given filters
func (m *ChannelDistManager) GetByFilter(filters ...ChannelDistFilter) []*DmChannel {
m.rwmutex.RLock()
@ -272,17 +301,23 @@ func (m *ChannelDistManager) GetByCollectionAndFilter(collectionID int64, filter
return ret
}
func (m *ChannelDistManager) Update(nodeID typeutil.UniqueID, channels ...*DmChannel) {
func (m *ChannelDistManager) Update(nodeID typeutil.UniqueID, channels ...*DmChannel) []*DmChannel {
m.rwmutex.Lock()
defer m.rwmutex.Unlock()
newServiceableChannels := make([]*DmChannel, 0)
for _, channel := range channels {
channel.Node = nodeID
old, ok := m.channels[nodeID].nameChannel[channel.GetChannelName()]
if channel.IsServiceable() && (!ok || !old.IsServiceable()) {
newServiceableChannels = append(newServiceableChannels, channel)
}
}
m.channels[nodeID] = composeNodeChannels(channels...)
m.updateCollectionIndex()
return newServiceableChannels
}
// update secondary index for channel distribution
@ -300,6 +335,45 @@ func (m *ChannelDistManager) updateCollectionIndex() {
}
}
// GetShardLeader returns the single delegator leader for the given channel within the given replica.
// Serviceable delegators are preferred; among delegators with equal serviceability, the one with the highest version wins.
// If no serviceable delegator exists, the highest-version one is returned.
func (m *ChannelDistManager) GetShardLeader(channelName string, replica *Replica) *DmChannel {
m.rwmutex.RLock()
defer m.rwmutex.RUnlock()
channels := m.collectionIndex[replica.GetCollectionID()]
var candidates *DmChannel
for _, channel := range channels {
if channel.GetChannelName() == channelName && replica.Contains(channel.Node) {
if candidates == nil {
candidates = channel
} else {
// Prioritize serviceability first, then version number
candidatesServiceable := candidates.IsServiceable()
channelServiceable := channel.IsServiceable()
updateNeeded := false
switch {
case !candidatesServiceable && channelServiceable:
// Current candidate is not serviceable but new channel is
updateNeeded = true
case candidatesServiceable == channelServiceable && channel.Version > candidates.Version:
// Same service status but higher version
updateNeeded = true
}
if updateNeeded {
candidates = channel
}
}
}
}
return candidates
}
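
A short usage sketch of the new selection rule, assuming the repo's module context; the collection ID, node IDs, and channel name are illustrative.

package main

import (
	"fmt"

	"github.com/milvus-io/milvus/internal/querycoordv2/meta"
	"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
	"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
)

func newDelegator(node, version int64, serviceable bool) *meta.DmChannel {
	return &meta.DmChannel{
		VchannelInfo: &datapb.VchannelInfo{CollectionID: 100, ChannelName: "dml-ch-0"},
		Node:         node,
		Version:      version,
		View: &meta.LeaderView{
			ID:           node,
			CollectionID: 100,
			Channel:      "dml-ch-0",
			Version:      version,
			Status:       &querypb.LeaderViewStatus{Serviceable: serviceable},
		},
	}
}

func main() {
	dist := meta.NewChannelDistManager()
	replica := meta.NewReplica(&querypb.Replica{ID: 1, CollectionID: 100, Nodes: []int64{1, 2}})

	// node 2 holds a newer but not yet serviceable delegator, node 1 an older serviceable one
	dist.Update(1, newDelegator(1, 1, true))
	dist.Update(2, newDelegator(2, 2, false))

	leader := dist.GetShardLeader("dml-ch-0", replica)
	fmt.Println(leader.Node, leader.IsServiceable()) // 1 true: serviceability beats version
}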
func (m *ChannelDistManager) GetChannelDist(collectionID int64) []*metricsinfo.DmChannel {
m.rwmutex.RLock()
defer m.rwmutex.RUnlock()
@ -321,3 +395,56 @@ func (m *ChannelDistManager) GetChannelDist(collectionID int64) []*metricsinfo.D
}
return ret
}
// GetLeaderView returns a slice of LeaderView objects, each representing the state of a shard leader (delegator).
// It traverses the channel distribution, converts each channel's LeaderView into a metricsinfo.LeaderView, and collects the results.
// If collectionID is greater than 0, only that collection's views are returned. The method holds the read lock for thread safety.
func (m *ChannelDistManager) GetLeaderView(collectionID int64) []*metricsinfo.LeaderView {
m.rwmutex.RLock()
defer m.rwmutex.RUnlock()
var ret []*metricsinfo.LeaderView
if collectionID > 0 {
if channels, ok := m.collectionIndex[collectionID]; ok {
for _, channel := range channels {
ret = append(ret, newMetricsLeaderViewFrom(channel.View))
}
}
return ret
}
for _, channels := range m.collectionIndex {
for _, channel := range channels {
ret = append(ret, newMetricsLeaderViewFrom(channel.View))
}
}
return ret
}
func newMetricsLeaderViewFrom(lv *LeaderView) *metricsinfo.LeaderView {
leaderView := &metricsinfo.LeaderView{
LeaderID: lv.ID,
CollectionID: lv.CollectionID,
Channel: lv.Channel,
Version: lv.Version,
SealedSegments: make([]*metricsinfo.Segment, 0, len(lv.Segments)),
GrowingSegments: make([]*metricsinfo.Segment, 0, len(lv.GrowingSegments)),
TargetVersion: lv.TargetVersion,
NumOfGrowingRows: lv.NumOfGrowingRows,
}
for segID, seg := range lv.Segments {
leaderView.SealedSegments = append(leaderView.SealedSegments, &metricsinfo.Segment{
SegmentID: segID,
NodeID: seg.NodeID,
})
}
for _, seg := range lv.GrowingSegments {
leaderView.GrowingSegments = append(leaderView.GrowingSegments, &metricsinfo.Segment{
SegmentID: seg.ID,
NodeID: seg.Node,
})
}
return leaderView
}


@ -25,7 +25,6 @@ import (
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
"github.com/milvus-io/milvus/pkg/v2/util/metricsinfo"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
type ChannelDistManagerSuite struct {
@ -42,14 +41,40 @@ func (suite *ChannelDistManagerSuite) SetupSuite() {
suite.collection = 10
suite.nodes = []int64{0, 1, 2}
suite.channels = map[string]*DmChannel{
"dmc0": DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: "dmc0",
}),
"dmc1": DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: "dmc1",
}),
"dmc0": {
VchannelInfo: &datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: "dmc0",
},
Node: 0,
Version: 1,
View: &LeaderView{
ID: 1,
CollectionID: suite.collection,
Channel: "dmc0",
Version: 1,
Status: &querypb.LeaderViewStatus{
Serviceable: true,
},
},
},
"dmc1": {
VchannelInfo: &datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: "dmc1",
},
Node: 1,
Version: 1,
View: &LeaderView{
ID: 1,
CollectionID: suite.collection,
Channel: "dmc1",
Version: 1,
Status: &querypb.LeaderViewStatus{
Serviceable: true,
},
},
},
}
}
@ -101,55 +126,6 @@ func (suite *ChannelDistManagerSuite) TestGetBy() {
suite.Len(channels, 0)
}
func (suite *ChannelDistManagerSuite) TestGetShardLeader() {
replicas := []*Replica{
NewReplica(
&querypb.Replica{
CollectionID: suite.collection,
},
typeutil.NewUniqueSet(suite.nodes[0], suite.nodes[2]),
),
NewReplica(
&querypb.Replica{
CollectionID: suite.collection,
},
typeutil.NewUniqueSet(suite.nodes[1]),
),
}
// Test on replica 0
leader0, ok := suite.dist.GetShardLeader(replicas[0], "dmc0")
suite.True(ok)
suite.Equal(suite.nodes[0], leader0)
leader1, ok := suite.dist.GetShardLeader(replicas[0], "dmc1")
suite.True(ok)
suite.Equal(suite.nodes[2], leader1)
// Test on replica 1
leader0, ok = suite.dist.GetShardLeader(replicas[1], "dmc0")
suite.True(ok)
suite.Equal(suite.nodes[1], leader0)
leader1, ok = suite.dist.GetShardLeader(replicas[1], "dmc1")
suite.True(ok)
suite.Equal(suite.nodes[1], leader1)
// Test no shard leader for given channel
_, ok = suite.dist.GetShardLeader(replicas[0], "invalid-shard")
suite.False(ok)
// Test on replica 0
leaders := suite.dist.GetShardLeadersByReplica(replicas[0])
suite.Len(leaders, 2)
suite.Equal(leaders["dmc0"], suite.nodes[0])
suite.Equal(leaders["dmc1"], suite.nodes[2])
// Test on replica 1
leaders = suite.dist.GetShardLeadersByReplica(replicas[1])
suite.Len(leaders, 2)
suite.Equal(leaders["dmc0"], suite.nodes[1])
suite.Equal(leaders["dmc1"], suite.nodes[1])
}
func (suite *ChannelDistManagerSuite) AssertNames(channels []*DmChannel, names ...string) bool {
for _, channel := range channels {
hasChannel := false
@ -188,22 +164,218 @@ func TestChannelDistManager(t *testing.T) {
suite.Run(t, new(ChannelDistManagerSuite))
}
func TestDmChannelClone(t *testing.T) {
// Test that Clone properly copies the View field including Status
originalChannel := &DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 100,
ChannelName: "test-channel",
},
Node: 1,
Version: 10,
View: &LeaderView{
ID: 5,
CollectionID: 100,
Channel: "test-channel",
Version: 20,
Status: &querypb.LeaderViewStatus{
Serviceable: true,
},
},
}
clonedChannel := originalChannel.Clone()
// Check all fields were properly cloned
assert.Equal(t, originalChannel.GetCollectionID(), clonedChannel.GetCollectionID())
assert.Equal(t, originalChannel.GetChannelName(), clonedChannel.GetChannelName())
assert.Equal(t, originalChannel.Node, clonedChannel.Node)
assert.Equal(t, originalChannel.Version, clonedChannel.Version)
// Check that View was properly cloned
assert.NotNil(t, clonedChannel.View)
assert.Equal(t, originalChannel.View.ID, clonedChannel.View.ID)
assert.Equal(t, originalChannel.View.CollectionID, clonedChannel.View.CollectionID)
assert.Equal(t, originalChannel.View.Channel, clonedChannel.View.Channel)
assert.Equal(t, originalChannel.View.Version, clonedChannel.View.Version)
// Check that Status was properly cloned
assert.NotNil(t, clonedChannel.View.Status)
assert.Equal(t, originalChannel.View.Status.GetServiceable(), clonedChannel.View.Status.GetServiceable())
// Verify that modifying the clone doesn't affect the original
clonedChannel.View.Status.Serviceable = false
assert.True(t, originalChannel.View.Status.GetServiceable())
assert.False(t, clonedChannel.View.Status.GetServiceable())
}
func TestDmChannelIsServiceable(t *testing.T) {
// Test serviceable channel
serviceableChannel := &DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 100,
ChannelName: "serviceable",
},
View: &LeaderView{
Status: &querypb.LeaderViewStatus{
Serviceable: true,
},
},
}
assert.True(t, serviceableChannel.IsServiceable())
// Test non-serviceable channel
nonServiceableChannel := &DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 100,
ChannelName: "non-serviceable",
},
View: &LeaderView{
Status: &querypb.LeaderViewStatus{
Serviceable: false,
},
},
}
assert.False(t, nonServiceableChannel.IsServiceable())
}
func (suite *ChannelDistManagerSuite) TestUpdateReturnsNewServiceableChannels() {
dist := NewChannelDistManager()
// Create a non-serviceable channel
nonServiceableChannel := suite.channels["dmc0"].Clone()
nonServiceableChannel.View.Status.Serviceable = false
// Update with non-serviceable channel first
newServiceableChannels := dist.Update(suite.nodes[0], nonServiceableChannel)
suite.Len(newServiceableChannels, 0, "No new serviceable channels should be returned")
// Now update with a serviceable channel
serviceableChannel := nonServiceableChannel.Clone()
serviceableChannel.View.Status.Serviceable = true
newServiceableChannels = dist.Update(suite.nodes[0], serviceableChannel)
suite.Len(newServiceableChannels, 1, "One new serviceable channel should be returned")
suite.Equal("dmc0", newServiceableChannels[0].GetChannelName())
// Update with same serviceable channel should not return it again
newServiceableChannels = dist.Update(suite.nodes[0], serviceableChannel)
suite.Len(newServiceableChannels, 0, "Already serviceable channel should not be returned")
// Add a different channel that's serviceable
newChannel := suite.channels["dmc1"].Clone()
newChannel.View.Status.Serviceable = true
newServiceableChannels = dist.Update(suite.nodes[0], serviceableChannel, newChannel)
suite.Len(newServiceableChannels, 1, "Only the new serviceable channel should be returned")
suite.Equal("dmc1", newServiceableChannels[0].GetChannelName())
}
func (suite *ChannelDistManagerSuite) TestGetShardLeader() {
dist := NewChannelDistManager()
// Create a replica
replicaPB := &querypb.Replica{
ID: 1,
CollectionID: suite.collection,
Nodes: []int64{0, 2},
}
replica := NewReplica(replicaPB)
// Create channels with different versions and serviceability
channel1Node0 := suite.channels["dmc0"].Clone()
channel1Node0.Version = 1
channel1Node0.View.Status.Serviceable = false
channel1Node2 := suite.channels["dmc0"].Clone()
channel1Node2.Node = 2
channel1Node2.Version = 2
channel1Node2.View.Status.Serviceable = false
// Update with non-serviceable channels
dist.Update(0, channel1Node0)
dist.Update(2, channel1Node2)
// Test getting leader with no serviceable channels - should return highest version
leader := dist.GetShardLeader("dmc0", replica)
suite.NotNil(leader)
suite.Equal(int64(2), leader.Node)
suite.Equal(int64(2), leader.Version)
// Now make one channel serviceable
channel1Node0.View.Status.Serviceable = true
dist.Update(0, channel1Node0)
// Test that serviceable channel is preferred even with lower version
leader = dist.GetShardLeader("dmc0", replica)
suite.NotNil(leader)
suite.Equal(int64(0), leader.Node)
suite.Equal(int64(1), leader.Version)
suite.True(leader.IsServiceable())
// Make both channels serviceable but with different versions
channel1Node2.View.Status.Serviceable = true
dist.Update(2, channel1Node2)
// Test that highest version is chosen among serviceable channels
leader = dist.GetShardLeader("dmc0", replica)
suite.NotNil(leader)
suite.Equal(int64(2), leader.Node)
suite.Equal(int64(2), leader.Version)
suite.True(leader.IsServiceable())
// Test channel not in replica
// Create a new replica with different nodes
replicaPB = &querypb.Replica{
ID: 1,
CollectionID: suite.collection,
Nodes: []int64{1},
}
replicaWithDifferentNodes := NewReplica(replicaPB)
leader = dist.GetShardLeader("dmc0", replicaWithDifferentNodes)
suite.Nil(leader)
// Test nonexistent channel
leader = dist.GetShardLeader("nonexistent", replica)
suite.Nil(leader)
}
func TestGetChannelDistJSON(t *testing.T) {
manager := NewChannelDistManager()
channel1 := DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: 100,
ChannelName: "channel-1",
})
channel1.Node = 1
channel1.Version = 1
channel2 := DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: 200,
ChannelName: "channel-2",
})
channel2.Node = 2
channel2.Version = 1
channel1 := &DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 100,
ChannelName: "channel-1",
},
Node: 1,
Version: 1,
View: &LeaderView{
ID: 1,
CollectionID: 100,
Channel: "channel-1",
Version: 1,
Status: &querypb.LeaderViewStatus{
Serviceable: true,
},
},
}
channel2 := &DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 200,
ChannelName: "channel-2",
},
Node: 2,
Version: 1,
View: &LeaderView{
ID: 1,
CollectionID: 200,
Channel: "channel-2",
Version: 1,
Status: &querypb.LeaderViewStatus{
Serviceable: true,
},
},
}
manager.Update(1, channel1)
manager.Update(2, channel2)


@ -25,16 +25,14 @@ import (
)
type DistributionManager struct {
*SegmentDistManager
*ChannelDistManager
*LeaderViewManager
SegmentDistManager SegmentDistManagerInterface
ChannelDistManager ChannelDistManagerInterface
}
func NewDistributionManager() *DistributionManager {
return &DistributionManager{
SegmentDistManager: NewSegmentDistManager(),
ChannelDistManager: NewChannelDistManager(),
LeaderViewManager: NewLeaderViewManager(),
}
}
@ -43,9 +41,9 @@ func NewDistributionManager() *DistributionManager {
// If there are no segments, channels, or leader views, it returns an empty string.
// In case of an error during JSON marshaling, it returns the error.
func (dm *DistributionManager) GetDistributionJSON(collectionID int64) string {
segments := dm.GetSegmentDist(collectionID)
channels := dm.GetChannelDist(collectionID)
leaderView := dm.GetLeaderView(collectionID)
segments := dm.SegmentDistManager.GetSegmentDist(collectionID)
channels := dm.ChannelDistManager.GetChannelDist(collectionID)
leaderView := dm.ChannelDistManager.GetLeaderView(collectionID)
dist := &metricsinfo.QueryCoordDist{
Segments: segments,


@ -43,42 +43,36 @@ func TestGetDistributionJSON(t *testing.T) {
manager.SegmentDistManager.Update(2, segment2)
// Add some channels to the ChannelDistManager
channel1 := DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: 100,
ChannelName: "channel-1",
manager.ChannelDistManager.Update(1, &DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 100,
ChannelName: "channel-1",
},
Node: 1,
Version: 1,
View: &LeaderView{
ID: 1,
CollectionID: 100,
Channel: "channel-1",
Version: 1,
Segments: map[int64]*querypb.SegmentDist{1: {NodeID: 1}},
},
})
channel1.Node = 1
channel1.Version = 1
channel2 := DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: 200,
ChannelName: "channel-2",
manager.ChannelDistManager.Update(2, &DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 200,
ChannelName: "channel-2",
},
Node: 2,
Version: 1,
View: &LeaderView{
ID: 2,
CollectionID: 200,
Channel: "channel-2",
Version: 1,
Segments: map[int64]*querypb.SegmentDist{2: {NodeID: 2}},
},
})
channel2.Node = 2
channel2.Version = 1
manager.ChannelDistManager.Update(1, channel1)
manager.ChannelDistManager.Update(2, channel2)
// Add some leader views to the LeaderViewManager
leaderView1 := &LeaderView{
ID: 1,
CollectionID: 100,
Channel: "channel-1",
Version: 1,
Segments: map[int64]*querypb.SegmentDist{1: {NodeID: 1}},
}
leaderView2 := &LeaderView{
ID: 2,
CollectionID: 200,
Channel: "channel-2",
Version: 1,
Segments: map[int64]*querypb.SegmentDist{2: {NodeID: 2}},
}
manager.LeaderViewManager.Update(1, leaderView1)
manager.LeaderViewManager.Update(2, leaderView2)
// Call GetDistributionJSON
jsonOutput := manager.GetDistributionJSON(0)


@ -1,377 +0,0 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package meta
import (
"sync"
"github.com/samber/lo"
"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
"github.com/milvus-io/milvus/pkg/v2/util/metricsinfo"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
type lvCriterion struct {
nodeID int64
channelName string
collectionID int64
hasOtherFilter bool
}
type LeaderViewFilter interface {
Match(*LeaderView) bool
AddFilter(*lvCriterion)
}
type lvFilterFunc func(view *LeaderView) bool
func (f lvFilterFunc) Match(view *LeaderView) bool {
return f(view)
}
func (f lvFilterFunc) AddFilter(c *lvCriterion) {
c.hasOtherFilter = true
}
type lvChannelNameFilter string
func (f lvChannelNameFilter) Match(v *LeaderView) bool {
return v.Channel == string(f)
}
func (f lvChannelNameFilter) AddFilter(c *lvCriterion) {
c.channelName = string(f)
}
type lvNodeFilter int64
func (f lvNodeFilter) Match(v *LeaderView) bool {
return v.ID == int64(f)
}
func (f lvNodeFilter) AddFilter(c *lvCriterion) {
c.nodeID = int64(f)
}
type lvCollectionFilter int64
func (f lvCollectionFilter) Match(v *LeaderView) bool {
return v.CollectionID == int64(f)
}
func (f lvCollectionFilter) AddFilter(c *lvCriterion) {
c.collectionID = int64(f)
}
func WithNodeID2LeaderView(nodeID int64) LeaderViewFilter {
return lvNodeFilter(nodeID)
}
func WithChannelName2LeaderView(channelName string) LeaderViewFilter {
return lvChannelNameFilter(channelName)
}
func WithCollectionID2LeaderView(collectionID int64) LeaderViewFilter {
return lvCollectionFilter(collectionID)
}
func WithReplica2LeaderView(replica *Replica) LeaderViewFilter {
return lvFilterFunc(func(view *LeaderView) bool {
if replica == nil {
return false
}
return replica.GetCollectionID() == view.CollectionID && replica.Contains(view.ID)
})
}
func WithSegment2LeaderView(segmentID int64, isGrowing bool) LeaderViewFilter {
return lvFilterFunc(func(view *LeaderView) bool {
if isGrowing {
_, ok := view.GrowingSegments[segmentID]
return ok
}
_, ok := view.Segments[segmentID]
return ok
})
}
func WithServiceable() LeaderViewFilter {
return lvFilterFunc(func(view *LeaderView) bool {
return view.UnServiceableError == nil
})
}
type LeaderView struct {
ID int64
CollectionID int64
Channel string
Version int64
Segments map[int64]*querypb.SegmentDist
GrowingSegments map[int64]*Segment
TargetVersion int64
NumOfGrowingRows int64
PartitionStatsVersions map[int64]int64
UnServiceableError error
}
func (view *LeaderView) Clone() *LeaderView {
segments := make(map[int64]*querypb.SegmentDist)
for k, v := range view.Segments {
segments[k] = v
}
growings := make(map[int64]*Segment)
for k, v := range view.GrowingSegments {
growings[k] = v
}
return &LeaderView{
ID: view.ID,
CollectionID: view.CollectionID,
Channel: view.Channel,
Version: view.Version,
Segments: segments,
GrowingSegments: growings,
TargetVersion: view.TargetVersion,
NumOfGrowingRows: view.NumOfGrowingRows,
PartitionStatsVersions: view.PartitionStatsVersions,
UnServiceableError: view.UnServiceableError,
}
}
type nodeViews struct {
views []*LeaderView
// channel name => LeaderView
channelView map[string]*LeaderView
// collection id => leader views
collectionViews map[int64][]*LeaderView
}
func (v nodeViews) Filter(criterion *lvCriterion, filters ...LeaderViewFilter) []*LeaderView {
mergedFilter := func(view *LeaderView) bool {
for _, filter := range filters {
if !filter.Match(view) {
return false
}
}
return true
}
var views []*LeaderView
switch {
case criterion.channelName != "":
if view, ok := v.channelView[criterion.channelName]; ok {
views = append(views, view)
}
case criterion.collectionID != 0:
views = v.collectionViews[criterion.collectionID]
default:
views = v.views
}
if criterion.hasOtherFilter {
views = lo.Filter(views, func(view *LeaderView, _ int) bool {
return mergedFilter(view)
})
}
return views
}
func composeNodeViews(views ...*LeaderView) nodeViews {
return nodeViews{
views: views,
channelView: lo.SliceToMap(views, func(view *LeaderView) (string, *LeaderView) {
return view.Channel, view
}),
collectionViews: lo.GroupBy(views, func(view *LeaderView) int64 {
return view.CollectionID
}),
}
}
type NotifyDelegatorChanges = func(collectionID ...int64)
type LeaderViewManager struct {
rwmutex sync.RWMutex
views map[int64]nodeViews // LeaderID -> Views (one per shard)
notifyFunc NotifyDelegatorChanges
}
func NewLeaderViewManager() *LeaderViewManager {
return &LeaderViewManager{
views: make(map[int64]nodeViews),
}
}
func (mgr *LeaderViewManager) SetNotifyFunc(notifyFunc NotifyDelegatorChanges) {
mgr.notifyFunc = notifyFunc
}
// Update updates the leader's views; all views must belong to the same leader ID
func (mgr *LeaderViewManager) Update(leaderID int64, views ...*LeaderView) {
mgr.rwmutex.Lock()
defer mgr.rwmutex.Unlock()
oldViews := make(map[string]*LeaderView, 0)
if _, ok := mgr.views[leaderID]; ok {
oldViews = mgr.views[leaderID].channelView
}
newViews := lo.SliceToMap(views, func(v *LeaderView) (string, *LeaderView) {
return v.Channel, v
})
// update leader views
mgr.views[leaderID] = composeNodeViews(views...)
// compute leader location changes and find the corresponding collections:
// 1. leader has been released from node
// 2. leader has been loaded to node
// 3. leader serviceable status changed
if mgr.notifyFunc != nil {
viewChanges := typeutil.NewUniqueSet()
for channel, oldView := range oldViews {
// if channel released from current node
if _, ok := newViews[channel]; !ok {
viewChanges.Insert(oldView.CollectionID)
}
}
serviceableChange := func(old, new *LeaderView) bool {
if old == nil || new == nil {
return true
}
return (old.UnServiceableError == nil) != (new.UnServiceableError == nil)
}
for channel, newView := range newViews {
// if channel loaded to current node
if oldView, ok := oldViews[channel]; !ok || serviceableChange(oldView, newView) {
viewChanges.Insert(newView.CollectionID)
}
}
mgr.notifyFunc(viewChanges.Collect()...)
}
}
func (mgr *LeaderViewManager) GetLeaderShardView(id int64, shard string) *LeaderView {
mgr.rwmutex.RLock()
defer mgr.rwmutex.RUnlock()
return mgr.views[id].channelView[shard]
}
func (mgr *LeaderViewManager) GetByFilter(filters ...LeaderViewFilter) []*LeaderView {
mgr.rwmutex.RLock()
defer mgr.rwmutex.RUnlock()
return mgr.getByFilter(filters...)
}
func (mgr *LeaderViewManager) getByFilter(filters ...LeaderViewFilter) []*LeaderView {
criterion := &lvCriterion{}
for _, filter := range filters {
filter.AddFilter(criterion)
}
var candidates []nodeViews
if criterion.nodeID > 0 {
nodeView, ok := mgr.views[criterion.nodeID]
if ok {
candidates = append(candidates, nodeView)
}
} else {
candidates = lo.Values(mgr.views)
}
var result []*LeaderView
for _, candidate := range candidates {
result = append(result, candidate.Filter(criterion, filters...)...)
}
return result
}
func (mgr *LeaderViewManager) GetLatestShardLeaderByFilter(filters ...LeaderViewFilter) *LeaderView {
mgr.rwmutex.RLock()
defer mgr.rwmutex.RUnlock()
views := mgr.getByFilter(filters...)
return lo.MaxBy(views, func(v1, v2 *LeaderView) bool {
return v1.Version > v2.Version
})
}
// GetLeaderView returns the leader views converted to metricsinfo.LeaderView objects, each representing the state of a shard leader.
// If collectionID is greater than 0, only views of that collection are returned; otherwise views of all collections are returned.
// The method locks the views map for reading to ensure thread safety.
func (mgr *LeaderViewManager) GetLeaderView(collectionID int64) []*metricsinfo.LeaderView {
mgr.rwmutex.RLock()
defer mgr.rwmutex.RUnlock()
var leaderViews []*metricsinfo.LeaderView
for _, nodeViews := range mgr.views {
var filteredViews []*LeaderView
if collectionID > 0 {
if lv, ok := nodeViews.collectionViews[collectionID]; ok {
filteredViews = lv
} else {
// skip if collectionID is not found
continue
}
} else {
// if collectionID is not set, return all leader views
filteredViews = nodeViews.views
}
for _, lv := range filteredViews {
errString := ""
if lv.UnServiceableError != nil {
errString = lv.UnServiceableError.Error()
}
leaderView := &metricsinfo.LeaderView{
LeaderID: lv.ID,
CollectionID: lv.CollectionID,
Channel: lv.Channel,
Version: lv.Version,
SealedSegments: make([]*metricsinfo.Segment, 0, len(lv.Segments)),
GrowingSegments: make([]*metricsinfo.Segment, 0, len(lv.GrowingSegments)),
TargetVersion: lv.TargetVersion,
NumOfGrowingRows: lv.NumOfGrowingRows,
UnServiceableError: errString,
}
for segID, seg := range lv.Segments {
leaderView.SealedSegments = append(leaderView.SealedSegments, &metricsinfo.Segment{
SegmentID: segID,
NodeID: seg.NodeID,
})
}
for _, seg := range lv.GrowingSegments {
leaderView.GrowingSegments = append(leaderView.GrowingSegments, &metricsinfo.Segment{
SegmentID: seg.ID,
NodeID: seg.Node,
})
}
leaderViews = append(leaderViews, leaderView)
}
}
return leaderViews
}
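The filters above all funnel into a single lvCriterion: WithNodeID2LeaderView, WithChannelName2LeaderView and WithCollectionID2LeaderView populate the fast-path fields that composeNodeViews indexes, while closure-based filters such as WithServiceable only set hasOtherFilter and run as a post-filter inside nodeViews.Filter. A minimal sketch of how these helpers compose, written as if it lived in the same package; the sketch and its values are illustrative and not part of this commit:

package meta

import "github.com/cockroachdb/errors"

// exampleFilterUsage is an illustrative sketch, not part of this commit.
func exampleFilterUsage() (serviceable []*LeaderView, latest *LeaderView) {
	mgr := NewLeaderViewManager()
	// Two views on leader node 1: one healthy, one marked unserviceable.
	mgr.Update(1,
		&LeaderView{ID: 1, CollectionID: 100, Channel: "dml-0", Version: 2},
		&LeaderView{
			ID:                 1,
			CollectionID:       100,
			Channel:            "dml-1",
			Version:            1,
			UnServiceableError: errors.New("delegator not ready"),
		},
	)
	// The collection filter takes the collectionViews fast path; WithServiceable
	// is applied afterwards, so only the healthy "dml-0" view survives.
	serviceable = mgr.GetByFilter(WithCollectionID2LeaderView(100), WithServiceable())
	// Highest-Version view among node 1's views.
	latest = mgr.GetLatestShardLeaderByFilter(WithNodeID2LeaderView(1))
	return serviceable, latest
}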

View File

@ -1,400 +0,0 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package meta
import (
"testing"
"github.com/cockroachdb/errors"
"github.com/samber/lo"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus/internal/json"
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
"github.com/milvus-io/milvus/pkg/v2/util/metricsinfo"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
type LeaderViewManagerSuite struct {
suite.Suite
// Data
collections []int64
channels map[int64][]string
growingSegments map[int64]map[string]int64
segments map[int64]map[string][]int64
nodes []int64
leaders map[int64]map[string]*LeaderView
// Test object
mgr *LeaderViewManager
}
func (suite *LeaderViewManagerSuite) SetupSuite() {
suite.collections = []int64{100, 101}
suite.channels = map[int64][]string{
100: {"100-dmc0", "100-dmc1"},
101: {"101-dmc0", "101-dmc1"},
}
suite.growingSegments = map[int64]map[string]int64{
100: {
"100-dmc0": 10,
"100-dmc1": 11,
},
101: {
"101-dmc0": 12,
"101-dmc1": 13,
},
}
suite.segments = map[int64]map[string][]int64{
100: {
"100-dmc0": []int64{1, 2},
"100-dmc1": {3, 4},
},
101: {
"101-dmc0": {5, 6},
"101-dmc1": {7, 8},
},
}
suite.nodes = []int64{1, 2}
// Leaders: 1, 2
suite.leaders = make(map[int64]map[string]*LeaderView)
for _, collection := range suite.collections {
for j := 0; j < 2; j++ {
channel := suite.channels[collection][j]
node := suite.nodes[j]
view := &LeaderView{
ID: node,
CollectionID: collection,
Channel: channel,
GrowingSegments: map[int64]*Segment{suite.growingSegments[collection][channel]: nil},
Segments: make(map[int64]*querypb.SegmentDist),
}
for _, segment := range suite.segments[collection][channel] {
view.Segments[segment] = &querypb.SegmentDist{
NodeID: node,
Version: 0,
}
}
if suite.leaders[node] == nil {
suite.leaders[node] = map[string]*LeaderView{
channel: view,
}
} else {
suite.leaders[node][channel] = view
}
}
}
}
func (suite *LeaderViewManagerSuite) SetupTest() {
suite.mgr = NewLeaderViewManager()
for id, views := range suite.leaders {
suite.mgr.Update(id, lo.Values(views)...)
}
}
func (suite *LeaderViewManagerSuite) TestGetByFilter() {
// Test WithChannelName
for collectionID, channels := range suite.channels {
for _, channel := range channels {
views := suite.mgr.GetByFilter(WithChannelName2LeaderView(channel))
suite.Len(views, 1)
suite.Equal(collectionID, views[0].CollectionID)
}
}
// Test WithCollection
for _, collectionID := range suite.collections {
views := suite.mgr.GetByFilter(WithCollectionID2LeaderView(collectionID))
suite.Len(views, 2)
suite.Equal(collectionID, views[0].CollectionID)
}
// Test WithNodeID
for _, nodeID := range suite.nodes {
views := suite.mgr.GetByFilter(WithNodeID2LeaderView(nodeID))
suite.Len(views, 2)
for _, view := range views {
suite.Equal(nodeID, view.ID)
}
}
// Test WithReplica
for i, collectionID := range suite.collections {
replica := newReplica(&querypb.Replica{
ID: int64(i),
CollectionID: collectionID,
Nodes: suite.nodes,
})
views := suite.mgr.GetByFilter(WithReplica2LeaderView(replica))
suite.Len(views, 2)
}
// Test WithSegment
for _, leaders := range suite.leaders {
for _, leader := range leaders {
for sid := range leader.Segments {
views := suite.mgr.GetByFilter(WithSegment2LeaderView(sid, false))
suite.Len(views, 1)
suite.Equal(views[0].ID, leader.ID)
suite.Equal(views[0].Channel, leader.Channel)
}
for sid := range leader.GrowingSegments {
views := suite.mgr.GetByFilter(WithSegment2LeaderView(sid, true))
suite.Len(views, 1)
suite.Equal(views[0].ID, leader.ID)
suite.Equal(views[0].Channel, leader.Channel)
}
view := suite.mgr.GetLeaderShardView(leader.ID, leader.Channel)
suite.Equal(view.ID, leader.ID)
suite.Equal(view.Channel, leader.Channel)
}
}
}
func (suite *LeaderViewManagerSuite) TestGetLatestShardLeader() {
nodeID := int64(1001)
collectionID := suite.collections[0]
channel := suite.channels[collectionID][0]
// add duplicate shard leader
view := &LeaderView{
ID: nodeID,
CollectionID: collectionID,
Channel: channel,
GrowingSegments: map[int64]*Segment{suite.growingSegments[collectionID][channel]: nil},
Segments: make(map[int64]*querypb.SegmentDist),
}
for _, segment := range suite.segments[collectionID][channel] {
view.Segments[segment] = &querypb.SegmentDist{
NodeID: nodeID,
Version: 1000,
}
}
view.Version = 1000
suite.mgr.Update(nodeID, view)
leader := suite.mgr.GetLatestShardLeaderByFilter(WithChannelName2LeaderView(channel))
suite.Equal(nodeID, leader.ID)
// test replica is nil
leader = suite.mgr.GetLatestShardLeaderByFilter(WithReplica2LeaderView(nil))
suite.Nil(leader)
}
func (suite *LeaderViewManagerSuite) TestClone() {
for _, leaders := range suite.leaders {
for _, leader := range leaders {
clone := leader.Clone()
suite.Equal(leader.ID, clone.ID)
suite.Equal(leader.Channel, clone.Channel)
suite.Equal(leader.CollectionID, clone.CollectionID)
}
}
}
func (suite *LeaderViewManagerSuite) TestNotifyDelegatorChanges() {
mgr := NewLeaderViewManager()
oldViews := []*LeaderView{
{
ID: 1,
CollectionID: 100,
Channel: "test-channel-1",
},
{
ID: 1,
CollectionID: 101,
Channel: "test-channel-2",
},
{
ID: 1,
CollectionID: 102,
Channel: "test-channel-3",
},
}
mgr.Update(1, oldViews...)
newViews := []*LeaderView{
{
ID: 1,
CollectionID: 101,
Channel: "test-channel-2",
},
{
ID: 1,
CollectionID: 102,
Channel: "test-channel-3",
},
{
ID: 1,
CollectionID: 103,
Channel: "test-channel-4",
},
}
retSet := typeutil.NewUniqueSet()
mgr.SetNotifyFunc(func(collectionIDs ...int64) {
retSet.Insert(collectionIDs...)
})
mgr.Update(1, newViews...)
suite.Equal(2, retSet.Len())
suite.True(retSet.Contain(100))
suite.True(retSet.Contain(103))
newViews1 := []*LeaderView{
{
ID: 1,
CollectionID: 101,
Channel: "test-channel-2",
UnServiceableError: errors.New("test error"),
},
{
ID: 1,
CollectionID: 102,
Channel: "test-channel-3",
UnServiceableError: errors.New("test error"),
},
{
ID: 1,
CollectionID: 103,
Channel: "test-channel-4",
UnServiceableError: errors.New("test error"),
},
}
retSet.Clear()
mgr.Update(1, newViews1...)
suite.Equal(3, len(retSet))
suite.True(retSet.Contain(101))
suite.True(retSet.Contain(102))
suite.True(retSet.Contain(103))
newViews2 := []*LeaderView{
{
ID: 1,
CollectionID: 101,
Channel: "test-channel-2",
UnServiceableError: errors.New("test error"),
},
{
ID: 1,
CollectionID: 102,
Channel: "test-channel-3",
},
{
ID: 1,
CollectionID: 103,
Channel: "test-channel-4",
},
}
retSet.Clear()
mgr.Update(1, newViews2...)
suite.Equal(2, len(retSet))
suite.True(retSet.Contain(102))
suite.True(retSet.Contain(103))
}
func TestLeaderViewManager(t *testing.T) {
suite.Run(t, new(LeaderViewManagerSuite))
}
func TestGetLeaderView(t *testing.T) {
manager := NewLeaderViewManager()
leaderView1 := &LeaderView{
ID: 1,
CollectionID: 100,
Channel: "channel-1",
Version: 1,
Segments: map[int64]*querypb.SegmentDist{1: {NodeID: 1}},
GrowingSegments: map[int64]*Segment{
1: {SegmentInfo: &datapb.SegmentInfo{ID: 1, CollectionID: 100, PartitionID: 10, InsertChannel: "channel-1", NumOfRows: 1000, State: commonpb.SegmentState_Growing}, Node: 1},
},
TargetVersion: 1,
NumOfGrowingRows: 1000,
UnServiceableError: nil,
}
leaderView2 := &LeaderView{
ID: 2,
CollectionID: 200,
Channel: "channel-2",
Version: 1,
Segments: map[int64]*querypb.SegmentDist{2: {NodeID: 2}},
GrowingSegments: map[int64]*Segment{
2: {SegmentInfo: &datapb.SegmentInfo{ID: 2, CollectionID: 200, PartitionID: 20, InsertChannel: "channel-2", NumOfRows: 2000, State: commonpb.SegmentState_Growing}, Node: 2},
},
TargetVersion: 1,
NumOfGrowingRows: 2000,
UnServiceableError: nil,
}
manager.Update(1, leaderView1)
manager.Update(2, leaderView2)
// Call GetLeaderView
leaderViews := manager.GetLeaderView(0)
jsonOutput, err := json.Marshal(leaderViews)
assert.NoError(t, err)
var result []*metricsinfo.LeaderView
err = json.Unmarshal(jsonOutput, &result)
assert.NoError(t, err)
assert.Len(t, result, 2)
checkResult := func(lv *metricsinfo.LeaderView) {
if lv.LeaderID == 1 {
assert.Equal(t, int64(100), lv.CollectionID)
assert.Equal(t, "channel-1", lv.Channel)
assert.Equal(t, int64(1), lv.Version)
assert.Len(t, lv.SealedSegments, 1)
assert.Len(t, lv.GrowingSegments, 1)
assert.Equal(t, int64(1), lv.SealedSegments[0].SegmentID)
assert.Equal(t, int64(1), lv.GrowingSegments[0].SegmentID)
} else if lv.LeaderID == 2 {
assert.Equal(t, int64(200), lv.CollectionID)
assert.Equal(t, "channel-2", lv.Channel)
assert.Equal(t, int64(1), lv.Version)
assert.Len(t, lv.SealedSegments, 1)
assert.Len(t, lv.GrowingSegments, 1)
assert.Equal(t, int64(2), lv.SealedSegments[0].SegmentID)
assert.Equal(t, int64(2), lv.GrowingSegments[0].SegmentID)
} else {
assert.Failf(t, "unexpected leader id", "unexpected leader id %d", lv.LeaderID)
}
}
for _, lv := range result {
checkResult(lv)
}
leaderViews = manager.GetLeaderView(1)
assert.Len(t, leaderViews, 0)
leaderViews = manager.GetLeaderView(100)
assert.Len(t, leaderViews, 1)
}

File diff suppressed because it is too large

View File

@ -8,6 +8,42 @@ import (
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
// ReplicaInterface defines read operations for replica metadata
type ReplicaInterface interface {
// Basic information
GetID() typeutil.UniqueID
GetCollectionID() typeutil.UniqueID
GetResourceGroup() string
// Node access
GetNodes() []int64
GetRONodes() []int64
GetRWNodes() []int64
GetROSQNodes() []int64
GetRWSQNodes() []int64
// Node iteration
RangeOverRWNodes(f func(node int64) bool)
RangeOverRONodes(f func(node int64) bool)
RangeOverRWSQNodes(f func(node int64) bool)
RangeOverROSQNodes(f func(node int64) bool)
// Node counting
RWNodesCount() int
RONodesCount() int
RWSQNodesCount() int
ROSQNodesCount() int
NodesCount() int
// Node existence checks
Contains(node int64) bool
ContainRONode(node int64) bool
ContainRWNode(node int64) bool
ContainSQNode(node int64) bool
ContainROSQNode(node int64) bool
ContainRWSQNode(node int64) bool
}
// NilReplica is used to represent a nil replica.
var NilReplica = newReplica(&querypb.Replica{
ID: -1,

View File

@ -37,6 +37,38 @@ import (
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
// ReplicaManagerInterface defines core operations for replica management
type ReplicaManagerInterface interface {
// Basic operations
Recover(ctx context.Context, collections []int64) error
Get(ctx context.Context, id typeutil.UniqueID) *Replica
Spawn(ctx context.Context, collection int64, replicaNumInRG map[string]int, channels []string) ([]*Replica, error)
// Replica manipulation
TransferReplica(ctx context.Context, collectionID typeutil.UniqueID, srcRGName string, dstRGName string, replicaNum int) error
MoveReplica(ctx context.Context, dstRGName string, toMove []*Replica) error
RemoveCollection(ctx context.Context, collectionID typeutil.UniqueID) error
RemoveReplicas(ctx context.Context, collectionID typeutil.UniqueID, replicas ...typeutil.UniqueID) error
// Query operations
GetByCollection(ctx context.Context, collectionID typeutil.UniqueID) []*Replica
GetByCollectionAndNode(ctx context.Context, collectionID, nodeID typeutil.UniqueID) *Replica
GetByNode(ctx context.Context, nodeID typeutil.UniqueID) []*Replica
GetByResourceGroup(ctx context.Context, rgName string) []*Replica
// Node management
RecoverNodesInCollection(ctx context.Context, collectionID typeutil.UniqueID, rgs map[string]typeutil.UniqueSet) error
RemoveNode(ctx context.Context, replicaID typeutil.UniqueID, nodes ...typeutil.UniqueID) error
RemoveSQNode(ctx context.Context, replicaID typeutil.UniqueID, nodes ...typeutil.UniqueID) error
// Metadata access
GetResourceGroupByCollection(ctx context.Context, collection typeutil.UniqueID) typeutil.Set[string]
GetReplicasJSON(ctx context.Context, meta *Meta) string
}
// Compile-time assertion that ReplicaManager implements ReplicaManagerInterface
var _ ReplicaManagerInterface = (*ReplicaManager)(nil)
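The blank-identifier assertion above is a compile-time check: it costs nothing at runtime and fails the build as soon as ReplicaManager drifts from ReplicaManagerInterface. A toy, self-contained illustration of the same idiom; the Store and memStore types below are made up for this sketch and are not part of the commit:

package main

// Store is a toy stand-in for ReplicaManagerInterface.
type Store interface {
	Get(id int64) string
}

// memStore is a toy stand-in for ReplicaManager.
type memStore struct{ data map[int64]string }

func (s *memStore) Get(id int64) string { return s.data[id] }

// Compile-time guard: if memStore stops satisfying Store, the build breaks
// here rather than at the first runtime use.
var _ Store = (*memStore)(nil)

func main() {}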
type ReplicaManager struct {
rwmutex sync.RWMutex

View File

@ -158,6 +158,12 @@ func (segment *Segment) Clone() *Segment {
}
}
type SegmentDistManagerInterface interface {
Update(nodeID typeutil.UniqueID, segments ...*Segment)
GetByFilter(filters ...SegmentDistFilter) []*Segment
GetSegmentDist(collectionID int64) []*metricsinfo.Segment
}
type SegmentDistManager struct {
rwmutex sync.RWMutex

View File

@ -325,8 +325,8 @@ func (ob *CollectionObserver) observeChannelStatus(ctx context.Context, collecti
subChannelCount := 0
for _, channel := range channelTargets {
views := ob.dist.LeaderViewManager.GetByFilter(meta.WithChannelName2LeaderView(channel.GetChannelName()))
nodes := lo.Map(views, func(v *meta.LeaderView, _ int) int64 { return v.ID })
delegatorList := ob.dist.ChannelDistManager.GetByFilter(meta.WithChannelName2Channel(channel.GetChannelName()))
nodes := lo.Map(delegatorList, func(v *meta.DmChannel, _ int) int64 { return v.Node })
group := utils.GroupNodesByReplica(ctx, ob.meta.ReplicaManager, collectionID, nodes)
subChannelCount += len(group)
}
@ -354,11 +354,14 @@ func (ob *CollectionObserver) observePartitionLoadStatus(ctx context.Context, pa
loadPercentage := int32(0)
for _, segment := range segmentTargets {
views := ob.dist.LeaderViewManager.GetByFilter(
meta.WithChannelName2LeaderView(segment.GetInsertChannel()),
meta.WithSegment2LeaderView(segment.GetID(), false))
nodes := lo.Map(views, func(view *meta.LeaderView, _ int) int64 { return view.ID })
group := utils.GroupNodesByReplica(ctx, ob.meta.ReplicaManager, partition.GetCollectionID(), nodes)
delegatorList := ob.dist.ChannelDistManager.GetByFilter(meta.WithChannelName2Channel(segment.GetInsertChannel()))
loadedSegmentNodes := make([]int64, 0)
for _, delegator := range delegatorList {
if delegator.View.Segments[segment.GetID()] != nil {
loadedSegmentNodes = append(loadedSegmentNodes, delegator.Node)
}
}
group := utils.GroupNodesByReplica(ctx, ob.meta.ReplicaManager, partition.GetCollectionID(), loadedSegmentNodes)
loadedCount += len(group)
}
loadPercentage = int32(loadedCount * 100 / (targetNum * int(replicaNum)))
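Read concretely, loadedCount counts (segment, replica-group) pairs: with, say, two target segments and two replicas, three loaded pairs give int32(3*100/(2*2)) = 75 percent loaded (the numbers are illustrative, not taken from the diff).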

View File

@ -258,49 +258,90 @@ func (suite *CollectionObserverSuite) TestObserve() {
// Collection 100 loaded before timeout,
// collection 101 timeout
suite.dist.LeaderViewManager.Update(1, &meta.LeaderView{
ID: 1,
CollectionID: 100,
Channel: "100-dmc0",
Segments: map[int64]*querypb.SegmentDist{1: {NodeID: 1, Version: 0}},
})
view := &meta.LeaderView{
ID: 2,
CollectionID: 103,
Channel: "103-dmc0",
Segments: make(map[int64]*querypb.SegmentDist),
ch1 := &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 100,
ChannelName: "100-dmc0",
},
Node: 1,
View: &meta.LeaderView{
ID: 1,
CollectionID: 100,
Channel: "100-dmc0",
Segments: map[int64]*querypb.SegmentDist{1: {NodeID: 1, Version: 0}},
},
}
suite.dist.LeaderViewManager.Update(2, &meta.LeaderView{
ID: 2,
CollectionID: 100,
Channel: "100-dmc1",
Segments: map[int64]*querypb.SegmentDist{2: {NodeID: 2, Version: 0}},
}, view)
suite.dist.ChannelDistManager.Update(1, ch1)
view1 := &meta.LeaderView{
ID: 3,
CollectionID: 102,
Channel: "102-dmc0",
Segments: map[int64]*querypb.SegmentDist{2: {NodeID: 5, Version: 0}},
ch2 := &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 103,
ChannelName: "103-dmc0",
},
Node: 2,
View: &meta.LeaderView{
ID: 2,
CollectionID: 103,
Channel: "103-dmc0",
Segments: make(map[int64]*querypb.SegmentDist),
},
}
ch3 := &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 100,
ChannelName: "100-dmc1",
},
Node: 2,
View: &meta.LeaderView{
ID: 2,
CollectionID: 100,
Channel: "100-dmc1",
Segments: map[int64]*querypb.SegmentDist{2: {NodeID: 2, Version: 0}},
},
}
suite.dist.ChannelDistManager.Update(2, ch2, ch3)
ch4 := &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 102,
ChannelName: "102-dmc0",
},
Node: 3,
View: &meta.LeaderView{
ID: 3,
CollectionID: 102,
Channel: "102-dmc0",
Segments: map[int64]*querypb.SegmentDist{2: {NodeID: 5, Version: 0}},
},
}
ch5 := &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 103,
ChannelName: "103-dmc0",
},
Node: 3,
View: &meta.LeaderView{
ID: 3,
CollectionID: 103,
Channel: "103-dmc0",
Segments: make(map[int64]*querypb.SegmentDist),
},
}
segmentsInfo, ok := suite.segments[103]
suite.True(ok)
view2 := &meta.LeaderView{
ID: 3,
CollectionID: 103,
Channel: "103-dmc0",
Segments: make(map[int64]*querypb.SegmentDist),
}
for _, segment := range segmentsInfo {
view2.Segments[segment.GetID()] = &querypb.SegmentDist{
NodeID: 3, Version: 0,
}
view.Segments[segment.GetID()] = &querypb.SegmentDist{
ch2.View.Segments[segment.GetID()] = &querypb.SegmentDist{
NodeID: 2, Version: 0,
}
ch5.View.Segments[segment.GetID()] = &querypb.SegmentDist{
NodeID: 3, Version: 0,
}
}
suite.dist.LeaderViewManager.Update(3, view1, view2)
suite.dist.ChannelDistManager.Update(3, ch4, ch5)
suite.broker.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(nil, nil).Maybe()
suite.broker.EXPECT().ListIndexes(mock.Anything, mock.Anything).Return(nil, nil).Maybe()
@ -331,23 +372,43 @@ func (suite *CollectionObserverSuite) TestObservePartition() {
// Partition 10 loaded
// Partition 11 timeout
suite.dist.LeaderViewManager.Update(1, &meta.LeaderView{
ID: 1,
CollectionID: 100,
Channel: "100-dmc0",
Segments: map[int64]*querypb.SegmentDist{1: {NodeID: 1, Version: 0}},
}, &meta.LeaderView{
ID: 1,
CollectionID: 101,
Channel: "",
Segments: map[int64]*querypb.SegmentDist{},
suite.dist.ChannelDistManager.Update(1, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 100,
ChannelName: "100-dmc0",
},
Node: 1,
View: &meta.LeaderView{
ID: 1,
CollectionID: 100,
Channel: "100-dmc0",
Segments: map[int64]*querypb.SegmentDist{1: {NodeID: 1, Version: 0}},
},
}, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 101,
ChannelName: "101-dmc0",
},
Node: 1,
View: &meta.LeaderView{
ID: 1,
CollectionID: 101,
Channel: "101-dmc0",
},
})
suite.dist.LeaderViewManager.Update(2, &meta.LeaderView{
ID: 2,
CollectionID: 100,
Channel: "100-dmc1",
Segments: map[int64]*querypb.SegmentDist{2: {NodeID: 2, Version: 0}},
suite.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: 100,
ChannelName: "100-dmc1",
},
Node: 2,
View: &meta.LeaderView{
ID: 2,
CollectionID: 100,
Channel: "100-dmc1",
Segments: map[int64]*querypb.SegmentDist{2: {NodeID: 2, Version: 0}},
},
})
suite.Eventually(func() bool {

View File

@ -34,6 +34,8 @@ import (
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
"github.com/milvus-io/milvus/internal/util/streamingutil"
"github.com/milvus-io/milvus/pkg/v2/kv"
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
"github.com/milvus-io/milvus/pkg/v2/streaming/util/types"
"github.com/milvus-io/milvus/pkg/v2/util/etcd"
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
@ -152,9 +154,15 @@ func (suite *ReplicaObserverSuite) TestCheckNodesInReplica() {
// Add some segment on nodes.
for nodeID := int64(1); nodeID <= 4; nodeID++ {
suite.distMgr.ChannelDistManager.Update(
nodeID,
utils.CreateTestChannel(suite.collectionID, nodeID, 1, "test-insert-channel1"))
suite.distMgr.ChannelDistManager.Update(nodeID, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: suite.collectionID,
ChannelName: "test-insert-channel1",
},
Node: nodeID,
Version: 1,
View: &meta.LeaderView{ID: nodeID, CollectionID: suite.collectionID, Channel: "test-insert-channel1", Status: &querypb.LeaderViewStatus{Serviceable: true}},
})
suite.distMgr.SegmentDistManager.Update(
nodeID,
utils.CreateTestSegment(suite.collectionID, suite.partitionID, 1, nodeID, 1, "test-insert-channel1"))
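This DmChannel-with-embedded-LeaderView construction, including the Serviceable status, recurs throughout the updated tests below. A hedged sketch of a helper that would build the same structure; the helper and its package placement are hypothetical, the field names are those used in this diff, and the meta import path follows the querycoordv2 layout referenced elsewhere in it:

package disttest // hypothetical package; place wherever these imports are available

import (
	"github.com/milvus-io/milvus/internal/querycoordv2/meta"
	"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
	"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
)

// newServiceableDmChannel mirrors the construction used in these tests: a
// DmChannel whose embedded LeaderView reports a serviceable status.
func newServiceableDmChannel(collectionID, nodeID int64, channel string) *meta.DmChannel {
	return &meta.DmChannel{
		VchannelInfo: &datapb.VchannelInfo{
			CollectionID: collectionID,
			ChannelName:  channel,
		},
		Node:    nodeID,
		Version: 1,
		View: &meta.LeaderView{
			ID:           nodeID,
			CollectionID: collectionID,
			Channel:      channel,
			Status:       &querypb.LeaderViewStatus{Serviceable: true},
		},
	}
}

In a test body this would read, for example, suite.distMgr.ChannelDistManager.Update(nodeID, newServiceableDmChannel(collectionID, nodeID, "test-insert-channel1")).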

View File

@ -393,35 +393,36 @@ func (ob *TargetObserver) shouldUpdateCurrentTarget(ctx context.Context, collect
return false
}
collectionReadyLeaders := make([]*meta.LeaderView, 0)
newVersion := ob.targetMgr.GetCollectionTargetVersion(ctx, collectionID, meta.NextTarget)
collReadyDelegatorList := make([]*meta.DmChannel, 0)
for channel := range channelNames {
channelReadyLeaders := lo.Filter(ob.distMgr.LeaderViewManager.GetByFilter(meta.WithChannelName2LeaderView(channel)), func(leader *meta.LeaderView, _ int) bool {
return utils.CheckDelegatorDataReady(ob.nodeMgr, ob.targetMgr, leader, meta.NextTarget) == nil
chReadyDelegatorList := lo.Filter(ob.distMgr.ChannelDistManager.GetByFilter(meta.WithChannelName2Channel(channel)), func(ch *meta.DmChannel, _ int) bool {
return (newVersion == ch.View.TargetVersion && ch.IsServiceable()) ||
utils.CheckDelegatorDataReady(ob.nodeMgr, ob.targetMgr, ch.View, meta.NextTarget) == nil
})
// to avoid getting stuck here when replicas are increased dynamically, we just check the number of available delegators
if int32(len(channelReadyLeaders)) < replicaNum {
if int32(len(chReadyDelegatorList)) < replicaNum {
log.RatedInfo(10, "channel not ready",
zap.Int("readyReplicaNum", len(channelReadyLeaders)),
zap.Int("readyReplicaNum", len(chReadyDelegatorList)),
zap.String("channelName", channel),
)
return false
}
collectionReadyLeaders = append(collectionReadyLeaders, channelReadyLeaders...)
collReadyDelegatorList = append(collReadyDelegatorList, chReadyDelegatorList...)
}
var partitions []int64
var indexInfo []*indexpb.IndexInfo
var err error
newVersion := ob.targetMgr.GetCollectionTargetVersion(ctx, collectionID, meta.NextTarget)
for _, leader := range collectionReadyLeaders {
updateVersionAction := ob.checkNeedUpdateTargetVersion(ctx, leader, newVersion)
for _, d := range collReadyDelegatorList {
updateVersionAction := ob.checkNeedUpdateTargetVersion(ctx, d.View, newVersion)
if updateVersionAction == nil {
continue
}
replica := ob.meta.ReplicaManager.GetByCollectionAndNode(ctx, collectionID, leader.ID)
replica := ob.meta.ReplicaManager.GetByCollectionAndNode(ctx, collectionID, d.Node)
if replica == nil {
log.Warn("replica not found", zap.Int64("nodeID", leader.ID), zap.Int64("collectionID", collectionID))
log.Warn("replica not found", zap.Int64("nodeID", d.Node), zap.Int64("collectionID", collectionID))
continue
}
// init all the meta information
@ -440,14 +441,14 @@ func (ob *TargetObserver) shouldUpdateCurrentTarget(ctx context.Context, collect
}
}
if !ob.sync(ctx, replica, leader, []*querypb.SyncAction{updateVersionAction}, partitions, indexInfo) {
if !ob.sync(ctx, replica, d.View, []*querypb.SyncAction{updateVersionAction}, partitions, indexInfo) {
return false
}
}
return true
}
func (ob *TargetObserver) sync(ctx context.Context, replica *meta.Replica, leaderView *meta.LeaderView, diffs []*querypb.SyncAction,
func (ob *TargetObserver) sync(ctx context.Context, replica *meta.Replica, LeaderView *meta.LeaderView, diffs []*querypb.SyncAction,
partitions []int64, indexInfo []*indexpb.IndexInfo,
) bool {
if len(diffs) == 0 {
@ -456,22 +457,22 @@ func (ob *TargetObserver) sync(ctx context.Context, replica *meta.Replica, leade
replicaID := replica.GetID()
log := log.With(
zap.Int64("leaderID", leaderView.ID),
zap.Int64("collectionID", leaderView.CollectionID),
zap.String("channel", leaderView.Channel),
zap.Int64("leaderID", LeaderView.ID),
zap.Int64("collectionID", LeaderView.CollectionID),
zap.String("channel", LeaderView.Channel),
)
req := &querypb.SyncDistributionRequest{
Base: commonpbutil.NewMsgBase(
commonpbutil.WithMsgType(commonpb.MsgType_SyncDistribution),
),
CollectionID: leaderView.CollectionID,
CollectionID: LeaderView.CollectionID,
ReplicaID: replicaID,
Channel: leaderView.Channel,
Channel: LeaderView.Channel,
Actions: diffs,
LoadMeta: &querypb.LoadMetaInfo{
LoadType: ob.meta.GetLoadType(ctx, leaderView.CollectionID),
CollectionID: leaderView.CollectionID,
LoadType: ob.meta.GetLoadType(ctx, LeaderView.CollectionID),
CollectionID: LeaderView.CollectionID,
PartitionIDs: partitions,
ResourceGroup: replica.GetResourceGroup(),
},
@ -481,7 +482,7 @@ func (ob *TargetObserver) sync(ctx context.Context, replica *meta.Replica, leade
ctx, cancel := context.WithTimeout(ctx, paramtable.Get().QueryCoordCfg.BrokerTimeout.GetAsDuration(time.Millisecond))
defer cancel()
resp, err := ob.cluster.SyncDistribution(ctx, leaderView.ID, req)
resp, err := ob.cluster.SyncDistribution(ctx, LeaderView.ID, req)
if err != nil {
log.Warn("failed to sync distribution", zap.Error(err))
return false

View File

@ -156,8 +156,13 @@ func (suite *TargetObserverSuite) TestTriggerUpdateTarget() {
len(suite.targetMgr.GetDmChannelsByCollection(ctx, suite.collectionID, meta.NextTarget)) == 2
}, 5*time.Second, 1*time.Second)
suite.distMgr.LeaderViewManager.Update(2,
&meta.LeaderView{
suite.distMgr.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: suite.collectionID,
ChannelName: "channel-1",
},
Node: 2,
View: &meta.LeaderView{
ID: 2,
CollectionID: suite.collectionID,
Channel: "channel-1",
@ -165,7 +170,13 @@ func (suite *TargetObserverSuite) TestTriggerUpdateTarget() {
11: {NodeID: 2},
},
},
&meta.LeaderView{
}, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: suite.collectionID,
ChannelName: "channel-2",
},
Node: 2,
View: &meta.LeaderView{
ID: 2,
CollectionID: suite.collectionID,
Channel: "channel-2",
@ -173,7 +184,7 @@ func (suite *TargetObserverSuite) TestTriggerUpdateTarget() {
12: {NodeID: 2},
},
},
)
})
// Never update the current target if it's empty, even if the next target is ready
suite.Eventually(func() bool {
@ -203,27 +214,36 @@ func (suite *TargetObserverSuite) TestTriggerUpdateTarget() {
// Manually update next target
ready, err := suite.observer.UpdateNextTarget(suite.collectionID)
suite.NoError(err)
ch1View := &meta.LeaderView{
ID: 2,
CollectionID: suite.collectionID,
Channel: "channel-1",
Segments: map[int64]*querypb.SegmentDist{
11: {NodeID: 2},
13: {NodeID: 2},
suite.distMgr.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: suite.collectionID,
ChannelName: "channel-1",
},
}
ch2View := &meta.LeaderView{
ID: 2,
CollectionID: suite.collectionID,
Channel: "channel-2",
Segments: map[int64]*querypb.SegmentDist{
12: {NodeID: 2},
Node: 2,
View: &meta.LeaderView{
ID: 2,
CollectionID: suite.collectionID,
Channel: "channel-1",
Segments: map[int64]*querypb.SegmentDist{
11: {NodeID: 2},
13: {NodeID: 2},
},
},
}
suite.distMgr.LeaderViewManager.Update(2, ch1View, ch2View)
}, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: suite.collectionID,
ChannelName: "channel-2",
},
Node: 2,
View: &meta.LeaderView{
ID: 2,
CollectionID: suite.collectionID,
Channel: "channel-2",
Segments: map[int64]*querypb.SegmentDist{
12: {NodeID: 2},
},
},
})
suite.broker.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(nil, nil).Maybe()
suite.broker.EXPECT().ListIndexes(mock.Anything, mock.Anything).Return(nil, nil).Maybe()
@ -242,6 +262,7 @@ func (suite *TargetObserverSuite) TestTriggerUpdateTarget() {
len(suite.targetMgr.GetDmChannelsByCollection(ctx, suite.collectionID, meta.CurrentTarget)) == 2
}, 7*time.Second, 1*time.Second)
ch1View := suite.distMgr.ChannelDistManager.GetByFilter(meta.WithChannelName2Channel("channel-1"))[0].View
action := suite.observer.checkNeedUpdateTargetVersion(ctx, ch1View, 100)
suite.Equal(action.GetDeleteCP().Timestamp, uint64(200))
}

View File

@ -357,9 +357,6 @@ func (s *Server) initQueryCoord() error {
s.initObserver()
// Init heartbeat
syncTargetVersionFn := func(collectionID int64) {
s.targetObserver.TriggerUpdateCurrentTarget(collectionID)
}
log.Info("init dist controller")
s.distController = dist.NewDistController(
s.cluster,
@ -367,7 +364,7 @@ func (s *Server) initQueryCoord() error {
s.dist,
s.targetMgr,
s.taskScheduler,
syncTargetVersionFn,
s.leaderCacheObserver.RegisterEvent,
)
// Init load status cache
@ -418,7 +415,6 @@ func (s *Server) initMeta() error {
s.dist = &meta.DistributionManager{
SegmentDistManager: meta.NewSegmentDistManager(),
ChannelDistManager: meta.NewChannelDistManager(),
LeaderViewManager: meta.NewLeaderViewManager(),
}
s.targetMgr = meta.NewTargetManager(s.broker, s.meta)
err = s.targetMgr.Recover(s.ctx, s.store)
@ -459,7 +455,6 @@ func (s *Server) initObserver() {
s.leaderCacheObserver = observers.NewLeaderCacheObserver(
s.proxyClientManager,
)
s.dist.LeaderViewManager.SetNotifyFunc(s.leaderCacheObserver.RegisterEvent)
}
func (s *Server) afterStart() {}
@ -774,7 +769,6 @@ func (s *Server) handleNodeDown(node int64) {
s.distController.Remove(node)
// Clear dist
s.dist.LeaderViewManager.Update(node)
s.dist.ChannelDistManager.Update(node)
s.dist.SegmentDistManager.Update(node)

View File

@ -43,7 +43,6 @@ import (
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/milvus-io/milvus/internal/querycoordv2/task"
"github.com/milvus-io/milvus/internal/util/sessionutil"
"github.com/milvus-io/milvus/pkg/v2/common"
"github.com/milvus-io/milvus/pkg/v2/log"
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
@ -577,16 +576,13 @@ func (suite *ServerSuite) hackServer() {
suite.server.nodeMgr,
)
syncTargetVersionFn := func(collectionID int64) {
suite.server.targetObserver.Check(context.Background(), collectionID, common.AllPartitionsID)
}
suite.server.distController = dist.NewDistController(
suite.server.cluster,
suite.server.nodeMgr,
suite.server.dist,
suite.server.targetMgr,
suite.server.taskScheduler,
syncTargetVersionFn,
suite.server.leaderCacheObserver.RegisterEvent,
)
suite.server.checkerController = checkers.NewCheckerController(
suite.server.meta,

View File

@ -1701,7 +1701,6 @@ func (suite *ServiceSuite) TestGetShardLeadersFailed() {
for _, node := range suite.nodes {
suite.dist.SegmentDistManager.Update(node)
suite.dist.ChannelDistManager.Update(node)
suite.dist.LeaderViewManager.Update(node)
}
suite.updateChannelDistWithoutSegment(ctx, collection)
suite.fetchHeartbeats(time.Now())
@ -1712,7 +1711,7 @@ func (suite *ServiceSuite) TestGetShardLeadersFailed() {
// channel not subscribed
for _, node := range suite.nodes {
suite.dist.LeaderViewManager.Update(node)
suite.dist.ChannelDistManager.Update(node)
}
for _, collection := range suite.collections {
req := &querypb.GetShardLeadersRequest{
@ -1955,20 +1954,26 @@ func (suite *ServiceSuite) updateChannelDist(ctx context.Context, collection int
for _, replica := range replicas {
i := 0
for _, node := range suite.sortInt64(replica.GetNodes()) {
suite.dist.ChannelDistManager.Update(node, meta.DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: collection,
ChannelName: channels[i],
}))
suite.dist.LeaderViewManager.Update(node, &meta.LeaderView{
ID: node,
CollectionID: collection,
Channel: channels[i],
Segments: lo.SliceToMap(segments, func(segment int64) (int64, *querypb.SegmentDist) {
return segment, &querypb.SegmentDist{
NodeID: node,
Version: time.Now().Unix(),
}
}),
suite.dist.ChannelDistManager.Update(node, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: collection,
ChannelName: channels[i],
},
Node: node,
View: &meta.LeaderView{
ID: node,
CollectionID: collection,
Channel: channels[i],
Segments: lo.SliceToMap(segments, func(segment int64) (int64, *querypb.SegmentDist) {
return segment, &querypb.SegmentDist{
NodeID: node,
Version: time.Now().Unix(),
}
}),
Status: &querypb.LeaderViewStatus{
Serviceable: true,
},
},
})
i++
if i >= len(channels) {
@ -1992,15 +1997,17 @@ func (suite *ServiceSuite) updateChannelDistWithoutSegment(ctx context.Context,
for _, replica := range replicas {
i := 0
for _, node := range suite.sortInt64(replica.GetNodes()) {
suite.dist.ChannelDistManager.Update(node, meta.DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: collection,
ChannelName: channels[i],
}))
suite.dist.LeaderViewManager.Update(node, &meta.LeaderView{
ID: node,
CollectionID: collection,
Channel: channels[i],
UnServiceableError: merr.ErrSegmentLack,
suite.dist.ChannelDistManager.Update(node, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: collection,
ChannelName: channels[i],
},
Node: node,
View: &meta.LeaderView{
ID: node,
CollectionID: collection,
Channel: channels[i],
},
})
i++
if i >= len(channels) {

View File

@ -176,9 +176,9 @@ func (action *ChannelAction) Desc() string {
}
func (action *ChannelAction) IsFinished(distMgr *meta.DistributionManager) bool {
views := distMgr.LeaderViewManager.GetByFilter(meta.WithChannelName2LeaderView(action.ChannelName()))
_, hasNode := lo.Find(views, func(v *meta.LeaderView) bool {
return v.ID == action.Node()
delegator := distMgr.ChannelDistManager.GetByFilter(meta.WithChannelName2Channel(action.ChannelName()))
_, hasNode := lo.Find(delegator, func(v *meta.DmChannel) bool {
return v.Node == action.Node()
})
isGrow := action.Type() == ActionTypeGrow

View File

@ -18,6 +18,7 @@ package task
import (
"context"
"fmt"
"sync"
"time"
@ -215,18 +216,18 @@ func (ex *Executor) loadSegment(task *SegmentTask, step int) error {
log.Warn(msg, zap.Error(err))
return err
}
view := ex.dist.LeaderViewManager.GetLatestShardLeaderByFilter(meta.WithReplica2LeaderView(replica), meta.WithChannelName2LeaderView(action.Shard))
view := ex.dist.ChannelDistManager.GetShardLeader(task.Shard(), replica)
if view == nil {
msg := "no shard leader for the segment to execute loading"
err = merr.WrapErrChannelNotFound(task.Shard(), "shard delegator not found")
log.Warn(msg, zap.Error(err))
return err
}
log = log.With(zap.Int64("shardLeader", view.ID))
log = log.With(zap.Int64("shardLeader", view.Node))
startTs := time.Now()
log.Info("load segments...")
status, err := ex.cluster.LoadSegments(task.Context(), view.ID, req)
status, err := ex.cluster.LoadSegments(task.Context(), view.Node, req)
err = merr.CheckRPCCall(status, err)
if err != nil {
log.Warn("failed to load segment", zap.Error(err))
@ -282,15 +283,15 @@ func (ex *Executor) releaseSegment(task *SegmentTask, step int) {
dstNode = action.Node()
req.NeedTransfer = false
} else {
view := ex.dist.LeaderViewManager.GetLatestShardLeaderByFilter(meta.WithReplica2LeaderView(replica), meta.WithChannelName2LeaderView(action.Shard))
view := ex.dist.ChannelDistManager.GetShardLeader(task.Shard(), replica)
if view == nil {
msg := "no shard leader for the segment to execute releasing"
err := merr.WrapErrChannelNotFound(task.Shard(), "shard delegator not found")
log.Warn(msg, zap.Error(err))
return
}
dstNode = view.ID
log = log.With(zap.Int64("shardLeader", view.ID))
dstNode = view.Node
log = log.With(zap.Int64("shardLeader", view.Node))
req.NeedTransfer = true
}
}
@ -376,6 +377,13 @@ func (ex *Executor) subscribeChannel(task *ChannelTask, step int) error {
log.Warn(msg, zap.String("channelName", action.ChannelName()))
return merr.WrapErrChannelReduplicate(action.ChannelName())
}
partitions, err = utils.GetPartitions(ctx, ex.targetMgr, task.collectionID)
if err != nil {
log.Warn("failed to get partitions", zap.Error(err))
return merr.WrapErrServiceInternal(fmt.Sprintf("failed to get partitions for collection=%d", task.CollectionID()))
}
req := packSubChannelRequest(
task,
action,
@ -383,6 +391,7 @@ func (ex *Executor) subscribeChannel(task *ChannelTask, step int) error {
loadMeta,
dmChannel,
indexInfo,
partitions,
)
err = fillSubChannelRequest(ctx, req, ex.broker, ex.shouldIncludeFlushedSegmentInfo(action.Node()))
if err != nil {

View File

@ -498,11 +498,9 @@ func (scheduler *taskScheduler) preAdd(task Task) error {
taskType := GetTaskType(task)
if taskType == TaskTypeMove {
views := scheduler.distMgr.LeaderViewManager.GetByFilter(
meta.WithChannelName2LeaderView(task.Shard()),
meta.WithSegment2LeaderView(task.SegmentID(), false))
if len(views) == 0 {
return merr.WrapErrServiceInternal("segment's delegator not found, stop balancing")
leader := scheduler.distMgr.ChannelDistManager.GetShardLeader(task.Shard(), task.replica)
if leader == nil {
return merr.WrapErrServiceInternal("segment's delegator leader not found, stop balancing")
}
segmentInTargetNode := scheduler.distMgr.SegmentDistManager.GetByFilter(meta.WithNodeID(task.Actions()[1].Node()), meta.WithSegmentID(task.SegmentID()))
if len(segmentInTargetNode) == 0 {
@ -530,15 +528,15 @@ func (scheduler *taskScheduler) preAdd(task Task) error {
taskType := GetTaskType(task)
if taskType == TaskTypeGrow {
views := scheduler.distMgr.LeaderViewManager.GetByFilter(meta.WithChannelName2LeaderView(task.Channel()))
nodesWithChannel := lo.Map(views, func(v *meta.LeaderView, _ int) UniqueID { return v.ID })
delegatorList := scheduler.distMgr.ChannelDistManager.GetByFilter(meta.WithChannelName2Channel(task.Channel()))
nodesWithChannel := lo.Map(delegatorList, func(v *meta.DmChannel, _ int) UniqueID { return v.Node })
replicaNodeMap := utils.GroupNodesByReplica(task.ctx, scheduler.meta.ReplicaManager, task.CollectionID(), nodesWithChannel)
if _, ok := replicaNodeMap[task.ReplicaID()]; ok {
return merr.WrapErrServiceInternal("channel subscribed, it can be only balanced")
}
} else if taskType == TaskTypeMove {
views := scheduler.distMgr.LeaderViewManager.GetByFilter(meta.WithChannelName2LeaderView(task.Channel()))
_, ok := lo.Find(views, func(v *meta.LeaderView) bool { return v.ID == task.Actions()[1].Node() })
delegatorList := scheduler.distMgr.ChannelDistManager.GetByFilter(meta.WithChannelName2Channel(task.Channel()))
_, ok := lo.Find(delegatorList, func(v *meta.DmChannel) bool { return v.Node == task.Actions()[1].Node() })
if !ok {
return merr.WrapErrServiceInternal("source channel unsubscribed, stop balancing")
}
@ -827,15 +825,14 @@ func (scheduler *taskScheduler) isRelated(task Task, node int64) bool {
if segment == nil {
continue
}
replica := scheduler.meta.ReplicaManager.GetByCollectionAndNode(task.ctx, task.CollectionID(), action.Node())
if replica == nil {
if task.replica == nil {
continue
}
leader, ok := scheduler.distMgr.GetShardLeader(replica, segment.GetInsertChannel())
if !ok {
leader := scheduler.distMgr.ChannelDistManager.GetShardLeader(task.Shard(), task.replica)
if leader == nil {
continue
}
if leader == node {
if leader.Node == node {
return true
}
}
@ -855,24 +852,23 @@ func (scheduler *taskScheduler) preProcess(task Task) bool {
actions, step := task.Actions(), task.Step()
for step < len(actions) && actions[step].IsFinished(scheduler.distMgr) {
if GetTaskType(task) == TaskTypeMove && actions[step].Type() == ActionTypeGrow {
var ready bool
switch actions[step].(type) {
var newDelegatorReady bool
switch action := actions[step].(type) {
case *ChannelAction:
// if a balance channel task has finished its grow action, block the reduce action until
// the segment distribution has been synced to the new delegator; the new delegator may
// take some time to load delta logs, and reducing the old delegator in advance would
// leave the new one unable to serve search/query, surfacing a "no available channel" error
channelAction := actions[step].(*ChannelAction)
leader := scheduler.distMgr.LeaderViewManager.GetLeaderShardView(channelAction.Node(), channelAction.Shard)
ready = leader.UnServiceableError == nil
// wait until the new delegator becomes the leader, then try to remove the old leader
task := task.(*ChannelTask)
delegator := scheduler.distMgr.ChannelDistManager.GetShardLeader(task.Shard(), task.replica)
newDelegatorReady = delegator != nil && delegator.Node == action.Node()
default:
ready = true
newDelegatorReady = true
}
if !ready {
log.Ctx(scheduler.ctx).WithRateGroup("qcv2.taskScheduler", 1, 60).RatedInfo(30, "Blocking reduce action in balance channel task",
zap.Int64("collectionID", task.CollectionID()),
zap.Int64("taskID", task.ID()))
if !newDelegatorReady {
log.Ctx(scheduler.ctx).
WithRateGroup("qcv2.preProcess", 1, 60).
RatedInfo(30, "Blocking reduce action in balance channel task",
zap.Int64("collectionID", task.CollectionID()),
zap.String("channelName", task.Shard()),
zap.Int64("taskID", task.ID()))
break
}
}
@ -1112,13 +1108,8 @@ func (scheduler *taskScheduler) checkSegmentTaskStale(task *SegmentTask) error {
return merr.WrapErrSegmentReduplicate(task.SegmentID(), "target doesn't contain this segment")
}
replica := scheduler.meta.ReplicaManager.GetByCollectionAndNode(task.ctx, task.CollectionID(), action.Node())
if replica == nil {
log.Ctx(task.Context()).Warn("task stale due to replica not found", WrapTaskLog(task)...)
return merr.WrapErrReplicaNotFound(task.CollectionID(), "by collectionID")
}
_, ok := scheduler.distMgr.GetShardLeader(replica, segment.GetInsertChannel())
if !ok {
leader := scheduler.distMgr.ChannelDistManager.GetShardLeader(task.Shard(), task.replica)
if leader == nil {
log.Ctx(task.Context()).Warn("task stale due to leader not found", WrapTaskLog(task)...)
return merr.WrapErrChannelNotFound(segment.GetInsertChannel(), "failed to get shard delegator")
}
@ -1171,21 +1162,15 @@ func (scheduler *taskScheduler) checkLeaderTaskStale(task *LeaderTask) error {
return merr.WrapErrSegmentReduplicate(task.SegmentID(), "target doesn't contain this segment")
}
replica := scheduler.meta.ReplicaManager.GetByCollectionAndNode(task.ctx, task.CollectionID(), action.Node())
if replica == nil {
log.Ctx(task.Context()).Warn("task stale due to replica not found", WrapTaskLog(task, zap.Int64("leaderID", task.leaderID))...)
return merr.WrapErrReplicaNotFound(task.CollectionID(), "by collectionID")
}
view := scheduler.distMgr.GetLeaderShardView(task.leaderID, task.Shard())
if view == nil {
leader := scheduler.distMgr.ChannelDistManager.GetShardLeader(task.Shard(), task.replica)
if leader == nil {
log.Ctx(task.Context()).Warn("task stale due to leader not found", WrapTaskLog(task, zap.Int64("leaderID", task.leaderID))...)
return merr.WrapErrChannelNotFound(task.Shard(), "failed to get shard delegator")
}
case ActionTypeReduce:
view := scheduler.distMgr.GetLeaderShardView(task.leaderID, task.Shard())
if view == nil {
leader := scheduler.distMgr.ChannelDistManager.GetShardLeader(task.Shard(), task.replica)
if leader == nil {
log.Ctx(task.Context()).Warn("task stale due to leader not found", WrapTaskLog(task, zap.Int64("leaderID", task.leaderID))...)
return merr.WrapErrChannelNotFound(task.Shard(), "failed to get shard delegator")
}

View File

@ -98,7 +98,13 @@ func (suite *TaskSuite) SetupSuite() {
params.Save(params.EtcdCfg.Endpoints.Key, strings.Join(addressList, ","))
suite.collection = 1000
suite.replica = newReplicaDefaultRG(10)
suite.replica = meta.NewReplica(&querypb.Replica{
CollectionID: suite.collection,
ID: 10,
ResourceGroup: meta.DefaultResourceGroupName,
Nodes: []int64{1, 2, 3},
}, typeutil.NewUniqueSet(1, 2, 3))
suite.subChannels = []string{
"sub-0",
"sub-1",
@ -289,15 +295,23 @@ func (suite *TaskSuite) TestSubscribeChannelTask() {
// Process tasks done
// Dist contains channels
views := make([]*meta.LeaderView, 0)
channels := []*meta.DmChannel{}
for _, channel := range suite.subChannels {
views = append(views, &meta.LeaderView{
ID: targetNode,
CollectionID: suite.collection,
Channel: channel,
channels = append(channels, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: channel,
},
Node: targetNode,
Version: 1,
View: &meta.LeaderView{
ID: targetNode,
CollectionID: suite.collection,
Channel: channel,
},
})
}
suite.dist.LeaderViewManager.Update(targetNode, views...)
suite.dist.ChannelDistManager.Update(targetNode, channels...)
suite.dispatchAndWait(targetNode)
suite.AssertTaskNum(0, 0, 0, 0)
@ -327,15 +341,23 @@ func (suite *TaskSuite) TestSubmitDuplicateSubscribeChannelTask() {
tasks = append(tasks, task)
}
views := make([]*meta.LeaderView, 0)
channels := []*meta.DmChannel{}
for _, channel := range suite.subChannels {
views = append(views, &meta.LeaderView{
ID: targetNode,
CollectionID: suite.collection,
Channel: channel,
channels = append(channels, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: channel,
},
Node: targetNode,
Version: 1,
View: &meta.LeaderView{
ID: targetNode,
CollectionID: suite.collection,
Channel: channel,
},
})
}
suite.dist.LeaderViewManager.Update(targetNode, views...)
suite.dist.ChannelDistManager.Update(targetNode, channels...)
for _, task := range tasks {
err := suite.scheduler.Add(task)
@ -378,10 +400,18 @@ func (suite *TaskSuite) TestUnsubscribeChannelTask() {
suite.target.UpdateCollectionNextTarget(ctx, suite.collection)
// Only first channel exists
suite.dist.LeaderViewManager.Update(targetNode, &meta.LeaderView{
ID: targetNode,
CollectionID: suite.collection,
Channel: suite.unsubChannels[0],
suite.dist.ChannelDistManager.Update(targetNode, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: suite.unsubChannels[0],
},
Node: targetNode,
Version: 1,
View: &meta.LeaderView{
ID: targetNode,
CollectionID: suite.collection,
Channel: suite.unsubChannels[0],
},
})
suite.AssertTaskNum(0, len(suite.unsubChannels), len(suite.unsubChannels), 0)
@ -390,7 +420,7 @@ func (suite *TaskSuite) TestUnsubscribeChannelTask() {
suite.AssertTaskNum(1, 0, 1, 0)
// Update dist
suite.dist.LeaderViewManager.Update(targetNode)
suite.dist.ChannelDistManager.Update(targetNode)
suite.dispatchAndWait(targetNode)
suite.AssertTaskNum(0, 0, 0, 0)
@ -440,11 +470,17 @@ func (suite *TaskSuite) TestLoadSegmentTask() {
suite.cluster.EXPECT().LoadSegments(mock.Anything, targetNode, mock.Anything).Return(merr.Success(), nil)
// Test load segment task
suite.dist.ChannelDistManager.Update(targetNode, meta.DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: channel.ChannelName,
}))
suite.dist.LeaderViewManager.Update(targetNode, utils.CreateTestLeaderView(targetNode, suite.collection, channel.ChannelName, map[int64]int64{}, map[int64]*meta.Segment{}))
suite.dist.ChannelDistManager.Update(targetNode, &meta.DmChannel{
VchannelInfo: channel,
Node: targetNode,
Version: 1,
View: &meta.LeaderView{
ID: targetNode,
CollectionID: suite.collection,
Channel: channel.ChannelName,
Status: &querypb.LeaderViewStatus{Serviceable: true},
},
})
tasks := []Task{}
segments := make([]*datapb.SegmentInfo, 0)
for _, segment := range suite.loadSegments {
@ -490,7 +526,15 @@ func (suite *TaskSuite) TestLoadSegmentTask() {
distSegments := lo.Map(segments, func(info *datapb.SegmentInfo, _ int) *meta.Segment {
return meta.SegmentFromInfo(info)
})
suite.dist.LeaderViewManager.Update(targetNode, view)
suite.dist.ChannelDistManager.Update(targetNode, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: channel.ChannelName,
},
Node: targetNode,
Version: 1,
View: view,
})
suite.dist.SegmentDistManager.Update(targetNode, distSegments...)
suite.dispatchAndWait(targetNode)
suite.AssertTaskNum(0, 0, 0, 0)
@ -541,11 +585,15 @@ func (suite *TaskSuite) TestLoadSegmentTaskNotIndex() {
suite.cluster.EXPECT().LoadSegments(mock.Anything, targetNode, mock.Anything).Return(merr.Success(), nil)
// Test load segment task
suite.dist.ChannelDistManager.Update(targetNode, meta.DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: channel.ChannelName,
}))
suite.dist.LeaderViewManager.Update(targetNode, utils.CreateTestLeaderView(targetNode, suite.collection, channel.ChannelName, map[int64]int64{}, map[int64]*meta.Segment{}))
suite.dist.ChannelDistManager.Update(targetNode, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: channel.ChannelName,
},
Node: targetNode,
Version: 1,
View: &meta.LeaderView{ID: targetNode, CollectionID: suite.collection, Channel: channel.ChannelName, Status: &querypb.LeaderViewStatus{Serviceable: true}},
})
tasks := []Task{}
segments := make([]*datapb.SegmentInfo, 0)
for _, segment := range suite.loadSegments {
@ -590,7 +638,6 @@ func (suite *TaskSuite) TestLoadSegmentTaskNotIndex() {
distSegments := lo.Map(segments, func(info *datapb.SegmentInfo, _ int) *meta.Segment {
return meta.SegmentFromInfo(info)
})
suite.dist.LeaderViewManager.Update(targetNode, view)
suite.dist.SegmentDistManager.Update(targetNode, distSegments...)
suite.dispatchAndWait(targetNode)
suite.AssertTaskNum(0, 0, 0, 0)
@ -635,11 +682,16 @@ func (suite *TaskSuite) TestLoadSegmentTaskFailed() {
}
// Test load segment task
suite.dist.ChannelDistManager.Update(targetNode, meta.DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: channel.ChannelName,
}))
suite.dist.LeaderViewManager.Update(targetNode, utils.CreateTestLeaderView(targetNode, suite.collection, channel.ChannelName, map[int64]int64{}, map[int64]*meta.Segment{}))
suite.dist.ChannelDistManager.Update(targetNode, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: channel.ChannelName,
},
Node: targetNode,
Version: 1,
View: &meta.LeaderView{ID: targetNode, CollectionID: suite.collection, Channel: channel.ChannelName, Status: &querypb.LeaderViewStatus{Serviceable: true}},
})
tasks := []Task{}
segments := make([]*datapb.SegmentInfo, 0)
for _, segment := range suite.loadSegments {
@ -727,7 +779,6 @@ func (suite *TaskSuite) TestReleaseSegmentTask() {
suite.NoError(err)
}
suite.dist.SegmentDistManager.Update(targetNode, segments...)
suite.dist.LeaderViewManager.Update(targetNode, view)
segmentsNum := len(suite.releaseSegments)
suite.AssertTaskNum(0, segmentsNum, 0, segmentsNum)
@ -737,7 +788,6 @@ func (suite *TaskSuite) TestReleaseSegmentTask() {
suite.AssertTaskNum(segmentsNum, 0, 0, segmentsNum)
// Process tasks done
suite.dist.LeaderViewManager.Update(targetNode)
suite.dispatchAndWait(targetNode)
suite.AssertTaskNum(0, 0, 0, 0)
@ -833,12 +883,23 @@ func (suite *TaskSuite) TestMoveSegmentTask() {
CollectionID: suite.collection,
ChannelName: channel.ChannelName,
}
suite.dist.ChannelDistManager.Update(leader, meta.DmChannelFromVChannel(vchannel))
suite.dist.ChannelDistManager.Update(leader, &meta.DmChannel{
VchannelInfo: vchannel,
Node: leader,
Version: 1,
View: &meta.LeaderView{
ID: leader,
CollectionID: suite.collection,
Channel: channel.ChannelName,
Status: &querypb.LeaderViewStatus{Serviceable: true},
},
})
view := &meta.LeaderView{
ID: leader,
CollectionID: suite.collection,
Channel: channel.ChannelName,
Segments: make(map[int64]*querypb.SegmentDist),
Status: &querypb.LeaderViewStatus{Serviceable: true},
}
tasks := []Task{}
segments := make([]*meta.Segment, 0)
@ -869,7 +930,14 @@ func (suite *TaskSuite) TestMoveSegmentTask() {
suite.target.UpdateCollectionNextTarget(ctx, suite.collection)
suite.target.UpdateCollectionCurrentTarget(ctx, suite.collection)
suite.dist.SegmentDistManager.Update(sourceNode, segments...)
suite.dist.LeaderViewManager.Update(leader, view)
suite.dist.ChannelDistManager.Update(leader, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: channel.ChannelName,
},
Node: leader,
View: view,
})
for _, task := range tasks {
err := suite.scheduler.Add(task)
suite.NoError(err)
@ -891,7 +959,6 @@ func (suite *TaskSuite) TestMoveSegmentTask() {
return meta.SegmentFromInfo(info)
})
suite.dist.LeaderViewManager.Update(leader, view)
suite.dist.SegmentDistManager.Update(targetNode, distSegments...)
// First action done, execute the second action
suite.dispatchAndWait(leader)
@ -920,7 +987,17 @@ func (suite *TaskSuite) TestMoveSegmentTaskStale() {
CollectionID: suite.collection,
ChannelName: channel.ChannelName,
}
suite.dist.ChannelDistManager.Update(leader, meta.DmChannelFromVChannel(vchannel))
suite.dist.ChannelDistManager.Update(leader, &meta.DmChannel{
VchannelInfo: vchannel,
Node: leader,
Version: 1,
View: &meta.LeaderView{
ID: leader,
CollectionID: suite.collection,
Channel: channel.ChannelName,
Status: &querypb.LeaderViewStatus{Serviceable: true},
},
})
view := &meta.LeaderView{
ID: leader,
CollectionID: suite.collection,
@ -952,7 +1029,6 @@ func (suite *TaskSuite) TestMoveSegmentTaskStale() {
suite.broker.EXPECT().GetRecoveryInfoV2(mock.Anything, suite.collection).Return([]*datapb.VchannelInfo{vchannel}, segmentInfos, nil)
suite.target.UpdateCollectionNextTarget(ctx, suite.collection)
suite.target.UpdateCollectionCurrentTarget(ctx, suite.collection)
suite.dist.LeaderViewManager.Update(leader, view)
for _, task := range tasks {
err := suite.scheduler.Add(task)
suite.Error(err)
@ -1002,11 +1078,16 @@ func (suite *TaskSuite) TestTaskCanceled() {
suite.cluster.EXPECT().LoadSegments(mock.Anything, targetNode, mock.Anything).Return(merr.Success(), nil)
// Test load segment task
suite.dist.ChannelDistManager.Update(targetNode, meta.DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: channel.ChannelName,
}))
suite.dist.LeaderViewManager.Update(targetNode, utils.CreateTestLeaderView(targetNode, suite.collection, channel.ChannelName, map[int64]int64{}, map[int64]*meta.Segment{}))
suite.dist.ChannelDistManager.Update(targetNode, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: channel.ChannelName,
},
Node: targetNode,
Version: 1,
View: &meta.LeaderView{ID: targetNode, CollectionID: suite.collection, Channel: channel.ChannelName, Status: &querypb.LeaderViewStatus{Serviceable: true}},
})
tasks := []Task{}
segmentInfos := []*datapb.SegmentInfo{}
for _, segment := range suite.loadSegments {
@ -1092,12 +1173,15 @@ func (suite *TaskSuite) TestSegmentTaskStale() {
suite.cluster.EXPECT().LoadSegments(mock.Anything, targetNode, mock.Anything).Return(merr.Success(), nil)
// Test load segment task
suite.meta.ReplicaManager.Put(ctx, createReplica(suite.collection, targetNode))
suite.dist.ChannelDistManager.Update(targetNode, meta.DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: channel.ChannelName,
}))
suite.dist.LeaderViewManager.Update(targetNode, utils.CreateTestLeaderView(targetNode, suite.collection, channel.ChannelName, map[int64]int64{}, map[int64]*meta.Segment{}))
suite.dist.ChannelDistManager.Update(targetNode, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: channel.ChannelName,
},
Node: targetNode,
Version: 1,
View: &meta.LeaderView{ID: targetNode, CollectionID: suite.collection, Channel: channel.ChannelName, Status: &querypb.LeaderViewStatus{Serviceable: true}},
})
tasks := []Task{}
for _, segment := range suite.loadSegments {
task, err := NewSegmentTask(
@ -1244,10 +1328,12 @@ func (suite *TaskSuite) TestLeaderTaskSet() {
suite.cluster.EXPECT().SyncDistribution(mock.Anything, targetNode, mock.Anything).Return(merr.Success(), nil)
// Test load segment task
suite.dist.ChannelDistManager.Update(targetNode, meta.DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: channel.ChannelName,
}))
suite.dist.ChannelDistManager.Update(targetNode, &meta.DmChannel{
VchannelInfo: channel,
Node: targetNode,
Version: 1,
View: &meta.LeaderView{ID: targetNode, CollectionID: suite.collection, Channel: channel.ChannelName, Status: &querypb.LeaderViewStatus{Serviceable: true}},
})
tasks := []Task{}
segments := make([]*datapb.SegmentInfo, 0)
for _, segment := range suite.loadSegments {
@ -1278,8 +1364,16 @@ func (suite *TaskSuite) TestLeaderTaskSet() {
CollectionID: suite.collection,
Channel: channel.GetChannelName(),
Segments: map[int64]*querypb.SegmentDist{},
Status: &querypb.LeaderViewStatus{Serviceable: true},
}
suite.dist.LeaderViewManager.Update(targetNode, view)
suite.dist.ChannelDistManager.Update(targetNode, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: channel.GetChannelName(),
},
Node: targetNode,
View: view,
})
// Process tasks
suite.dispatchAndWait(targetNode)
@ -1299,7 +1393,14 @@ func (suite *TaskSuite) TestLeaderTaskSet() {
distSegments := lo.Map(segments, func(info *datapb.SegmentInfo, _ int) *meta.Segment {
return meta.SegmentFromInfo(info)
})
suite.dist.LeaderViewManager.Update(targetNode, view)
suite.dist.ChannelDistManager.Update(targetNode, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: channel.GetChannelName(),
},
Node: targetNode,
View: view,
})
suite.dist.SegmentDistManager.Update(targetNode, distSegments...)
suite.dispatchAndWait(targetNode)
suite.AssertTaskNum(0, 0, 0, 0)
@ -1418,10 +1519,12 @@ func (suite *TaskSuite) TestNoExecutor() {
suite.meta.ReplicaManager.Put(ctx, utils.CreateTestReplica(suite.replica.GetID(), suite.collection, []int64{1, 2, 3, -1}))
// Test load segment task
suite.dist.ChannelDistManager.Update(targetNode, meta.DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: channel.ChannelName,
}))
suite.dist.ChannelDistManager.Update(targetNode, &meta.DmChannel{
VchannelInfo: channel,
Node: targetNode,
Version: 1,
View: &meta.LeaderView{ID: targetNode, CollectionID: suite.collection, Channel: channel.ChannelName, Status: &querypb.LeaderViewStatus{Serviceable: true}},
})
segments := make([]*datapb.SegmentInfo, 0)
for _, segment := range suite.loadSegments {
segments = append(segments, &datapb.SegmentInfo{
@ -1517,6 +1620,7 @@ func (suite *TaskSuite) TestLeaderTaskRemove() {
CollectionID: suite.collection,
Channel: channel.ChannelName,
Segments: make(map[int64]*querypb.SegmentDist),
Status: &querypb.LeaderViewStatus{Serviceable: true},
}
segments := make([]*meta.Segment, 0)
tasks := []Task{}
@ -1543,7 +1647,15 @@ func (suite *TaskSuite) TestLeaderTaskRemove() {
suite.NoError(err)
}
suite.dist.SegmentDistManager.Update(targetNode, segments...)
suite.dist.LeaderViewManager.Update(targetNode, view)
suite.dist.ChannelDistManager.Update(targetNode, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: suite.collection,
ChannelName: channel.ChannelName,
},
Node: targetNode,
Version: 1,
View: view,
})
segmentsNum := len(suite.releaseSegments)
suite.AssertTaskNum(0, segmentsNum, 0, segmentsNum)
@ -1552,10 +1664,9 @@ func (suite *TaskSuite) TestLeaderTaskRemove() {
suite.dispatchAndWait(targetNode)
suite.AssertTaskNum(segmentsNum, 0, 0, segmentsNum)
// mock leader view which has removed all segments
view.Segments = make(map[int64]*querypb.SegmentDist)
suite.dist.LeaderViewManager.Update(targetNode, view)
// Process tasks done
// suite.dist.LeaderViewManager.Update(targetNode)
suite.dispatchAndWait(targetNode)
suite.AssertTaskNum(0, 0, 0, 0)
@ -1590,7 +1701,7 @@ func createReplica(collection int64, nodes ...int64) *meta.Replica {
func (suite *TaskSuite) TestBalanceChannelTask() {
ctx := context.Background()
collectionID := int64(1)
collectionID := suite.collection
partitionID := int64(1)
channel := "channel-1"
vchannel := &datapb.VchannelInfo{
@ -1625,27 +1736,25 @@ func (suite *TaskSuite) TestBalanceChannelTask() {
suite.target.UpdateCollectionCurrentTarget(ctx, collectionID)
suite.target.UpdateCollectionNextTarget(ctx, collectionID)
suite.dist.LeaderViewManager.Update(2, &meta.LeaderView{
ID: 2,
CollectionID: collectionID,
Channel: channel,
Segments: map[int64]*querypb.SegmentDist{
1: {},
2: {},
3: {},
suite.dist.ChannelDistManager.Update(2, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: collectionID,
ChannelName: channel,
},
Node: 2,
Version: 1,
View: &meta.LeaderView{
ID: 2,
CollectionID: collectionID,
Channel: channel,
Status: &querypb.LeaderViewStatus{Serviceable: true},
},
})
suite.dist.LeaderViewManager.Update(1, &meta.LeaderView{
ID: 1,
CollectionID: collectionID,
Channel: channel,
UnServiceableError: merr.ErrSegmentLack,
})
task, err := NewChannelTask(context.Background(),
10*time.Second,
WrapIDSource(2),
collectionID,
meta.NilReplica,
suite.replica,
NewChannelAction(1, ActionTypeGrow, channel),
NewChannelAction(2, ActionTypeReduce, channel),
)
@ -1655,14 +1764,19 @@ func (suite *TaskSuite) TestBalanceChannelTask() {
suite.scheduler.preProcess(task)
suite.Equal(0, task.step)
suite.dist.LeaderViewManager.Update(1, &meta.LeaderView{
ID: 1,
CollectionID: collectionID,
Channel: channel,
Segments: map[int64]*querypb.SegmentDist{
1: {},
2: {},
3: {},
suite.dist.ChannelDistManager.Update(1, &meta.DmChannel{
VchannelInfo: &datapb.VchannelInfo{
CollectionID: collectionID,
ChannelName: channel,
},
Node: 1,
Version: 2,
View: &meta.LeaderView{
ID: 1,
CollectionID: collectionID,
Channel: channel,
Version: 2,
Status: &querypb.LeaderViewStatus{Serviceable: true},
},
})
@ -1670,7 +1784,7 @@ func (suite *TaskSuite) TestBalanceChannelTask() {
suite.scheduler.preProcess(task)
suite.Equal(1, task.step)
suite.dist.LeaderViewManager.Update(2)
suite.dist.ChannelDistManager.Update(2)
// old delegator removed
suite.scheduler.preProcess(task)
suite.Equal(2, task.step)
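
The tests above repeat the same literal many times: a DmChannel owned by one node whose embedded LeaderView is already marked serviceable. The following hypothetical helper (not part of this patch; it assumes the same meta, datapb and querypb imports as the surrounding test file) is one way to factor that pattern and shows the full shape of the new registration in one place:

// serviceableDmChannel is a hypothetical test helper: it builds a DmChannel
// owned by `node` whose embedded LeaderView is already marked serviceable.
func serviceableDmChannel(node, collection int64, channelName string) *meta.DmChannel {
	return &meta.DmChannel{
		VchannelInfo: &datapb.VchannelInfo{
			CollectionID: collection,
			ChannelName:  channelName,
		},
		Node:    node,
		Version: 1,
		View: &meta.LeaderView{
			ID:           node,
			CollectionID: collection,
			Channel:      channelName,
			Status:       &querypb.LeaderViewStatus{Serviceable: true},
		},
	}
}

// usage, mirroring the updates above:
// suite.dist.ChannelDistManager.Update(targetNode, serviceableDmChannel(targetNode, suite.collection, channel.ChannelName))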

View File

@ -207,6 +207,7 @@ func packSubChannelRequest(
loadMeta *querypb.LoadMetaInfo,
channel *meta.DmChannel,
indexInfo []*indexpb.IndexInfo,
partitions []int64,
) *querypb.WatchDmChannelsRequest {
return &querypb.WatchDmChannelsRequest{
Base: commonpbutil.NewMsgBase(
@ -215,6 +216,7 @@ func packSubChannelRequest(
),
NodeID: action.Node(),
CollectionID: task.CollectionID(),
PartitionIDs: partitions,
Infos: []*datapb.VchannelInfo{channel.VchannelInfo},
Schema: schema, // assign it for compatibility of rolling upgrade from 2.2.x to 2.3
LoadMeta: loadMeta, // assign it for compatibility of rolling upgrade from 2.2.x to 2.3

View File

@ -21,7 +21,6 @@ import (
"fmt"
"time"
"go.uber.org/multierr"
"go.uber.org/zap"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
@ -105,36 +104,21 @@ func GetShardLeadersWithChannels(ctx context.Context, m *meta.Meta, targetMgr me
nodeMgr *session.NodeManager, collectionID int64, channels map[string]*meta.DmChannel,
) ([]*querypb.ShardLeadersList, error) {
ret := make([]*querypb.ShardLeadersList, 0)
replicas := m.ReplicaManager.GetByCollection(ctx, collectionID)
for _, channel := range channels {
log := log.Ctx(ctx).With(zap.String("channel", channel.GetChannelName()))
var channelErr error
leaders := dist.LeaderViewManager.GetByFilter(meta.WithChannelName2LeaderView(channel.GetChannelName()))
if len(leaders) == 0 {
channelErr = merr.WrapErrChannelLack(channel.GetChannelName(), "channel not subscribed")
}
readableLeaders := make(map[int64]*meta.LeaderView)
for _, leader := range leaders {
if leader.UnServiceableError != nil {
multierr.AppendInto(&channelErr, leader.UnServiceableError)
ids := make([]int64, 0, len(replicas))
addrs := make([]string, 0, len(replicas))
for _, replica := range replicas {
leader := dist.ChannelDistManager.GetShardLeader(channel.GetChannelName(), replica)
if leader == nil || !leader.IsServiceable() {
log.WithRateGroup("util.GetShardLeaders", 1, 60).
Warn("leader is not available in replica", zap.String("channel", channel.GetChannelName()), zap.Int64("replicaID", replica.GetID()))
continue
}
readableLeaders[leader.ID] = leader
}
if len(readableLeaders) == 0 {
msg := fmt.Sprintf("channel %s is not available in any replica", channel.GetChannelName())
log.Warn(msg, zap.Error(channelErr))
err := merr.WrapErrChannelNotAvailable(channel.GetChannelName(), channelErr.Error())
return nil, err
}
readableLeaders = filterDupLeaders(ctx, m.ReplicaManager, readableLeaders)
ids := make([]int64, 0, len(leaders))
addrs := make([]string, 0, len(leaders))
for _, leader := range readableLeaders {
info := nodeMgr.Get(leader.ID)
info := nodeMgr.Get(leader.Node)
if info != nil {
ids = append(ids, info.ID())
addrs = append(addrs, info.Addr())
@ -143,12 +127,9 @@ func GetShardLeadersWithChannels(ctx context.Context, m *meta.Meta, targetMgr me
// to avoid node down during GetShardLeaders
if len(ids) == 0 {
if channelErr == nil {
channelErr = merr.WrapErrChannelNotAvailable(channel.GetChannelName())
}
err := merr.WrapErrChannelNotAvailable(channel.GetChannelName())
msg := fmt.Sprintf("channel %s is not available in any replica", channel.GetChannelName())
log.Warn(msg, zap.Error(channelErr))
err := merr.WrapErrChannelNotAvailable(channel.GetChannelName(), channelErr.Error())
log.Warn(msg, zap.Error(err))
return nil, err
}
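
The rewritten lookup above enforces the rule introduced by this change: within a replica there is at most one shard leader per channel, and a leader that is not serviceable is skipped rather than failing the whole request. Below is a minimal, self-contained sketch of that selection rule using simplified stand-in types, not the real ReplicaManager/ChannelDistManager API:

package main

import "fmt"

type delegator struct {
	node        int64
	serviceable bool
}

// shardLeaders returns the node IDs that can currently serve a channel,
// keeping at most one entry per replica and skipping unserviceable leaders.
func shardLeaders(leaderByReplica map[int64]*delegator) []int64 {
	ids := make([]int64, 0, len(leaderByReplica))
	for _, d := range leaderByReplica {
		if d == nil || !d.serviceable {
			continue // this replica has no usable leader right now
		}
		ids = append(ids, d.node)
	}
	return ids
}

func main() {
	leaders := shardLeaders(map[int64]*delegator{
		1: {node: 11, serviceable: true},
		2: {node: 22, serviceable: false}, // skipped: not serviceable
	})
	fmt.Println(leaders) // [11]
}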

View File

@ -30,6 +30,8 @@ import (
"go.opentelemetry.io/otel"
"go.uber.org/atomic"
"go.uber.org/zap"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"google.golang.org/protobuf/proto"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
@ -87,7 +89,7 @@ type ShardDelegator interface {
LoadSegments(ctx context.Context, req *querypb.LoadSegmentsRequest) error
ReleaseSegments(ctx context.Context, req *querypb.ReleaseSegmentsRequest, force bool) error
SyncTargetVersion(newVersion int64, partitions []int64, growingInTarget []int64, sealedInTarget []int64, droppedInTarget []int64, checkpoint *msgpb.MsgPosition, deleteSeekPos *msgpb.MsgPosition)
GetTargetVersion() int64
GetQueryView() *channelQueryView
GetDeleteBufferSize() (entryNum int64, memorySize int64)
// manage exclude segments
@ -327,7 +329,12 @@ func (sd *shardDelegator) search(ctx context.Context, req *querypb.SearchRequest
return nil, err
}
results, err := executeSubTasks(ctx, tasks, func(ctx context.Context, req *querypb.SearchRequest, worker cluster.Worker) (*internalpb.SearchResults, error) {
return worker.SearchSegments(ctx, req)
resp, err := worker.SearchSegments(ctx, req)
status, ok := status.FromError(err)
if ok && status.Code() == codes.Unavailable {
sd.markSegmentOffline(req.GetSegmentIDs()...)
}
return resp, err
}, "Search", log)
if err != nil {
log.Warn("Delegator search failed", zap.Error(err))
@ -515,7 +522,12 @@ func (sd *shardDelegator) QueryStream(ctx context.Context, req *querypb.QueryReq
}
_, err = executeSubTasks(ctx, tasks, func(ctx context.Context, req *querypb.QueryRequest, worker cluster.Worker) (*internalpb.RetrieveResults, error) {
return nil, worker.QueryStreamSegments(ctx, req, srv)
err := worker.QueryStreamSegments(ctx, req, srv)
status, ok := status.FromError(err)
if ok && status.Code() == codes.Unavailable {
sd.markSegmentOffline(req.GetSegmentIDs()...)
}
return nil, err
}, "Query", log)
if err != nil {
log.Warn("Delegator query failed", zap.Error(err))
@ -595,7 +607,12 @@ func (sd *shardDelegator) Query(ctx context.Context, req *querypb.QueryRequest)
}
results, err := executeSubTasks(ctx, tasks, func(ctx context.Context, req *querypb.QueryRequest, worker cluster.Worker) (*internalpb.RetrieveResults, error) {
return worker.QuerySegments(ctx, req)
resp, err := worker.QuerySegments(ctx, req)
status, ok := status.FromError(err)
if ok && status.Code() == codes.Unavailable {
sd.markSegmentOffline(req.GetSegmentIDs()...)
}
return resp, err
}, "Query", log)
if err != nil {
log.Warn("Delegator query failed", zap.Error(err))
@ -1019,7 +1036,7 @@ func NewShardDelegator(ctx context.Context, collectionID UniqueID, replicaID Uni
segmentManager: manager.Segment,
workerManager: workerManager,
lifetime: lifetime.NewLifetime(lifetime.Initializing),
distribution: NewDistribution(),
distribution: NewDistribution(channel),
deleteBuffer: deletebuffer.NewListDeleteBuffer[*deletebuffer.Item](startTs, sizePerBlock,
[]string{fmt.Sprint(paramtable.GetNodeID()), channel}),
pkOracle: pkoracle.NewPkOracle(),
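
The search/query/query-stream paths above now inspect the gRPC status of a failed worker call and demote the touched segments when the worker is unreachable. The sketch below isolates just that status check; the worker interface and markOffline callback are simplified stand-ins, and only the google.golang.org/grpc/status and codes usage mirrors the change:

package main

import (
	"context"
	"fmt"

	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

type worker interface {
	SearchSegments(ctx context.Context, segmentIDs []int64) error
}

// searchWithOfflineTracking forwards a search to one worker and, if the call
// fails with codes.Unavailable, reports the affected segments so the caller
// can mark them offline and drop the channel out of the serviceable state.
func searchWithOfflineTracking(ctx context.Context, w worker, segmentIDs []int64, markOffline func(ids ...int64)) error {
	err := w.SearchSegments(ctx, segmentIDs)
	if st, ok := status.FromError(err); ok && st.Code() == codes.Unavailable {
		markOffline(segmentIDs...)
	}
	return err
}

type flakyWorker struct{}

func (flakyWorker) SearchSegments(ctx context.Context, segmentIDs []int64) error {
	return status.Error(codes.Unavailable, "query node down")
}

func main() {
	offline := []int64{}
	_ = searchWithOfflineTracking(context.Background(), flakyWorker{}, []int64{1001, 1002}, func(ids ...int64) {
		offline = append(offline, ids...)
	})
	fmt.Println("segments marked offline:", offline) // [1001 1002]
}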

View File

@ -329,7 +329,7 @@ func (sd *shardDelegator) applyDelete(ctx context.Context,
// markSegmentOffline makes segment go offline and waits for QueryCoord to fix.
func (sd *shardDelegator) markSegmentOffline(segmentIDs ...int64) {
sd.distribution.AddOfflines(segmentIDs...)
sd.distribution.MarkOfflineSegments(segmentIDs...)
}
// addGrowing add growing segment record for delegator.
@ -1026,8 +1026,8 @@ func (sd *shardDelegator) SyncTargetVersion(
sd.RefreshLevel0DeletionStats()
}
func (sd *shardDelegator) GetTargetVersion() int64 {
return sd.distribution.getTargetVersion()
func (sd *shardDelegator) GetQueryView() *channelQueryView {
return sd.distribution.queryView
}
func (sd *shardDelegator) AddExcludedSegments(excludeInfo map[int64]uint64) {

View File

@ -414,9 +414,12 @@ func (s *DelegatorDataSuite) TestProcessDelete() {
InsertChannel: fmt.Sprintf("by-dev-rootcoord-dml_0_%dv0", s.collectionID),
},
},
Version: time.Now().UnixNano(),
})
s.Require().NoError(err)
// sync target version, make delegator serviceable
s.delegator.SyncTargetVersion(time.Now().UnixNano(), []int64{500}, []int64{1001}, []int64{1000}, nil, &msgpb.MsgPosition{}, &msgpb.MsgPosition{})
s.delegator.ProcessDelete([]*DeleteData{
{
PartitionID: 500,
@ -471,7 +474,7 @@ func (s *DelegatorDataSuite) TestProcessDelete() {
InsertChannel: fmt.Sprintf("by-dev-rootcoord-dml_0_%dv0", s.collectionID),
},
},
Version: 1,
Version: time.Now().UnixNano(),
})
s.Require().NoError(err)
s.True(s.delegator.distribution.Serviceable())
@ -506,7 +509,7 @@ func (s *DelegatorDataSuite) TestProcessDelete() {
InsertChannel: fmt.Sprintf("by-dev-rootcoord-dml_0_%dv0", s.collectionID),
},
},
Version: 2,
Version: time.Now().UnixNano(),
})
s.Require().NoError(err)
s.True(s.delegator.distribution.Serviceable())
@ -1409,7 +1412,7 @@ func (s *DelegatorDataSuite) TestSyncTargetVersion() {
}
s.delegator.SyncTargetVersion(int64(5), []int64{1}, []int64{1}, []int64{2}, []int64{3, 4}, &msgpb.MsgPosition{}, &msgpb.MsgPosition{})
s.Equal(int64(5), s.delegator.GetTargetVersion())
s.Equal(int64(5), s.delegator.GetQueryView().GetVersion())
}
func (s *DelegatorDataSuite) TestLevel0Deletions() {

View File

@ -522,7 +522,7 @@ func (s *DelegatorSuite) TestSearch() {
sd, ok := s.delegator.(*shardDelegator)
s.Require().True(ok)
sd.distribution.AddOfflines(1001)
sd.distribution.MarkOfflineSegments(1001)
_, err := s.delegator.Search(ctx, &querypb.SearchRequest{
Req: &internalpb.SearchRequest{
@ -709,7 +709,7 @@ func (s *DelegatorSuite) TestQuery() {
sd, ok := s.delegator.(*shardDelegator)
s.Require().True(ok)
sd.distribution.AddOfflines(1001)
sd.distribution.MarkOfflineSegments(1001)
_, err := s.delegator.Query(ctx, &querypb.QueryRequest{
Req: &internalpb.RetrieveRequest{Base: commonpbutil.NewMsgBase()},
@ -987,7 +987,7 @@ func (s *DelegatorSuite) TestQueryStream() {
sd, ok := s.delegator.(*shardDelegator)
s.Require().True(ok)
sd.distribution.AddOfflines(1001)
sd.distribution.MarkOfflineSegments(1001)
client := streamrpc.NewLocalQueryClient(ctx)
server := client.CreateServer()
@ -1164,7 +1164,7 @@ func (s *DelegatorSuite) TestGetStats() {
sd, ok := s.delegator.(*shardDelegator)
s.Require().True(ok)
sd.distribution.AddOfflines(1001)
sd.distribution.MarkOfflineSegments(1001)
_, err := s.delegator.GetStatistics(ctx, &querypb.GetStatisticsRequest{
Req: &internalpb.GetStatisticsRequest{Base: commonpbutil.NewMsgBase()},
@ -1265,7 +1265,7 @@ func (s *DelegatorSuite) TestUpdateSchema() {
sd, ok := s.delegator.(*shardDelegator)
s.Require().True(ok)
sd.distribution.AddOfflines(1001)
sd.distribution.MarkOfflineSegments(1001)
err := s.delegator.UpdateSchema(ctx, &schemapb.CollectionSchema{}, 100)
s.Error(err)

View File

@ -55,22 +55,34 @@ func getClosedCh() chan struct{} {
return closedCh
}
// channelQueryView maintains the sealed segment list which should be used for search/query.
type channelQueryView struct {
sealedSegments []int64 // sealed segment list which should be used for search/query
partitions typeutil.UniqueSet // partition list to which the sealed segments belong
version int64 // version of current query view, same as targetVersion in qc
serviceable *atomic.Bool
}
func (q *channelQueryView) GetVersion() int64 {
return q.version
}
func (q *channelQueryView) Serviceable() bool {
return q.serviceable.Load()
}
// distribution is the struct to store segment distribution.
// it contains both growing and sealed segments.
type distribution struct {
// segments information
// map[SegmentID]=>segmentEntry
targetVersion *atomic.Int64
growingSegments map[UniqueID]SegmentEntry
sealedSegments map[UniqueID]SegmentEntry
// snapshotVersion indicator
snapshotVersion int64
// quick flag for current snapshot is serviceable
serviceable *atomic.Bool
offlines typeutil.Set[int64]
snapshots *typeutil.ConcurrentMap[int64, *snapshot]
snapshots *typeutil.ConcurrentMap[int64, *snapshot]
// current is the snapshot for quick usage for search/query
// generated for each change of distribution
current *atomic.Pointer[snapshot]
@ -78,6 +90,10 @@ type distribution struct {
idfOracle IDFOracle
// protects current & segments
mut sync.RWMutex
// distribution info
channelName string
queryView *channelQueryView
}
// SegmentEntry stores the segment meta information.
@ -88,18 +104,22 @@ type SegmentEntry struct {
Version int64
TargetVersion int64
Level datapb.SegmentLevel
Offline bool // if the delegator fails to execute forwardDelete/Query/Search on the segment, it is marked offline
}
// NewDistribution creates a new distribution instance with all field initialized.
func NewDistribution() *distribution {
func NewDistribution(channelName string) *distribution {
dist := &distribution{
serviceable: atomic.NewBool(false),
channelName: channelName,
growingSegments: make(map[UniqueID]SegmentEntry),
sealedSegments: make(map[UniqueID]SegmentEntry),
snapshots: typeutil.NewConcurrentMap[int64, *snapshot](),
current: atomic.NewPointer[snapshot](nil),
offlines: typeutil.NewSet[int64](),
targetVersion: atomic.NewInt64(initialTargetVersion),
queryView: &channelQueryView{
serviceable: atomic.NewBool(false),
partitions: typeutil.NewSet[int64](),
version: initialTargetVersion,
},
}
dist.genSnapshot()
@ -142,13 +162,11 @@ func (d *distribution) PinOnlineSegments(partitions ...int64) (sealed []Snapshot
current := d.current.Load()
sealed, growing = current.Get(partitions...)
version = current.version
filterOnline := func(entry SegmentEntry, _ int) bool {
return !d.offlines.Contain(entry.SegmentID)
return !entry.Offline
}
sealed, growing = d.filterSegments(sealed, growing, filterOnline)
version = current.version
return
}
@ -195,7 +213,26 @@ func (d *distribution) getTargetVersion() int64 {
// Serviceable returns whether the current snapshot is serviceable.
func (d *distribution) Serviceable() bool {
return d.serviceable.Load()
return d.queryView.serviceable.Load()
}
func (d *distribution) updateServiceable(triggerAction string) {
if d.queryView.version != initialTargetVersion {
serviceable := true
for _, s := range d.queryView.sealedSegments {
if entry, ok := d.sealedSegments[s]; !ok || entry.Offline {
serviceable = false
break
}
}
if serviceable != d.queryView.serviceable.Load() {
d.queryView.serviceable.Store(serviceable)
log.Info("channel distribution serviceable changed",
zap.String("channel", d.channelName),
zap.Bool("serviceable", serviceable),
zap.String("action", triggerAction))
}
}
}
// AddDistributions add multiple segment entries.
@ -224,10 +261,10 @@ func (d *distribution) AddDistributions(entries ...SegmentEntry) {
entry.TargetVersion = unreadableTargetVersion
}
d.sealedSegments[entry.SegmentID] = entry
d.offlines.Remove(entry.SegmentID)
}
d.genSnapshot()
d.updateServiceable("AddDistributions")
}
// AddGrowing adds growing segment distribution.
@ -243,7 +280,7 @@ func (d *distribution) AddGrowing(entries ...SegmentEntry) {
}
// AddOffline set segmentIDs to offlines.
func (d *distribution) AddOfflines(segmentIDs ...int64) {
func (d *distribution) MarkOfflineSegments(segmentIDs ...int64) {
d.mut.Lock()
defer d.mut.Unlock()
@ -253,17 +290,19 @@ func (d *distribution) AddOfflines(segmentIDs ...int64) {
if !ok {
continue
}
// FIXME: remove offline logic later
// mark segment distribution as offline, set version to unreadable
entry.NodeID = wildcardNodeID
entry.Version = unreadableTargetVersion
d.sealedSegments[segmentID] = entry
updated = true
d.offlines.Insert(segmentID)
entry.Offline = true
entry.Version = unreadableTargetVersion
entry.NodeID = -1
d.sealedSegments[segmentID] = entry
}
if updated {
log.Info("mark sealed segment offline from distribution",
zap.String("channelName", d.channelName),
zap.Int64s("segmentIDs", segmentIDs))
d.genSnapshot()
d.updateServiceable("MarkOfflineSegments")
}
}
@ -292,25 +331,30 @@ func (d *distribution) SyncTargetVersion(newVersion int64, partitions []int64, g
d.growingSegments[segmentID] = entry
}
available := true
for _, segmentID := range sealedInTarget {
entry, ok := d.sealedSegments[segmentID]
if !ok {
log.Warn("readable sealed segment lost, make it unserviceable", zap.Int64("segmentID", segmentID))
available = false
continue
}
entry.TargetVersion = newVersion
d.sealedSegments[segmentID] = entry
}
oldValue := d.targetVersion.Load()
d.targetVersion.Store(newVersion)
oldValue := d.queryView.version
d.queryView = &channelQueryView{
sealedSegments: sealedInTarget,
partitions: typeutil.NewUniqueSet(partitions...),
version: newVersion,
serviceable: d.queryView.serviceable,
}
// update working partition list
d.genSnapshot(WithPartitions(partitions))
d.genSnapshot()
// if sealed segment in leader view is less than sealed segment in target, set delegator to unserviceable
d.serviceable.Store(available)
log.Info("Update readable segment version",
d.updateServiceable("SyncTargetVersion")
log.Info("Update channel query view",
zap.String("channel", d.channelName),
zap.Int64s("partitions", partitions),
zap.Int64("oldVersion", oldValue),
zap.Int64("newVersion", newVersion),
@ -325,9 +369,6 @@ func (d *distribution) RemoveDistributions(sealedSegments []SegmentEntry, growin
defer d.mut.Unlock()
for _, sealed := range sealedSegments {
if d.offlines.Contain(sealed.SegmentID) {
d.offlines.Remove(sealed.SegmentID)
}
entry, ok := d.sealedSegments[sealed.SegmentID]
if !ok {
continue
@ -346,6 +387,13 @@ func (d *distribution) RemoveDistributions(sealedSegments []SegmentEntry, growin
delete(d.growingSegments, growing.SegmentID)
}
log.Info("remove segments from distribution",
zap.String("channelName", d.channelName),
zap.Int64s("growing", lo.Map(growingSegments, func(s SegmentEntry, _ int) int64 { return s.SegmentID })),
zap.Int64s("sealed", lo.Map(sealedSegments, func(s SegmentEntry, _ int) int64 { return s.SegmentID })),
)
d.updateServiceable("RemoveDistributions")
// wait for previous reads even if the distribution did not change,
// in case segment balancing caused a segment to lose track
return d.genSnapshot()
@ -354,20 +402,10 @@ func (d *distribution) RemoveDistributions(sealedSegments []SegmentEntry, growin
// genSnapshot converts the current distribution to snapshot format,
// in which the caller can look up each node's segmentID list.
// mutex RLock is required before calling this method.
func (d *distribution) genSnapshot(opts ...genSnapshotOpt) chan struct{} {
func (d *distribution) genSnapshot() chan struct{} {
// stores last snapshot
// ok to be nil
last := d.current.Load()
option := &genSnapshotOption{
partitions: typeutil.NewSet[int64](), // if no working list provided, snapshot shall have no item
}
// use last snapshot working parition list by default
if last != nil {
option.partitions = last.partitions
}
for _, opt := range opts {
opt(option)
}
nodeSegments := make(map[int64][]SegmentEntry)
for _, entry := range d.sealedSegments {
@ -380,7 +418,7 @@ func (d *distribution) genSnapshot(opts ...genSnapshotOpt) chan struct{} {
dist = append(dist, SnapshotItem{
NodeID: nodeID,
Segments: lo.Map(items, func(entry SegmentEntry, _ int) SegmentEntry {
if !option.partitions.Contain(entry.PartitionID) {
if !d.queryView.partitions.Contain(entry.PartitionID) {
entry.TargetVersion = unreadableTargetVersion
}
return entry
@ -390,18 +428,16 @@ func (d *distribution) genSnapshot(opts ...genSnapshotOpt) chan struct{} {
growing := make([]SegmentEntry, 0, len(d.growingSegments))
for _, entry := range d.growingSegments {
if !option.partitions.Contain(entry.PartitionID) {
if !d.queryView.partitions.Contain(entry.PartitionID) {
entry.TargetVersion = unreadableTargetVersion
}
growing = append(growing, entry)
}
d.serviceable.Store(d.offlines.Len() == 0)
// update snapshot version
d.snapshotVersion++
newSnapShot := NewSnapshot(dist, growing, last, d.snapshotVersion, d.targetVersion.Load())
newSnapShot.partitions = option.partitions
newSnapShot := NewSnapshot(dist, growing, last, d.snapshotVersion, d.queryView.GetVersion())
newSnapShot.partitions = d.queryView.partitions
d.current.Store(newSnapShot)
// shall be a new one
@ -435,15 +471,3 @@ func (d *distribution) getCleanup(version int64) snapshotCleanup {
d.snapshots.GetAndRemove(version)
}
}
type genSnapshotOption struct {
partitions typeutil.Set[int64]
}
type genSnapshotOpt func(*genSnapshotOption)
func WithPartitions(partitions []int64) genSnapshotOpt {
return func(opt *genSnapshotOption) {
opt.partitions = typeutil.NewSet(partitions...)
}
}
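
updateServiceable above derives serviceability purely from the synced query view: before the first SyncTargetVersion the channel stays unserviceable, and afterwards it is serviceable only while every sealed segment listed in the view is present locally and not marked offline. A minimal, self-contained restatement of that rule with stand-in types (not the real distribution/channelQueryView structs):

package main

import "fmt"

type segmentEntry struct {
	NodeID  int64
	Offline bool
}

type queryView struct {
	version        int64
	sealedSegments []int64
}

const initialVersion = -1

// serviceable reports whether a channel with the given synced view and local
// sealed-segment map should be considered serviceable.
func serviceable(view queryView, sealed map[int64]segmentEntry) bool {
	if view.version == initialVersion {
		return false // no query view synced from QueryCoord yet
	}
	for _, id := range view.sealedSegments {
		entry, ok := sealed[id]
		if !ok || entry.Offline {
			return false // a required sealed segment is missing or offline
		}
	}
	return true
}

func main() {
	sealed := map[int64]segmentEntry{
		100: {NodeID: 1},
		101: {NodeID: 2},
	}
	view := queryView{version: 5, sealedSegments: []int64{100, 101}}
	fmt.Println(serviceable(view, sealed)) // true

	// A forwarding failure marks 101 offline; the channel drops out of service.
	e := sealed[101]
	e.Offline = true
	sealed[101] = e
	fmt.Println(serviceable(view, sealed)) // false
}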

View File

@ -20,6 +20,7 @@ import (
"testing"
"time"
"github.com/samber/lo"
"github.com/stretchr/testify/suite"
)
@ -29,8 +30,7 @@ type DistributionSuite struct {
}
func (s *DistributionSuite) SetupTest() {
s.dist = NewDistribution()
s.Equal(initialTargetVersion, s.dist.getTargetVersion())
s.dist = NewDistribution("channel-1")
}
func (s *DistributionSuite) TearDownTest() {
@ -177,6 +177,7 @@ func (s *DistributionSuite) TestAddDistribution() {
s.SetupTest()
defer s.TearDownTest()
s.dist.AddGrowing(tc.growing...)
s.dist.SyncTargetVersion(1000, nil, nil, nil, nil)
_, _, version, err := s.dist.PinReadableSegments()
s.Require().NoError(err)
s.dist.AddDistributions(tc.input...)
@ -447,6 +448,15 @@ func (s *DistributionSuite) TestRemoveDistribution() {
s.dist.AddGrowing(tc.presetGrowing...)
s.dist.AddDistributions(tc.presetSealed...)
// update target version, make delegator serviceable
growingIDs := lo.Map(tc.presetGrowing, func(item SegmentEntry, idx int) int64 {
return item.SegmentID
})
sealedIDs := lo.Map(tc.presetSealed, func(item SegmentEntry, idx int) int64 {
return item.SegmentID
})
s.dist.SyncTargetVersion(time.Now().Unix(), nil, growingIDs, sealedIDs, nil)
var version int64
if tc.withMockRead {
var err error
@ -614,7 +624,7 @@ func (s *DistributionSuite) TestPeek() {
}
}
func (s *DistributionSuite) TestAddOfflines() {
func (s *DistributionSuite) TestMarkOfflineSegments() {
type testCase struct {
tag string
input []SegmentEntry
@ -665,12 +675,14 @@ func (s *DistributionSuite) TestAddOfflines() {
defer s.TearDownTest()
s.dist.AddDistributions(tc.input...)
s.dist.AddOfflines(tc.offlines...)
sealedSegmentID := lo.Map(tc.input, func(t SegmentEntry, _ int) int64 {
return t.SegmentID
})
s.dist.SyncTargetVersion(1000, nil, nil, sealedSegmentID, nil)
s.dist.MarkOfflineSegments(tc.offlines...)
s.Equal(tc.serviceable, s.dist.Serviceable())
// current := s.dist.current.Load()
for _, offline := range tc.offlines {
// current.
s.dist.mut.RLock()
entry, ok := s.dist.sealedSegments[offline]
s.dist.mut.RUnlock()
@ -739,7 +751,7 @@ func (s *DistributionSuite) Test_SyncTargetVersion() {
s.Len(s1[0].Segments, 3)
s.Len(s2, 3)
s.dist.serviceable.Store(true)
s.dist.queryView.serviceable.Store(true)
s.dist.SyncTargetVersion(2, []int64{1}, []int64{222}, []int64{}, []int64{})
s.True(s.dist.Serviceable())

View File

@ -243,6 +243,53 @@ func (_c *MockShardDelegator_GetPartitionStatsVersions_Call) RunAndReturn(run fu
return _c
}
// GetQueryView provides a mock function with no fields
func (_m *MockShardDelegator) GetQueryView() *channelQueryView {
ret := _m.Called()
if len(ret) == 0 {
panic("no return value specified for GetQueryView")
}
var r0 *channelQueryView
if rf, ok := ret.Get(0).(func() *channelQueryView); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*channelQueryView)
}
}
return r0
}
// MockShardDelegator_GetQueryView_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetQueryView'
type MockShardDelegator_GetQueryView_Call struct {
*mock.Call
}
// GetQueryView is a helper method to define mock.On call
func (_e *MockShardDelegator_Expecter) GetQueryView() *MockShardDelegator_GetQueryView_Call {
return &MockShardDelegator_GetQueryView_Call{Call: _e.mock.On("GetQueryView")}
}
func (_c *MockShardDelegator_GetQueryView_Call) Run(run func()) *MockShardDelegator_GetQueryView_Call {
_c.Call.Run(func(args mock.Arguments) {
run()
})
return _c
}
func (_c *MockShardDelegator_GetQueryView_Call) Return(_a0 *channelQueryView) *MockShardDelegator_GetQueryView_Call {
_c.Call.Return(_a0)
return _c
}
func (_c *MockShardDelegator_GetQueryView_Call) RunAndReturn(run func() *channelQueryView) *MockShardDelegator_GetQueryView_Call {
_c.Call.Return(run)
return _c
}
// GetSegmentInfo provides a mock function with given fields: readable
func (_m *MockShardDelegator) GetSegmentInfo(readable bool) ([]SnapshotItem, []SegmentEntry) {
ret := _m.Called(readable)
@ -407,51 +454,6 @@ func (_c *MockShardDelegator_GetTSafe_Call) RunAndReturn(run func() uint64) *Moc
return _c
}
// GetTargetVersion provides a mock function with no fields
func (_m *MockShardDelegator) GetTargetVersion() int64 {
ret := _m.Called()
if len(ret) == 0 {
panic("no return value specified for GetTargetVersion")
}
var r0 int64
if rf, ok := ret.Get(0).(func() int64); ok {
r0 = rf()
} else {
r0 = ret.Get(0).(int64)
}
return r0
}
// MockShardDelegator_GetTargetVersion_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetTargetVersion'
type MockShardDelegator_GetTargetVersion_Call struct {
*mock.Call
}
// GetTargetVersion is a helper method to define mock.On call
func (_e *MockShardDelegator_Expecter) GetTargetVersion() *MockShardDelegator_GetTargetVersion_Call {
return &MockShardDelegator_GetTargetVersion_Call{Call: _e.mock.On("GetTargetVersion")}
}
func (_c *MockShardDelegator_GetTargetVersion_Call) Run(run func()) *MockShardDelegator_GetTargetVersion_Call {
_c.Call.Run(func(args mock.Arguments) {
run()
})
return _c
}
func (_c *MockShardDelegator_GetTargetVersion_Call) Return(_a0 int64) *MockShardDelegator_GetTargetVersion_Call {
_c.Call.Return(_a0)
return _c
}
func (_c *MockShardDelegator_GetTargetVersion_Call) RunAndReturn(run func() int64) *MockShardDelegator_GetTargetVersion_Call {
_c.Call.Return(run)
return _c
}
// LoadGrowing provides a mock function with given fields: ctx, infos, version
func (_m *MockShardDelegator) LoadGrowing(ctx context.Context, infos []*querypb.SegmentLoadInfo, version int64) error {
ret := _m.Called(ctx, infos, version)

View File

@ -1253,14 +1253,18 @@ func (node *QueryNode) GetDataDistribution(ctx context.Context, req *querypb.Get
numOfGrowingRows += segment.InsertCount()
}
queryView := delegator.GetQueryView()
leaderViews = append(leaderViews, &querypb.LeaderView{
Collection: delegator.Collection(),
Channel: key,
SegmentDist: sealedSegments,
GrowingSegments: growingSegments,
TargetVersion: delegator.GetTargetVersion(),
NumOfGrowingRows: numOfGrowingRows,
PartitionStatsVersions: delegator.GetPartitionStatsVersions(ctx),
TargetVersion: queryView.GetVersion(),
Status: &querypb.LeaderViewStatus{
Serviceable: queryView.Serviceable(),
},
})
return true
})

View File

@ -212,6 +212,7 @@ func (suite *ServiceSuite) TestGetStatistics_Normal() {
ctx := context.Background()
suite.TestWatchDmChannelsInt64()
suite.TestLoadSegments_Int64()
suite.syncDistribution(context.TODO())
req := &querypb.GetStatisticsRequest{
Req: &internalpb.GetStatisticsRequest{

View File

@ -618,9 +618,14 @@ message LeaderView {
map<int64, SegmentDist> segment_dist = 3;
repeated int64 growing_segmentIDs = 4;
map<int64, msg.MsgPosition> growing_segments = 5;
int64 TargetVersion = 6;
int64 TargetVersion = 6; // deprecated
int64 num_of_growing_rows = 7;
map<int64, int64> partition_stats_versions = 8;
LeaderViewStatus status = 9;
}
message LeaderViewStatus {
bool serviceable = 10;
}
message SegmentDist {
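
Because the status field is new, LeaderViews reported before the first query-view sync, or by older query nodes during a rolling upgrade, arrive with it unset. Assuming the standard protoc-gen-go getters for these messages, a nil status reads as non-serviceable, which is the safe default on the coordinator side; a short hedged sketch:

// Sketch only, assuming the generated querypb getters: a nil Status (old node,
// or no query view synced yet) is treated as not serviceable.
func isUsableLeader(view *querypb.LeaderView) bool {
	return view.GetStatus().GetServiceable()
}

// e.g. skip any leader for which isUsableLeader(view) is false when building
// the shard-leader list for a channel.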

File diff suppressed because it is too large