mirror of https://gitee.com/milvus-io/milvus.git (synced 2025-12-06 17:18:35 +08:00)
enhance: Refactor balance checker with priority queue (#43992)
issue: #43858

Refactor the balance checker implementation to use priority queues for managing collection balance operations, improving processing efficiency and order control.

Changes include:
- Export the priority queue interfaces (Item, BaseItem, PriorityQueue)
- Replace collection round-robin with a priority-based queue system
- Add the BalanceCheckCollectionMaxCount configuration parameter
- Optimize balance task generation with batch processing limits
- Refactor the processBalanceQueue method for the different strategies
- Enhance test coverage with comprehensive unit tests

The new priority queue system processes collections in row-count or collection-ID order, providing better control over balance operation priorities and resource utilization.

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
This commit is contained in:
parent bed94fc061
commit 6d4961b978
@@ -20,24 +20,24 @@ import (
 	"container/heap"
 )
 
-type item interface {
+type Item interface {
 	getPriority() int
 	setPriority(priority int)
 }
 
-type baseItem struct {
+type BaseItem struct {
 	priority int
 }
 
-func (b *baseItem) getPriority() int {
+func (b *BaseItem) getPriority() int {
 	return b.priority
 }
 
-func (b *baseItem) setPriority(priority int) {
+func (b *BaseItem) setPriority(priority int) {
 	b.priority = priority
 }
 
-type heapQueue []item
+type heapQueue []Item
 
 func (hq heapQueue) Len() int {
 	return len(hq)
@@ -52,7 +52,7 @@ func (hq heapQueue) Swap(i, j int) {
 }
 
 func (hq *heapQueue) Push(x any) {
-	i := x.(item)
+	i := x.(Item)
 	*hq = append(*hq, i)
 }
 
@@ -64,22 +64,30 @@ func (hq *heapQueue) Pop() any {
 	return ret
 }
 
-type priorityQueue struct {
+type PriorityQueue struct {
 	heapQueue
 }
 
-func newPriorityQueue() priorityQueue {
+func NewPriorityQueue() PriorityQueue {
 	hq := make(heapQueue, 0)
 	heap.Init(&hq)
-	return priorityQueue{
+	return PriorityQueue{
 		heapQueue: hq,
 	}
 }
 
-func (pq *priorityQueue) push(item item) {
+func NewPriorityQueuePtr() *PriorityQueue {
+	hq := make(heapQueue, 0)
+	heap.Init(&hq)
+	return &PriorityQueue{
+		heapQueue: hq,
+	}
+}
+
+func (pq *PriorityQueue) Push(item Item) {
 	heap.Push(&pq.heapQueue, item)
 }
 
-func (pq *priorityQueue) pop() item {
-	return heap.Pop(&pq.heapQueue).(item)
+func (pq *PriorityQueue) Pop() Item {
+	return heap.Pop(&pq.heapQueue).(Item)
 }
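Push and Pop wrap container/heap, so the item whose getPriority() value is smallest is always popped first; there is no round-robin tie-breaking (the tests below make that explicit). A minimal same-package sketch of the exported API, reusing the existing newNodeItem helper the tests also use:

	pq := NewPriorityQueuePtr()
	a := newNodeItem(3, 1) // priority 3, nodeID 1
	b := newNodeItem(1, 2) // priority 1, nodeID 2
	pq.Push(&a)
	pq.Push(&b)
	first := pq.Pop().(*nodeItem) // nodeID 2: the lowest priority value wins
	_ = first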
@@ -23,74 +23,74 @@ import (
 )
 
 func TestMinPriorityQueue(t *testing.T) {
-	pq := newPriorityQueue()
+	pq := NewPriorityQueue()
 
 	for i := 0; i < 5; i++ {
 		priority := i % 3
 		nodeItem := newNodeItem(priority, int64(i))
-		pq.push(&nodeItem)
+		pq.Push(&nodeItem)
 	}
 
-	item := pq.pop()
+	item := pq.Pop()
 	assert.Equal(t, item.getPriority(), 0)
 	assert.Equal(t, item.(*nodeItem).nodeID, int64(0))
-	item = pq.pop()
+	item = pq.Pop()
 	assert.Equal(t, item.getPriority(), 0)
 	assert.Equal(t, item.(*nodeItem).nodeID, int64(3))
-	item = pq.pop()
+	item = pq.Pop()
 	assert.Equal(t, item.getPriority(), 1)
 	assert.Equal(t, item.(*nodeItem).nodeID, int64(1))
-	item = pq.pop()
+	item = pq.Pop()
 	assert.Equal(t, item.getPriority(), 1)
 	assert.Equal(t, item.(*nodeItem).nodeID, int64(4))
-	item = pq.pop()
+	item = pq.Pop()
 	assert.Equal(t, item.getPriority(), 2)
 	println(item.getPriority())
 	assert.Equal(t, item.(*nodeItem).nodeID, int64(2))
 }
 
 func TestPopPriorityQueue(t *testing.T) {
-	pq := newPriorityQueue()
+	pq := NewPriorityQueue()
 
 	for i := 0; i < 1; i++ {
 		priority := 1
 		nodeItem := newNodeItem(priority, int64(i))
-		pq.push(&nodeItem)
+		pq.Push(&nodeItem)
 	}
 
-	item := pq.pop()
+	item := pq.Pop()
 	assert.Equal(t, item.getPriority(), 1)
 	assert.Equal(t, item.(*nodeItem).nodeID, int64(0))
-	pq.push(item)
+	pq.Push(item)
 
 	// if it's round robin, but not working
-	item = pq.pop()
+	item = pq.Pop()
 	assert.Equal(t, item.getPriority(), 1)
 	assert.Equal(t, item.(*nodeItem).nodeID, int64(0))
 }
 
 func TestMaxPriorityQueue(t *testing.T) {
-	pq := newPriorityQueue()
+	pq := NewPriorityQueue()
 
 	for i := 0; i < 5; i++ {
 		priority := i % 3
 		nodeItem := newNodeItem(-priority, int64(i))
-		pq.push(&nodeItem)
+		pq.Push(&nodeItem)
 	}
 
-	item := pq.pop()
+	item := pq.Pop()
 	assert.Equal(t, item.getPriority(), -2)
 	assert.Equal(t, item.(*nodeItem).nodeID, int64(2))
-	item = pq.pop()
+	item = pq.Pop()
 	assert.Equal(t, item.getPriority(), -1)
 	assert.Equal(t, item.(*nodeItem).nodeID, int64(4))
-	item = pq.pop()
+	item = pq.Pop()
 	assert.Equal(t, item.getPriority(), -1)
 	assert.Equal(t, item.(*nodeItem).nodeID, int64(1))
-	item = pq.pop()
+	item = pq.Pop()
 	assert.Equal(t, item.getPriority(), 0)
 	assert.Equal(t, item.(*nodeItem).nodeID, int64(3))
-	item = pq.pop()
+	item = pq.Pop()
 	assert.Equal(t, item.getPriority(), 0)
 	assert.Equal(t, item.(*nodeItem).nodeID, int64(0))
 }
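As TestMaxPriorityQueue shows, the queue itself is always a min-heap; max-heap behaviour is obtained by negating the priority when the item is created, the same trick the new collectionBalanceItem in the checker uses for byrowcount ordering. A compressed illustration with made-up row counts:

	pq := NewPriorityQueue()
	for _, rows := range []int{10, 50, 30} {
		it := newNodeItem(-rows, int64(rows)) // negate so the largest value pops first
		pq.Push(&it)
	}
	top := pq.Pop().(*nodeItem) // priority -50: the item with the most rows
	_ = top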
@@ -57,9 +57,9 @@ func (b *RowCountBasedBalancer) AssignSegment(ctx context.Context, collectionID
 	if len(nodeItems) == 0 {
 		return nil
 	}
-	queue := newPriorityQueue()
+	queue := NewPriorityQueue()
 	for _, item := range nodeItems {
-		queue.push(item)
+		queue.Push(item)
 	}
 
 	sort.Slice(segments, func(i, j int) bool {
@@ -70,7 +70,7 @@ func (b *RowCountBasedBalancer) AssignSegment(ctx context.Context, collectionID
 	plans := make([]SegmentAssignPlan, 0, len(segments))
 	for _, s := range segments {
 		// pick the node with the least row count and allocate to it.
-		ni := queue.pop().(*nodeItem)
+		ni := queue.Pop().(*nodeItem)
 		plan := SegmentAssignPlan{
 			From: -1,
 			To:   ni.nodeID,
@@ -82,7 +82,7 @@ func (b *RowCountBasedBalancer) AssignSegment(ctx context.Context, collectionID
 		}
 		// change node's score and push back
 		ni.AddCurrentScoreDelta(float64(s.GetNumOfRows()))
-		queue.push(ni)
+		queue.Push(ni)
 	}
 	return plans
 }
@@ -108,9 +108,9 @@ func (b *RowCountBasedBalancer) AssignChannel(ctx context.Context, collectionID
 		return nil
 	}
 
-	queue := newPriorityQueue()
+	queue := NewPriorityQueue()
 	for _, item := range nodeItems {
-		queue.push(item)
+		queue.Push(item)
 	}
 
 	plans := make([]ChannelAssignPlan, 0)
@@ -125,7 +125,7 @@ func (b *RowCountBasedBalancer) AssignChannel(ctx context.Context, collectionID
 		}
 		if ni == nil {
 			// pick the node with the least channel num and allocate to it.
-			ni = queue.pop().(*nodeItem)
+			ni = queue.Pop().(*nodeItem)
 		}
 		plan := ChannelAssignPlan{
 			From: -1,
@@ -135,7 +135,7 @@ func (b *RowCountBasedBalancer) AssignChannel(ctx context.Context, collectionID
 		plans = append(plans, plan)
 		// change node's score and push back
 		ni.AddCurrentScoreDelta(1)
-		queue.push(ni)
+		queue.Push(ni)
 	}
 	return plans
 }
@@ -408,7 +408,7 @@ func NewRowCountBasedBalancer(
 }
 
 type nodeItem struct {
-	baseItem
+	BaseItem
 	fmt.Stringer
 	nodeID        int64
 	assignedScore float64
@@ -417,7 +417,7 @@ type nodeItem struct {
 
 func newNodeItem(currentScore int, nodeID int64) nodeItem {
 	return nodeItem{
-		baseItem:     baseItem{},
+		BaseItem:     BaseItem{},
 		nodeID:       nodeID,
 		currentScore: float64(currentScore),
 	}
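Both assignment paths above (and the score-based balancer in the next file) follow the same greedy pattern: pop the least-loaded node, assign to it, bump its score, and push it back so the next pick sees the updated load. A condensed sketch of that loop, with plan fields other than From/To elided:

	queue := NewPriorityQueue()
	for _, item := range nodeItems {
		queue.Push(item)
	}
	for _, s := range segments {
		ni := queue.Pop().(*nodeItem) // node with the least row count so far
		plans = append(plans, SegmentAssignPlan{From: -1, To: ni.nodeID})
		ni.AddCurrentScoreDelta(float64(s.GetNumOfRows())) // account for the new load
		queue.Push(ni)                                     // re-insert with the updated score
	}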
@@ -79,9 +79,9 @@ func (b *ScoreBasedBalancer) assignSegment(br *balanceReport, collectionID int64
 		return nil
 	}
 
-	queue := newPriorityQueue()
+	queue := NewPriorityQueue()
 	for _, item := range nodeItemsMap {
-		queue.push(item)
+		queue.Push(item)
 	}
 
 	// sort segments by segment row count, if segment has same row count, sort by node's score
@@ -100,9 +100,9 @@ func (b *ScoreBasedBalancer) assignSegment(br *balanceReport, collectionID int64
 	for _, s := range segments {
 		func(s *meta.Segment) {
 			// for each segment, pick the node with the least score
-			targetNode := queue.pop().(*nodeItem)
+			targetNode := queue.Pop().(*nodeItem)
 			// make sure candidate is always push back
-			defer queue.push(targetNode)
+			defer queue.Push(targetNode)
 			scoreChanges := b.calculateSegmentScore(s)
 
 			sourceNode := nodeItemsMap[s.Node]
@@ -173,9 +173,9 @@ func (b *ScoreBasedBalancer) assignChannel(br *balanceReport, collectionID int64
 		return nil
 	}
 
-	queue := newPriorityQueue()
+	queue := NewPriorityQueue()
 	for _, item := range nodeItemsMap {
-		queue.push(item)
+		queue.Push(item)
 	}
 	plans := make([]ChannelAssignPlan, 0, len(channels))
 	for _, ch := range channels {
@@ -193,10 +193,10 @@ func (b *ScoreBasedBalancer) assignChannel(br *balanceReport, collectionID int64
 		}
 		// for each channel, pick the node with the least score
 		if targetNode == nil {
-			targetNode = queue.pop().(*nodeItem)
+			targetNode = queue.Pop().(*nodeItem)
 		}
 		// make sure candidate is always push back
-		defer queue.push(targetNode)
+		defer queue.Push(targetNode)
 		scoreChanges := b.calculateChannelScore(ch, collectionID)
 
 		sourceNode := nodeItemsMap[ch.Node]
@@ -18,7 +18,6 @@ package checkers
 
 import (
 	"context"
-	"sort"
 	"strings"
 	"time"
 
@@ -36,22 +35,111 @@ import (
 	"github.com/milvus-io/milvus/pkg/v2/log"
 	"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
 	"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
-	"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
 )
 
+// balanceConfig holds all configuration parameters for balance operations.
+// This configuration controls how balance tasks are generated and executed.
+type balanceConfig struct {
+	// segmentBatchSize specifies the maximum number of segment balance tasks to generate in one round
+	segmentBatchSize int
+	// channelBatchSize specifies the maximum number of channel balance tasks to generate in one round
+	channelBatchSize int
+	// balanceOnMultipleCollections determines whether to balance multiple collections in one round.
+	// If false, only balance one collection at a time to avoid resource contention
+	balanceOnMultipleCollections bool
+	// maxCheckCollectionCount limits the maximum number of collections to check in one round
+	// to prevent long-running balance operations
+	maxCheckCollectionCount int
+	// autoBalanceInterval controls how frequently automatic balance operations are triggered
+	autoBalanceInterval time.Duration
+	// segmentTaskTimeout specifies the timeout for segment balance tasks
+	segmentTaskTimeout time.Duration
+	// channelTaskTimeout specifies the timeout for channel balance tasks
+	channelTaskTimeout time.Duration
+}
+
+// This method fetches all balance-related configuration parameters from the global
+// parameter table and returns a balanceConfig struct for use in balance operations.
+func (b *BalanceChecker) loadBalanceConfig() balanceConfig {
+	return balanceConfig{
+		segmentBatchSize:             paramtable.Get().QueryCoordCfg.BalanceSegmentBatchSize.GetAsInt(),
+		channelBatchSize:             paramtable.Get().QueryCoordCfg.BalanceChannelBatchSize.GetAsInt(),
+		balanceOnMultipleCollections: paramtable.Get().QueryCoordCfg.EnableBalanceOnMultipleCollections.GetAsBool(),
+		maxCheckCollectionCount:      paramtable.Get().QueryCoordCfg.BalanceCheckCollectionMaxCount.GetAsInt(),
+		autoBalanceInterval:          paramtable.Get().QueryCoordCfg.AutoBalanceInterval.GetAsDuration(time.Millisecond),
+		segmentTaskTimeout:           paramtable.Get().QueryCoordCfg.SegmentTaskTimeout.GetAsDuration(time.Millisecond),
+		channelTaskTimeout:           paramtable.Get().QueryCoordCfg.ChannelTaskTimeout.GetAsDuration(time.Millisecond),
+	}
+}
+
+// collectionBalanceItem represents a collection in the balance priority queue.
+// Each item contains collection metadata and is used to determine the order
+// in which collections should be processed for balance operations.
+type collectionBalanceItem struct {
+	*balance.BaseItem
+	balancePriority int
+
+	// collectionID and rowCount are used to calculate the priority
+	collectionID int64
+	rowCount     int
+	sortOrder    string
+}
+
+// The priority is determined by the BalanceTriggerOrder configuration:
+// - "byrowcount": Higher row count collections get higher priority (processed first)
+// - "bycollectionid": Collections with smaller IDs get higher priority
+func newCollectionBalanceItem(collectionID int64, rowCount int, sortOrder string) *collectionBalanceItem {
+	priority := 0
+	if sortOrder == "bycollectionid" {
+		priority = int(collectionID)
+	} else {
+		priority = -rowCount
+	}
+
+	return &collectionBalanceItem{
+		BaseItem:        &balance.BaseItem{},
+		collectionID:    collectionID,
+		rowCount:        rowCount,
+		sortOrder:       sortOrder,
+		balancePriority: priority,
+	}
+}
+
+func (c *collectionBalanceItem) getPriority() int {
+	return c.balancePriority
+}
+
+func (c *collectionBalanceItem) setPriority(priority int) {
+	c.balancePriority = priority
+}
+
 // BalanceChecker checks the cluster distribution and generates balance tasks.
+// It is responsible for monitoring the load distribution across query nodes and
+// generating segment/channel move tasks to maintain optimal balance.
+//
+// The BalanceChecker operates in two modes:
+// 1. Stopping Balance: High-priority balance for nodes that are being stopped or read-only nodes
+// 2. Normal Balance: Regular automatic balance operations to optimize cluster performance
+//
+// Both modes use priority queues to determine the order in which collections are processed.
 type BalanceChecker struct {
 	*checkerActivation
 	meta        *meta.Meta
 	nodeManager *session.NodeManager
 	scheduler   task.Scheduler
 	targetMgr   meta.TargetManagerInterface
+	// getBalancerFunc returns the appropriate balancer for generating balance plans
 	getBalancerFunc GetBalancerFunc
 
-	normalBalanceCollectionsCurrentRound   typeutil.UniqueSet
-	stoppingBalanceCollectionsCurrentRound typeutil.UniqueSet
+	// normalBalanceQueue maintains collections pending normal balance operations,
+	// ordered by priority (row count or collection ID)
+	normalBalanceQueue *balance.PriorityQueue
+	// stoppingBalanceQueue maintains collections pending stopping balance operations,
+	// used when nodes are being gracefully stopped
+	stoppingBalanceQueue *balance.PriorityQueue
 
-	// record auto balance ts
+	// autoBalanceTs records the timestamp of the last auto balance operation
+	// to ensure balance operations don't happen too frequently
 	autoBalanceTs time.Time
 }
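The practical effect of the two trigger orders on collectionBalanceItem ordering, with hypothetical collection IDs and row counts:

	// byrowcount: priority = -rowCount, so larger collections sort ahead of smaller ones
	big := newCollectionBalanceItem(100, 5000, "byrowcount")  // balancePriority = -5000
	small := newCollectionBalanceItem(101, 200, "byrowcount") // balancePriority = -200
	// bycollectionid: priority = collectionID, so smaller IDs sort first
	byID := newCollectionBalanceItem(101, 200, "bycollectionid") // balancePriority = 101
	_, _, _ = big, small, byID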
@@ -62,14 +150,14 @@ func NewBalanceChecker(meta *meta.Meta,
 	getBalancerFunc GetBalancerFunc,
 ) *BalanceChecker {
 	return &BalanceChecker{
 		checkerActivation: newCheckerActivation(),
 		meta:              meta,
 		targetMgr:         targetMgr,
 		nodeManager:       nodeMgr,
-		normalBalanceCollectionsCurrentRound:   typeutil.NewUniqueSet(),
-		stoppingBalanceCollectionsCurrentRound: typeutil.NewUniqueSet(),
+		normalBalanceQueue:   balance.NewPriorityQueuePtr(),
+		stoppingBalanceQueue: balance.NewPriorityQueuePtr(),
 		scheduler:         scheduler,
 		getBalancerFunc:   getBalancerFunc,
 	}
 }
@@ -81,6 +169,12 @@ func (b *BalanceChecker) Description() string {
 	return "BalanceChecker checks the cluster distribution and generates balance tasks"
 }
 
+// readyToCheck determines if a collection is ready for balance operations.
+// A collection is considered ready if:
+// 1. It exists in the metadata
+// 2. It has either a current target or next target defined
+//
+// Returns true if the collection is ready for balance operations.
 func (b *BalanceChecker) readyToCheck(ctx context.Context, collectionID int64) bool {
 	metaExist := (b.meta.GetCollection(ctx, collectionID) != nil)
 	targetExist := b.targetMgr.IsNextTargetExist(ctx, collectionID) || b.targetMgr.IsCurrentTargetExist(ctx, collectionID, common.AllPartitionsID)
@@ -88,117 +182,151 @@ func (b *BalanceChecker) readyToCheck(ctx context.Context, collectionID int64) b
 	return metaExist && targetExist
 }
 
-func (b *BalanceChecker) getReplicaForStoppingBalance(ctx context.Context) []int64 {
-	hasUnbalancedCollection := false
-	defer func() {
-		if !hasUnbalancedCollection {
-			b.stoppingBalanceCollectionsCurrentRound.Clear()
-			log.RatedDebug(10, "BalanceChecker has triggered stopping balance for all "+
-				"collections in one round, clear collectionIDs for this round")
-		}
-	}()
-
-	ids := b.meta.GetAll(ctx)
-	// Sort collections using the configured sort order
-	ids = b.sortCollections(ctx, ids)
-
-	if paramtable.Get().QueryCoordCfg.EnableStoppingBalance.GetAsBool() {
-		for _, cid := range ids {
-			// if target and meta isn't ready, skip balance this collection
-			if !b.readyToCheck(ctx, cid) {
-				continue
-			}
-			if b.stoppingBalanceCollectionsCurrentRound.Contain(cid) {
-				continue
-			}
-
-			replicas := b.meta.ReplicaManager.GetByCollection(ctx, cid)
-			stoppingReplicas := make([]int64, 0)
-			for _, replica := range replicas {
-				// If there are some delegator work on query node, we need to balance channel to streamingnode forcely.
-				channelRONodes := make([]int64, 0)
-				if streamingutil.IsStreamingServiceEnabled() {
-					_, channelRONodes = utils.GetChannelRWAndRONodesFor260(replica, b.nodeManager)
-				}
-				if replica.RONodesCount()+replica.ROSQNodesCount() > 0 || len(channelRONodes) > 0 {
-					stoppingReplicas = append(stoppingReplicas, replica.GetID())
-				}
-			}
-			if len(stoppingReplicas) > 0 {
-				hasUnbalancedCollection = true
-				b.stoppingBalanceCollectionsCurrentRound.Insert(cid)
-				return stoppingReplicas
-			}
-		}
-	}
-
-	// finish current round for stopping balance if no unbalanced collection
-	hasUnbalancedCollection = false
-	return nil
-}
-
-func (b *BalanceChecker) getReplicaForNormalBalance(ctx context.Context) []int64 {
-	hasUnbalancedCollection := false
-	defer func() {
-		if !hasUnbalancedCollection {
-			b.normalBalanceCollectionsCurrentRound.Clear()
-			log.RatedDebug(10, "BalanceChecker has triggered normal balance for all "+
-				"collections in one round, clear collectionIDs for this round")
-		}
-	}()
-
-	// 1. no stopping balance and auto balance is disabled, return empty collections for balance
-	// 2. when balancer isn't active, skip auto balance
-	if !Params.QueryCoordCfg.AutoBalance.GetAsBool() || !b.IsActive() {
-		// finish current round for normal balance if normal balance isn't triggered
-		hasUnbalancedCollection = false
-		return nil
-	}
-
-	ids := b.meta.GetAll(ctx)
-	// all replicas belonging to loading collection will be skipped
-	loadedCollections := lo.Filter(ids, func(cid int64, _ int) bool {
-		collection := b.meta.GetCollection(ctx, cid)
-		return collection != nil && collection.GetStatus() == querypb.LoadStatus_Loaded
-	})
-
-	// Before performing balancing, check the CurrentTarget/LeaderView/Distribution for all collections.
-	// If any collection has unready info, skip the balance operation to avoid inconsistencies.
-	notReadyCollections := lo.Filter(loadedCollections, func(cid int64, _ int) bool {
-		// todo: should also check distribution and leader view in the future
-		return !b.targetMgr.IsCurrentTargetReady(ctx, cid)
-	})
-	if len(notReadyCollections) > 0 {
-		// finish current round for normal balance if any collection isn't ready
-		hasUnbalancedCollection = false
-		log.RatedInfo(10, "skip normal balance, cause collection not ready for balance", zap.Int64s("collectionIDs", notReadyCollections))
-		return nil
-	}
-
-	// Sort collections using the configured sort order
-	loadedCollections = b.sortCollections(ctx, loadedCollections)
-
-	// iterator one normal collection in one round
-	normalReplicasToBalance := make([]int64, 0)
-	for _, cid := range loadedCollections {
-		if b.normalBalanceCollectionsCurrentRound.Contain(cid) {
-			log.RatedDebug(10, "BalanceChecker is balancing this collection, skip balancing in this round",
-				zap.Int64("collectionID", cid))
-			continue
-		}
-		hasUnbalancedCollection = true
-		b.normalBalanceCollectionsCurrentRound.Insert(cid)
-		for _, replica := range b.meta.ReplicaManager.GetByCollection(ctx, cid) {
-			normalReplicasToBalance = append(normalReplicasToBalance, replica.GetID())
-		}
-		break
-	}
-	return normalReplicasToBalance
-}
-
-func (b *BalanceChecker) balanceReplicas(ctx context.Context, replicaIDs []int64) ([]balance.SegmentAssignPlan, []balance.ChannelAssignPlan) {
+type ReadyForBalanceFilter func(ctx context.Context, collectionID int64) bool
+
+// filterCollectionForBalance filters all collections using the provided filter functions.
+// Only collections that pass ALL filter criteria will be included in the result.
+// This is used to select collections eligible for balance operations based on
+// various conditions like load status, target readiness, etc.
+// Returns a slice of collection IDs that pass all filter criteria.
+func (b *BalanceChecker) filterCollectionForBalance(ctx context.Context, filter ...ReadyForBalanceFilter) []int64 {
+	ids := b.meta.GetAll(ctx)
+	ret := make([]int64, 0)
+	for _, cid := range ids {
+		shouldInclude := true
+		for _, f := range filter {
+			if !f(ctx, cid) {
+				shouldInclude = false
+				break
+			}
+		}
+		if shouldInclude {
+			ret = append(ret, cid)
+		}
+	}
+	return ret
+}
+
+// constructStoppingBalanceQueue creates and populates the stopping balance priority queue.
+// This queue contains collections that need balance operations due to nodes being stopped.
+// Collections are ordered by priority (row count or collection ID based on configuration).
+//
+// Returns a new priority queue with all eligible collections for stopping balance.
+// Note: cause stopping balance need to move out all data from the node, so we need to check all collections.
+func (b *BalanceChecker) constructStoppingBalanceQueue(ctx context.Context) *balance.PriorityQueue {
+	sortOrder := strings.ToLower(Params.QueryCoordCfg.BalanceTriggerOrder.GetValue())
+	if sortOrder == "" {
+		sortOrder = "byrowcount" // Default to ByRowCount
+	}
+
+	ret := b.filterCollectionForBalance(ctx, b.readyToCheck)
+	pq := balance.NewPriorityQueuePtr()
+	for _, cid := range ret {
+		rowCount := b.targetMgr.GetCollectionRowCount(ctx, cid, meta.CurrentTargetFirst)
+		item := newCollectionBalanceItem(cid, int(rowCount), sortOrder)
+		pq.Push(item)
+	}
+	b.stoppingBalanceQueue = pq
+	return pq
+}
+
+// constructNormalBalanceQueue creates and populates the normal balance priority queue.
+// This queue contains loaded collections that are ready for regular balance operations.
+// Collections must meet multiple criteria:
+// 1. Be ready for balance operations (metadata and target exist)
+// 2. Have loaded status (actively serving queries)
+// 3. Have current target ready (consistent state)
+//
+// Returns a new priority queue with all eligible collections for normal balance.
+func (b *BalanceChecker) constructNormalBalanceQueue(ctx context.Context) *balance.PriorityQueue {
+	filterLoadedCollections := func(ctx context.Context, cid int64) bool {
+		collection := b.meta.GetCollection(ctx, cid)
+		return collection != nil && collection.GetStatus() == querypb.LoadStatus_Loaded
+	}
+
+	filterTargetReadyCollections := func(ctx context.Context, cid int64) bool {
+		return b.targetMgr.IsCurrentTargetReady(ctx, cid)
+	}
+
+	sortOrder := strings.ToLower(Params.QueryCoordCfg.BalanceTriggerOrder.GetValue())
+	if sortOrder == "" {
+		sortOrder = "byrowcount" // Default to ByRowCount
+	}
+
+	ret := b.filterCollectionForBalance(ctx, b.readyToCheck, filterLoadedCollections, filterTargetReadyCollections)
+	pq := balance.NewPriorityQueuePtr()
+	for _, cid := range ret {
+		rowCount := b.targetMgr.GetCollectionRowCount(ctx, cid, meta.CurrentTargetFirst)
+		item := newCollectionBalanceItem(cid, int(rowCount), sortOrder)
+		pq.Push(item)
+	}
+	b.normalBalanceQueue = pq
+	return pq
+}
+
+// getReplicaForStoppingBalance returns replicas that need stopping balance operations.
+// A replica needs stopping balance if it has:
+// 1. Read-only (RO) nodes that need to be drained
+// 2. Read-only streaming query (ROSQ) nodes that need to be drained
+// 3. Channel read-only nodes when streaming service is enabled
+//
+// These replicas need immediate attention to move data off nodes that are being stopped.
+//
+// Returns a slice of replica IDs that need stopping balance operations.
+func (b *BalanceChecker) getReplicaForStoppingBalance(ctx context.Context, collectionID int64) []int64 {
+	filterReplicaWithRONodes := func(replica *meta.Replica, _ int) bool {
+		channelRONodes := make([]int64, 0)
+		if streamingutil.IsStreamingServiceEnabled() {
+			_, channelRONodes = utils.GetChannelRWAndRONodesFor260(replica, b.nodeManager)
+		}
+		return replica.RONodesCount()+replica.ROSQNodesCount() > 0 || len(channelRONodes) > 0
+	}
+
+	// filter replicas with RONodes or channelRONodes
+	replicas := b.meta.ReplicaManager.GetByCollection(ctx, collectionID)
+	ret := make([]int64, 0)
+	for _, replica := range replicas {
+		if filterReplicaWithRONodes(replica, 0) {
+			ret = append(ret, replica.GetID())
+		}
+	}
+	return ret
+}
+
+// getReplicaForNormalBalance returns all replicas for a collection for normal balance operations.
+// Unlike stopping balance, normal balance considers all replicas regardless of their node status.
+// This allows for comprehensive load balancing across the entire collection.
+//
+// Returns a slice of all replica IDs for the collection.
+func (b *BalanceChecker) getReplicaForNormalBalance(ctx context.Context, collectionID int64) []int64 {
+	replicas := b.meta.ReplicaManager.GetByCollection(ctx, collectionID)
+	return lo.Map(replicas, func(replica *meta.Replica, _ int) int64 {
+		return replica.GetID()
+	})
+}
+
+// generateBalanceTasksFromReplicas generates balance tasks for the given replicas.
+// This method is the core of the balance operation that:
+// 1. Uses the balancer to create segment and channel assignment plans
+// 2. Converts these plans into executable tasks
+// 3. Sets appropriate priorities and reasons for the tasks
+//
+// The process involves:
+// - Getting balance plans from the configured balancer for each replica
+// - Creating segment move tasks from segment assignment plans
+// - Creating channel move tasks from channel assignment plans
+// - Setting task metadata (priority, reason, timeout)
+//
+// Returns:
+// - segmentTasks: tasks for moving segments between nodes
+// - channelTasks: tasks for moving channels between nodes
+func (b *BalanceChecker) generateBalanceTasksFromReplicas(ctx context.Context, replicas []int64, config balanceConfig) ([]task.Task, []task.Task) {
+	if len(replicas) == 0 {
+		return nil, nil
+	}
+
 	segmentPlans, channelPlans := make([]balance.SegmentAssignPlan, 0), make([]balance.ChannelAssignPlan, 0)
-	for _, rid := range replicaIDs {
+	for _, rid := range replicas {
 		replica := b.meta.ReplicaManager.Get(ctx, rid)
 		if replica == nil {
 			continue
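The filter-based selection introduced above composes ReadyForBalanceFilter functions, and a collection enters the balance queue only when every filter returns true. A minimal sketch of plugging in an extra, hypothetical filter (not part of this change):

	// hypothetical filter: skip collections with no rows in the current target
	skipEmptyCollections := func(ctx context.Context, cid int64) bool {
		return b.targetMgr.GetCollectionRowCount(ctx, cid, meta.CurrentTargetFirst) > 0
	}
	ids := b.filterCollectionForBalance(ctx, b.readyToCheck, skipEmptyCollections)
	_ = ids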
@@ -210,62 +338,103 @@ func (b *BalanceChecker) balanceReplicas(ctx context.Context, replicaIDs []int64
 			balance.PrintNewBalancePlans(replica.GetCollectionID(), replica.GetID(), sPlans, cPlans)
 		}
 	}
-	return segmentPlans, channelPlans
-}
-
-// Notice: balance checker will generate tasks for multiple collections in one round,
-// so generated tasks will be submitted to scheduler directly, and return nil
-func (b *BalanceChecker) Check(ctx context.Context) []task.Task {
-	segmentBatchSize := paramtable.Get().QueryCoordCfg.BalanceSegmentBatchSize.GetAsInt()
-	channelBatchSize := paramtable.Get().QueryCoordCfg.BalanceChannelBatchSize.GetAsInt()
-	balanceOnMultipleCollections := paramtable.Get().QueryCoordCfg.EnableBalanceOnMultipleCollections.GetAsBool()
 
 	segmentTasks := make([]task.Task, 0)
 	channelTasks := make([]task.Task, 0)
-	generateBalanceTaskForReplicas := func(replicas []int64) {
-		segmentPlans, channelPlans := b.balanceReplicas(ctx, replicas)
-		tasks := balance.CreateSegmentTasksFromPlans(ctx, b.ID(), Params.QueryCoordCfg.SegmentTaskTimeout.GetAsDuration(time.Millisecond), segmentPlans)
+	// Create segment tasks with error handling
+	if len(segmentPlans) > 0 {
+		tasks := balance.CreateSegmentTasksFromPlans(ctx, b.ID(), config.segmentTaskTimeout, segmentPlans)
+		if len(tasks) > 0 {
 			task.SetPriority(task.TaskPriorityLow, tasks...)
 			task.SetReason("segment unbalanced", tasks...)
 			segmentTasks = append(segmentTasks, tasks...)
-
-		tasks = balance.CreateChannelTasksFromPlans(ctx, b.ID(), Params.QueryCoordCfg.ChannelTaskTimeout.GetAsDuration(time.Millisecond), channelPlans)
-		task.SetReason("channel unbalanced", tasks...)
-		channelTasks = append(channelTasks, tasks...)
-	}
-
-	stoppingReplicas := b.getReplicaForStoppingBalance(ctx)
-	if len(stoppingReplicas) > 0 {
-		// check for stopping balance first
-		generateBalanceTaskForReplicas(stoppingReplicas)
-		// iterate all collection to find a collection to balance
-		for len(segmentTasks) < segmentBatchSize && len(channelTasks) < channelBatchSize && b.stoppingBalanceCollectionsCurrentRound.Len() > 0 {
-			if !balanceOnMultipleCollections && (len(segmentTasks) > 0 || len(channelTasks) > 0) {
-				// if balance on multiple collections is disabled, and there are already some tasks, break
-				break
-			}
-			replicasToBalance := b.getReplicaForStoppingBalance(ctx)
-			generateBalanceTaskForReplicas(replicasToBalance)
-		}
-	} else {
-		// then check for auto balance
-		if time.Since(b.autoBalanceTs) > paramtable.Get().QueryCoordCfg.AutoBalanceInterval.GetAsDuration(time.Millisecond) {
-			b.autoBalanceTs = time.Now()
-			replicasToBalance := b.getReplicaForNormalBalance(ctx)
-			generateBalanceTaskForReplicas(replicasToBalance)
-			// iterate all collection to find a collection to balance
-			for len(segmentTasks) < segmentBatchSize && len(channelTasks) < channelBatchSize && b.normalBalanceCollectionsCurrentRound.Len() > 0 {
-				if !balanceOnMultipleCollections && (len(segmentTasks) > 0 || len(channelTasks) > 0) {
-					// if balance on multiple collections is disabled, and there are already some tasks, break
-					break
-				}
-				replicasToBalance := b.getReplicaForNormalBalance(ctx)
-				generateBalanceTaskForReplicas(replicasToBalance)
-			}
-		}
 		}
 	}
 
+	// Create channel tasks with error handling
+	if len(channelPlans) > 0 {
+		tasks := balance.CreateChannelTasksFromPlans(ctx, b.ID(), config.channelTaskTimeout, channelPlans)
+		if len(tasks) > 0 {
+			task.SetReason("channel unbalanced", tasks...)
+			channelTasks = append(channelTasks, tasks...)
+		}
+	}
+
+	return segmentTasks, channelTasks
+}
+
+// processBalanceQueue processes balance queue with common logic for both normal and stopping balance.
+// This is a template method that implements the core queue processing algorithm while allowing
+// different balance types to provide their own specific logic through function parameters.
+//
+// The method implements several safeguards:
+// 1. Batch size limits to prevent generating too many tasks at once
+// 2. Collection count limits to prevent long-running operations
+// 3. Multi-collection balance control to avoid resource contention
+//
+// Processing flow:
+// 1. Get or construct the priority queue for collections
+// 2. Pop collections from queue in priority order
+// 3. Get replicas that need balance for the collection
+// 4. Generate balance tasks for those replicas
+// 5. Accumulate tasks until batch limits are reached
+//
+// Parameters:
+// - ctx: context for the operation
+// - getReplicasFunc: function to get replicas for a collection (normal vs stopping)
+// - constructQueueFunc: function to construct a new priority queue if needed
+// - getQueueFunc: function to get the existing priority queue
+// - config: balance configuration with batch sizes and limits
+//
+// Returns:
+// - generatedSegmentTaskNum: number of generated segment balance tasks
+// - generatedChannelTaskNum: number of generated channel balance tasks
+func (b *BalanceChecker) processBalanceQueue(
+	ctx context.Context,
+	getReplicasFunc func(context.Context, int64) []int64,
+	constructQueueFunc func(context.Context) *balance.PriorityQueue,
+	getQueueFunc func() *balance.PriorityQueue,
+	config balanceConfig,
+) (int, int) {
+	checkCollectionCount := 0
+	pq := getQueueFunc()
+	if pq == nil || pq.Len() == 0 {
+		pq = constructQueueFunc(ctx)
+	}
+
+	generatedSegmentTaskNum := 0
+	generatedChannelTaskNum := 0
+
+	for generatedSegmentTaskNum < config.segmentBatchSize &&
+		generatedChannelTaskNum < config.channelBatchSize &&
+		checkCollectionCount < config.maxCheckCollectionCount &&
+		pq.Len() > 0 {
+		// Break if balanceOnMultipleCollections is disabled and we already have tasks
+		if !config.balanceOnMultipleCollections && (generatedSegmentTaskNum > 0 || generatedChannelTaskNum > 0) {
+			log.Debug("Balance on multiple collections disabled, stopping after first collection")
+			break
+		}
+
+		item := pq.Pop().(*collectionBalanceItem)
+		checkCollectionCount++
+
+		replicasToBalance := getReplicasFunc(ctx, item.collectionID)
+		if len(replicasToBalance) == 0 {
+			continue
+		}
+
+		newSegmentTasks, newChannelTasks := b.generateBalanceTasksFromReplicas(ctx, replicasToBalance, config)
+		generatedSegmentTaskNum += len(newSegmentTasks)
+		generatedChannelTaskNum += len(newChannelTasks)
+		b.submitTasks(newSegmentTasks, newChannelTasks)
+	}
+	return generatedSegmentTaskNum, generatedChannelTaskNum
+}
+
+// submitTasks submits the generated balance tasks to the scheduler for execution.
+// This method handles the final step of the balance process by adding all
+// generated tasks to the task scheduler, which will execute them asynchronously.
+func (b *BalanceChecker) submitTasks(segmentTasks, channelTasks []task.Task) {
 	for _, task := range segmentTasks {
 		b.scheduler.Add(task)
 	}
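A hedged illustration of the processBalanceQueue contract as the new Check method (next hunk) uses it: the caller supplies how to fetch replicas, how to rebuild the queue, and how to read the cached queue; tasks are submitted to the scheduler inside the call, so only their counts come back:

	segNum, chNum := b.processBalanceQueue(ctx,
		b.getReplicaForNormalBalance,                                   // replicas for one collection
		b.constructNormalBalanceQueue,                                  // rebuild the queue when empty
		func() *balance.PriorityQueue { return b.normalBalanceQueue },  // reuse the cached queue
		b.loadBalanceConfig(),
	)
	_ = segNum + chNum // Check only uses the counts to decide whether the round produced work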
@@ -273,45 +442,99 @@ func (b *BalanceChecker) Check(ctx context.Context) []task.Task {
 	for _, task := range channelTasks {
 		b.scheduler.Add(task)
 	}
+}
+
+// Check is the main entry point for balance operations.
+// This method implements a two-phase balance strategy with clear priorities:
+//
+// **Phase 1: Stopping Balance (Higher Priority)**
+// - Handles nodes that are being gracefully stopped
+// - Moves data off read-only nodes to active nodes
+// - Critical for maintaining service availability during node shutdowns
+// - Runs immediately when stopping nodes are detected
+//
+// **Phase 2: Normal Balance (Lower Priority)**
+// - Performs regular load balancing to optimize cluster performance
+// - Runs periodically based on autoBalanceInterval configuration
+// - Considers all collections and distributes load evenly
+// - Skipped if stopping balance tasks were generated
+//
+// **Key Design Decisions:**
+// 1. Tasks are submitted directly to scheduler and nil is returned
+//    (unlike other checkers that return tasks to caller)
+// 2. Stopping balance always takes precedence over normal balance
+// 3. Performance monitoring alerts for operations > 100ms
+// 4. Configuration is loaded fresh each time to respect dynamic updates
+//
+// **Return Value:**
+// Always returns nil because tasks are submitted directly to the scheduler.
+// This design allows the balance checker to handle multiple collections
+// and large numbers of tasks efficiently.
+//
+// **Performance Monitoring:**
+// The method tracks execution time and logs warnings for slow operations
+// to help identify performance bottlenecks in large clusters.
+func (b *BalanceChecker) Check(ctx context.Context) []task.Task {
+	// Skip balance operations if the checker is not active
+	if !b.IsActive() {
+		return nil
+	}
+
+	// Performance monitoring: track execution time
+	start := time.Now()
+	defer func() {
+		duration := time.Since(start)
+		if duration > 100*time.Millisecond {
+			log.Info("Balance check too slow", zap.Duration("duration", duration))
+		}
+	}()
+
+	// Load current configuration to respect dynamic parameter changes
+	config := b.loadBalanceConfig()
+
+	// Phase 1: Process stopping balance first (higher priority)
+	// This handles nodes that are being gracefully stopped and need immediate attention
+	if paramtable.Get().QueryCoordCfg.EnableStoppingBalance.GetAsBool() {
+		generatedSegmentTaskNum, generatedChannelTaskNum := b.processBalanceQueue(ctx,
+			b.getReplicaForStoppingBalance,
+			b.constructStoppingBalanceQueue,
+			func() *balance.PriorityQueue { return b.stoppingBalanceQueue },
+			config)
+
+		if generatedSegmentTaskNum > 0 || generatedChannelTaskNum > 0 {
+			// clean up the normal balance queue when stopping balance generated tasks
+			// make sure that next time when trigger normal balance, a new normal balance round will be started
+			b.normalBalanceQueue = nil
+
+			return nil
+		}
+	}
+
+	// Phase 2: Process normal balance if no stopping balance was needed
+	// This handles regular load balancing operations for cluster optimization
+	if paramtable.Get().QueryCoordCfg.AutoBalance.GetAsBool() {
+		// Respect the auto balance interval to prevent too frequent operations
+		if time.Since(b.autoBalanceTs) <= config.autoBalanceInterval {
+			return nil
+		}
+
+		generatedSegmentTaskNum, generatedChannelTaskNum := b.processBalanceQueue(ctx,
+			b.getReplicaForNormalBalance,
+			b.constructNormalBalanceQueue,
+			func() *balance.PriorityQueue { return b.normalBalanceQueue },
+			config)
+
+		// Submit normal balance tasks if any were generated
+		// Update the auto balance timestamp to enforce the interval
+		if generatedSegmentTaskNum > 0 || generatedChannelTaskNum > 0 {
+			b.autoBalanceTs = time.Now()
+
+			// clean up the stopping balance queue when normal balance generated tasks
+			// make sure that next time when trigger stopping balance, a new stopping balance round will be started
+			b.stoppingBalanceQueue = nil
+		}
+	}
+
+	// Always return nil as tasks are submitted directly to scheduler
 	return nil
 }
-
-func (b *BalanceChecker) sortCollections(ctx context.Context, collections []int64) []int64 {
-	sortOrder := strings.ToLower(Params.QueryCoordCfg.BalanceTriggerOrder.GetValue())
-	if sortOrder == "" {
-		sortOrder = "byrowcount" // Default to ByRowCount
-	}
-
-	collectionRowCountMap := make(map[int64]int64)
-	for _, cid := range collections {
-		collectionRowCountMap[cid] = b.targetMgr.GetCollectionRowCount(ctx, cid, meta.CurrentTargetFirst)
-	}
-
-	// Define sorting functions
-	sortByRowCount := func(i, j int) bool {
-		rowCount1 := collectionRowCountMap[collections[i]]
-		rowCount2 := collectionRowCountMap[collections[j]]
-		return rowCount1 > rowCount2 || (rowCount1 == rowCount2 && collections[i] < collections[j])
-	}
-
-	sortByCollectionID := func(i, j int) bool {
-		return collections[i] < collections[j]
-	}
-
-	// Select the appropriate sorting function
-	var sortFunc func(i, j int) bool
-	switch sortOrder {
-	case "byrowcount":
-		sortFunc = sortByRowCount
-	case "bycollectionid":
-		sortFunc = sortByCollectionID
-	default:
-		log.Warn("Invalid balance sort order configuration, using default ByRowCount", zap.String("sortOrder", sortOrder))
-		sortFunc = sortByRowCount
-	}
-
-	// Sort the collections
-	sort.Slice(collections, sortFunc)
-	return collections
-}
File diff suppressed because it is too large
@@ -2347,6 +2347,8 @@ type queryCoordConfig struct {
 
 	// query node task parallelism factor
 	QueryNodeTaskParallelismFactor ParamItem `refreshable:"true"`
+
+	BalanceCheckCollectionMaxCount ParamItem `refreshable:"true"`
 }
 
 func (p *queryCoordConfig) init(base *BaseTable) {
@@ -2979,6 +2981,15 @@ If this parameter is set false, Milvus simply searches the growing segments with
 		Export: false,
 	}
 	p.QueryNodeTaskParallelismFactor.Init(base.mgr)
+
+	p.BalanceCheckCollectionMaxCount = ParamItem{
+		Key:          "queryCoord.balanceCheckCollectionMaxCount",
+		Version:      "2.6.2",
+		DefaultValue: "100",
+		Doc:          "the max collection count for each balance check",
+		Export:       false,
+	}
+	p.BalanceCheckCollectionMaxCount.Init(base.mgr)
 }
 
 // /////////////////////////////////////////////////////////////////////////////
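Since the new item is declared refreshable, it can be read and overridden at runtime the same way the test below exercises queryNodeTaskParallelismFactor; for example (override value is illustrative):

	assert.Equal(t, 100, Params.BalanceCheckCollectionMaxCount.GetAsInt()) // default
	params.Save("queryCoord.balanceCheckCollectionMaxCount", "50")
	assert.Equal(t, 50, Params.BalanceCheckCollectionMaxCount.GetAsInt())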
@@ -393,6 +393,8 @@ func TestComponentParam(t *testing.T) {
 		assert.Equal(t, 1, Params.QueryNodeTaskParallelismFactor.GetAsInt())
 		params.Save("queryCoord.queryNodeTaskParallelismFactor", "2")
 		assert.Equal(t, 2, Params.QueryNodeTaskParallelismFactor.GetAsInt())
+
+		assert.Equal(t, 100, Params.BalanceCheckCollectionMaxCount.GetAsInt())
 	})
 
 	t.Run("test queryNodeConfig", func(t *testing.T) {