enhance: wal balance consider the wal status on streamingnode (#43265)
issue: #42995

- Don't balance the wal if the producing-consuming lag is too long.
- Don't balance if rebalance is disabled in the config.
- Don't balance if the wal was balanced recently.

Signed-off-by: chyezh <chyezh@outlook.com>
This commit is contained in: parent 3aacd179f7, commit 07fa2cbdd3
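The three conditions above collapse into a single gate on the coordinator (see CurrentLayout.AllowRebalance in the diff below). A condensed, self-contained sketch of that logic, with simplified parameters standing in for the real CurrentLayout fields:

package main

import (
	"fmt"
	"time"
)

// allowRebalance is a condensed restatement of the gate this diff adds as
// CurrentLayout.AllowRebalance; names and parameters are simplified for
// illustration.
func allowRebalance(enabled bool, lastAssign time.Time, minInterval, recoveryLag, lagThreshold time.Duration) bool {
	if !enabled { // 1. rebalance disabled by config
		return false
	}
	if time.Since(lastAssign) < minInterval { // 2. this wal was balanced too recently
		return false
	}
	return recoveryLag <= lagThreshold // 3. producing-consuming lag must stay small
}

func main() {
	// 10s of consumer lag against the 1s default threshold: the wal stays put.
	fmt.Println(allowRebalance(true, time.Now().Add(-10*time.Minute), 5*time.Minute, 10*time.Second, time.Second)) // false
}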
@@ -1238,6 +1238,13 @@ streaming:
     operationTimeout: 30s
     balancePolicy:
       name: vchannelFair # The name of the balance policy, vchannelFair by default
+      # Whether to allow rebalance, true by default.
+      # If rebalance is not allowed, only lost-wal recovery is executed; rebalancing (moving a pchannel from one node to another) is skipped.
+      allowRebalance: true
+      minRebalanceIntervalThreshold: 5m # The minimum interval between rebalances for each wal, 5m by default.
+      # The threshold of recovery lag for rebalance, 1s by default.
+      # If the recovery lag is greater than this threshold, rebalance of the current pchannel is not allowed.
+      allowRebalanceRecoveryLagThreshold: 1s
       vchannelFair:
         # The weight of pchannel count in the vchannelFair balance policy;
         # the pchannel count will be more evenly distributed if the weight is greater, 0.4 by default
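The allowRebalanceRecoveryLagThreshold knob compares two TSO time ticks reported per pchannel (see the StreamingNodeRWWALMetrics proto further below). A sketch of how the lag can be derived from them; recoveryLag here is an illustrative helper, not necessarily how the repository's RWWALMetrics.RecoveryLag is written:

package main

import (
	"fmt"
	"time"

	"github.com/milvus-io/milvus/pkg/v2/util/tsoutil"
)

// recoveryLag derives the producing-consuming lag from the two TSO time
// ticks reported per pchannel: how far the recovery (consumer) side trails
// the mvcc (producer) side.
func recoveryLag(mvccTimeTick, recoveryTimeTick uint64) time.Duration {
	return tsoutil.PhysicalTime(mvccTimeTick).Sub(tsoutil.PhysicalTime(recoveryTimeTick))
}

func main() {
	now := time.Now()
	mvcc := tsoutil.ComposeTSByTime(now, 0)
	recovery := tsoutil.ComposeTSByTime(now.Add(-10*time.Second), 0)
	fmt.Println(recoveryLag(mvcc, recovery)) // ~10s, far above the 1s default threshold
}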
@@ -436,14 +436,15 @@ func generateCurrentLayout(view *channel.PChannelView, allNodesStatus map[int64]
 			)
 		}
 	}
-	allNodesInfo := make(map[int64]types.StreamingNodeInfo, len(allNodesStatus))
+	allNodesInfo := make(map[int64]types.StreamingNodeStatus, len(allNodesStatus))
 	for serverID, nodeStatus := range allNodesStatus {
 		// filter out the unhealthy nodes.
 		if nodeStatus.IsHealthy() {
-			allNodesInfo[serverID] = nodeStatus.StreamingNodeInfo
+			allNodesInfo[serverID] = *nodeStatus
 		}
 	}
 	return CurrentLayout{
+		Config:       newCommonBalancePolicyConfig(),
 		Channels:     channels,
 		Stats:        view.Stats,
 		AllNodesInfo: allNodesInfo,
@@ -10,6 +10,7 @@ import (
 	"github.com/cockroachdb/errors"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/mock"
+	"go.uber.org/atomic"

 	"github.com/milvus-io/milvus/internal/mocks/mock_metastore"
 	"github.com/milvus-io/milvus/internal/mocks/streamingnode/client/mock_manager"
@@ -23,6 +24,7 @@ import (
 	"github.com/milvus-io/milvus/pkg/v2/util/etcd"
 	"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
 	"github.com/milvus-io/milvus/pkg/v2/util/syncutil"
+	"github.com/milvus-io/milvus/pkg/v2/util/tsoutil"
 	"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
 )
@@ -62,7 +64,7 @@ func TestBalancer(t *testing.T) {
 		},
 		4: {
 			StreamingNodeInfo: types.StreamingNodeInfo{
-				ServerID: 3,
+				ServerID: 4,
 				Address:  "localhost:3",
 			},
 			Err: types.ErrStopping,
@@ -185,3 +187,132 @@ func TestBalancer(t *testing.T) {
 	b.Close()
 	assert.ErrorIs(t, f.Get(), balancer.ErrBalancerClosed)
 }
+
+func TestBalancer_WithRecoveryLag(t *testing.T) {
+	paramtable.Init()
+	err := etcd.InitEtcdServer(true, "", t.TempDir(), "stdout", "info")
+	assert.NoError(t, err)
+	defer etcd.StopEtcdServer()
+
+	etcdClient, err := etcd.GetEmbedEtcdClient()
+	assert.NoError(t, err)
+	channel.ResetStaticPChannelStatsManager()
+	channel.RecoverPChannelStatsManager([]string{})
+
+	lag := atomic.NewBool(true)
+	streamingNodeManager := mock_manager.NewMockManagerClient(t)
+	streamingNodeManager.EXPECT().WatchNodeChanged(mock.Anything).Return(make(chan struct{}), nil)
+	streamingNodeManager.EXPECT().Assign(mock.Anything, mock.Anything).Return(nil)
+	streamingNodeManager.EXPECT().Remove(mock.Anything, mock.Anything).Return(nil)
+	streamingNodeManager.EXPECT().CollectAllStatus(mock.Anything).RunAndReturn(func(ctx context.Context) (map[int64]*types.StreamingNodeStatus, error) {
+		now := time.Now()
+		mvccTimeTick := tsoutil.ComposeTSByTime(now, 0)
+		recoveryTimeTick := tsoutil.ComposeTSByTime(now.Add(-time.Second*10), 0)
+		if !lag.Load() {
+			recoveryTimeTick = mvccTimeTick
+		}
+		return map[int64]*types.StreamingNodeStatus{
+			1: {
+				StreamingNodeInfo: types.StreamingNodeInfo{
+					ServerID: 1,
+					Address:  "localhost:1",
+				},
+				Metrics: types.StreamingNodeMetrics{
+					WALMetrics: map[types.ChannelID]types.WALMetrics{
+						channel.ChannelID{Name: "test-channel-1"}: types.RWWALMetrics{MVCCTimeTick: mvccTimeTick, RecoveryTimeTick: recoveryTimeTick},
+						channel.ChannelID{Name: "test-channel-2"}: types.RWWALMetrics{MVCCTimeTick: mvccTimeTick, RecoveryTimeTick: recoveryTimeTick},
+						channel.ChannelID{Name: "test-channel-3"}: types.RWWALMetrics{MVCCTimeTick: mvccTimeTick, RecoveryTimeTick: recoveryTimeTick},
+					},
+				},
+			},
+			2: {
+				StreamingNodeInfo: types.StreamingNodeInfo{
+					ServerID: 2,
+					Address:  "localhost:2",
+				},
+			},
+		}, nil
+	})
+
+	catalog := mock_metastore.NewMockStreamingCoordCataLog(t)
+	resource.InitForTest(resource.OptETCD(etcdClient), resource.OptStreamingCatalog(catalog), resource.OptStreamingManagerClient(streamingNodeManager))
+	catalog.EXPECT().GetVersion(mock.Anything).Return(nil, nil)
+	catalog.EXPECT().SaveVersion(mock.Anything, mock.Anything).Return(nil)
+	catalog.EXPECT().ListPChannel(mock.Anything).Unset()
+	catalog.EXPECT().ListPChannel(mock.Anything).RunAndReturn(func(ctx context.Context) ([]*streamingpb.PChannelMeta, error) {
+		return []*streamingpb.PChannelMeta{
+			{
+				Channel: &streamingpb.PChannelInfo{
+					Name:       "test-channel-1",
+					Term:       1,
+					AccessMode: streamingpb.PChannelAccessMode_PCHANNEL_ACCESS_READWRITE,
+				},
+				State: streamingpb.PChannelMetaState_PCHANNEL_META_STATE_ASSIGNED,
+				Node:  &streamingpb.StreamingNodeInfo{ServerId: 1},
+			},
+			{
+				Channel: &streamingpb.PChannelInfo{
+					Name:       "test-channel-2",
+					Term:       1,
+					AccessMode: streamingpb.PChannelAccessMode_PCHANNEL_ACCESS_READWRITE,
+				},
+				State: streamingpb.PChannelMetaState_PCHANNEL_META_STATE_ASSIGNED,
+				Node:  &streamingpb.StreamingNodeInfo{ServerId: 1},
+			},
+			{
+				Channel: &streamingpb.PChannelInfo{
+					Name:       "test-channel-3",
+					Term:       1,
+					AccessMode: streamingpb.PChannelAccessMode_PCHANNEL_ACCESS_READWRITE,
+				},
+				State: streamingpb.PChannelMetaState_PCHANNEL_META_STATE_ASSIGNED,
+				Node:  &streamingpb.StreamingNodeInfo{ServerId: 1},
+			},
+			{
+				Channel: &streamingpb.PChannelInfo{
+					Name:       "test-channel-4",
+					Term:       1,
+					AccessMode: streamingpb.PChannelAccessMode_PCHANNEL_ACCESS_READWRITE,
+				},
+				State: streamingpb.PChannelMetaState_PCHANNEL_META_STATE_ASSIGNED,
+				Node:  &streamingpb.StreamingNodeInfo{ServerId: 2},
+			},
+		}, nil
+	})
+	catalog.EXPECT().SavePChannels(mock.Anything, mock.Anything).Return(nil).Maybe()
+
+	ctx := context.Background()
+	b, err := balancer.RecoverBalancer(ctx)
+	assert.NoError(t, err)
+	assert.NotNil(t, b)
+
+	b.Trigger(context.Background())
+	ctx2, cancel := context.WithTimeout(ctx, 2*time.Second)
+	defer cancel()
+	b.WatchChannelAssignments(ctx2, func(version typeutil.VersionInt64Pair, relations []types.PChannelInfoAssigned) error {
+		counts := map[int64]int{}
+		for _, relation := range relations {
+			assert.Equal(t, relation.Channel.AccessMode, types.AccessModeRW)
+			counts[relation.Node.ServerID]++
+		}
+		assert.Equal(t, 2, len(counts))
+		assert.Equal(t, 3, counts[1])
+		assert.Equal(t, 1, counts[2])
+		return nil
+	})
+
+	lag.Store(false)
+	b.Trigger(context.Background())
+	doneErr := errors.New("done")
+	b.WatchChannelAssignments(context.Background(), func(version typeutil.VersionInt64Pair, relations []types.PChannelInfoAssigned) error {
+		counts := map[int64]int{}
+		for _, relation := range relations {
+			assert.Equal(t, relation.Channel.AccessMode, types.AccessModeRW)
+			counts[relation.Node.ServerID]++
+		}
+		if len(counts) == 2 && counts[1] == 2 && counts[2] == 2 {
+			return doneErr
+		}
+		return nil
+	})
+}
@@ -1,6 +1,8 @@
 package channel

 import (
+	"time"
+
 	"google.golang.org/protobuf/proto"

 	"github.com/milvus-io/milvus/pkg/v2/proto/streamingpb"
@@ -91,6 +93,11 @@ func (c *PChannelMeta) IsAssigned() bool {
 	return c.inner.State == streamingpb.PChannelMetaState_PCHANNEL_META_STATE_ASSIGNED
 }

+// LastAssignTimestamp returns the last assigned timestamp.
+func (c *PChannelMeta) LastAssignTimestamp() time.Time {
+	return time.Unix(int64(c.inner.LastAssignTimestampSeconds), 0)
+}
+
 // State returns the state of the channel.
 func (c *PChannelMeta) State() streamingpb.PChannelMetaState {
 	return c.inner.State
@@ -140,6 +147,7 @@ func (m *mutablePChannel) AssignToServerDone() {
 	if m.inner.State == streamingpb.PChannelMetaState_PCHANNEL_META_STATE_ASSIGNING {
 		m.inner.Histories = make([]*streamingpb.PChannelAssignmentLog, 0)
 		m.inner.State = streamingpb.PChannelMetaState_PCHANNEL_META_STATE_ASSIGNED
+		m.inner.LastAssignTimestampSeconds = uint64(time.Now().Unix())
 	}
 }
@@ -2,6 +2,7 @@ package channel

 import (
 	"fmt"
+	"time"

 	"github.com/milvus-io/milvus/pkg/v2/streaming/util/types"
 )
@@ -21,7 +22,9 @@ func newPChannelView(metas map[ChannelID]*PChannelMeta) *PChannelView {
 			panic(fmt.Sprintf("duplicate rw channel: %s", id.String()))
 		}
 		view.Channels[id] = meta
-		view.Stats[id] = StaticPChannelStatsManager.MustGet().GetPChannelStats(id).View()
+		stat := StaticPChannelStatsManager.MustGet().GetPChannelStats(id).View()
+		stat.LastAssignTimestamp = meta.LastAssignTimestamp()
+		view.Stats[id] = stat
 	}
 	return view
 }
@@ -34,5 +37,6 @@ type PChannelView struct {

 // PChannelStatsView is the view of the pchannel stats.
 type PChannelStatsView struct {
-	VChannels map[string]int64
+	LastAssignTimestamp time.Time
+	VChannels           map[string]int64
 }
@@ -108,7 +108,7 @@ func (p *expectedLayoutForVChannelFairPolicy) Assign(channelID types.ChannelID,
 	info.Term++
 	p.Assignments[channelID] = types.PChannelInfoAssigned{
 		Channel: info,
-		Node:    node,
+		Node:    node.StreamingNodeInfo,
 	}
 	p.Nodes[node.ServerID].AssignedChannels[channelID] = struct{}{}
 	p.Nodes[node.ServerID].AssignedVChannelCount += len(stats.VChannels)
@@ -171,6 +171,9 @@ func (p *expectedLayoutForVChannelFairPolicy) FindTheLeastUnbalanceScoreIncremen
 	var targetChannelID types.ChannelID
 	minScore := math.MaxFloat64
 	for channelID := range p.Assignments {
+		if !p.CurrentLayout.AllowRebalance(channelID) {
+			continue
+		}
 		serverID := p.Assignments[channelID].Node.ServerID
 		p.Unassign(channelID)
 		currentScore := p.GlobalUnbalancedScore
@@ -2,6 +2,7 @@ package vchannelfair

 import (
 	"testing"
+	"time"

 	"github.com/stretchr/testify/assert"
@@ -101,10 +102,107 @@ func TestPChannelCountFair(t *testing.T) {
 	}
 	assert.NoError(t, err)

 	expected, err = policy.Balance(newLayout(map[string]int{
 		"c1": 1,
 		"c2": 2,
 		"c3": 3,
 		"c4": 3,
 		"c5": 3,
 	}, make(map[string]map[string]int64), []int64{1, 2, 3}))
 	assert.NoError(t, err)

 	_, err = policy.Balance(balancer.CurrentLayout{})
 	assert.Error(t, err)
 }
+
+func TestPChannelCountFairWithoutRebalance(t *testing.T) {
+	paramtable.Init()
+	b := &PolicyBuilder{}
+	policy := b.Build()
+
+	createNewLayout := func() balancer.CurrentLayout {
+		return newLayout(map[string]int{
+			"c1": 1,
+			"c2": 2,
+			"c3": 3,
+			"c4": 3,
+			"c5": 3,
+			"c6": 3,
+		}, make(map[string]map[string]int64), []int64{1, 2, 3})
+	}
+
+	// test allow rebalance
+	layout := createNewLayout()
+	expected, err := policy.Balance(layout)
+	assert.NoError(t, err)
+	assert.Equal(t, 6, len(expected.ChannelAssignment))
+	counts := countByServerID(expected)
+	assert.Equal(t, 3, len(counts))
+	for _, count := range counts {
+		assert.Equal(t, count, 2)
+	}
+
+	// test allow rebalance = false
+	layout = createNewLayout()
+	layout.Config.AllowRebalance = false
+	expected, err = policy.Balance(layout)
+	assert.NoError(t, err)
+	assert.Equal(t, 6, len(expected.ChannelAssignment))
+	assert.Equal(t, int64(1), expected.ChannelAssignment[newChannelID("c1")].Node.ServerID)
+	assert.Equal(t, int64(2), expected.ChannelAssignment[newChannelID("c2")].Node.ServerID)
+	assert.Equal(t, int64(3), expected.ChannelAssignment[newChannelID("c3")].Node.ServerID)
+	assert.Equal(t, int64(3), expected.ChannelAssignment[newChannelID("c4")].Node.ServerID)
+	assert.Equal(t, int64(3), expected.ChannelAssignment[newChannelID("c5")].Node.ServerID)
+	assert.Equal(t, int64(3), expected.ChannelAssignment[newChannelID("c6")].Node.ServerID)
+	assert.Equal(t, 3, len(countByServerID(expected)))
+
+	// test min rebalance interval threshold
+	layout = createNewLayout()
+	layout.Config.MinRebalanceIntervalThreshold = time.Hour
+	layout.Stats[newChannelID("c3")] = channel.PChannelStatsView{
+		LastAssignTimestamp: time.Now(),
+		VChannels:           map[string]int64{},
+	}
+	layout.Stats[newChannelID("c4")] = channel.PChannelStatsView{
+		LastAssignTimestamp: time.Now(),
+		VChannels:           map[string]int64{},
+	}
+	layout.Stats[newChannelID("c5")] = channel.PChannelStatsView{
+		LastAssignTimestamp: time.Now(),
+		VChannels:           map[string]int64{},
+	}
+	expected, err = policy.Balance(layout)
+	assert.NoError(t, err)
+	assert.Equal(t, 6, len(expected.ChannelAssignment))
+	assert.Equal(t, int64(3), expected.ChannelAssignment[newChannelID("c3")].Node.ServerID)
+	assert.Equal(t, int64(3), expected.ChannelAssignment[newChannelID("c4")].Node.ServerID)
+	assert.Equal(t, int64(3), expected.ChannelAssignment[newChannelID("c5")].Node.ServerID)
+	assert.NotEqual(t, int64(3), expected.ChannelAssignment[newChannelID("c6")].Node.ServerID)
+	assert.Equal(t, 3, len(countByServerID(expected)))
+
+	// test allow rebalance = false, but some channels are not assigned.
+	layout = newLayout(map[string]int{
+		"c1": -1,
+		"c2": 3,
+		"c3": -1,
+		"c4": -1,
+		"c5": 3,
+		"c6": 3,
+	}, make(map[string]map[string]int64), []int64{1, 2, 3})
+	layout.Config.AllowRebalance = false
+
+	expected, err = policy.Balance(layout)
+	assert.NoError(t, err)
+	assert.Equal(t, 6, len(expected.ChannelAssignment))
+	assert.Equal(t, int64(3), expected.ChannelAssignment[newChannelID("c2")].Node.ServerID)
+	assert.Equal(t, int64(3), expected.ChannelAssignment[newChannelID("c5")].Node.ServerID)
+	assert.Equal(t, int64(3), expected.ChannelAssignment[newChannelID("c6")].Node.ServerID)
+	assert.NotEqual(t, int64(3), expected.ChannelAssignment[newChannelID("c1")].Node.ServerID)
+	assert.NotEqual(t, int64(3), expected.ChannelAssignment[newChannelID("c3")].Node.ServerID)
+	assert.NotEqual(t, int64(3), expected.ChannelAssignment[newChannelID("c4")].Node.ServerID)
+	assert.Equal(t, 3, len(countByServerID(expected)))
+}
+
+func countByServerID(expected balancer.ExpectedLayout) map[int64]int {
+	counts := make(map[int64]int)
+	for _, node := range expected.ChannelAssignment {
@@ -122,15 +220,22 @@ func newChannelID(channel string) types.ChannelID {
 // newLayout creates a new layout for test.
 func newLayout(channels map[string]int, vchannels map[string]map[string]int64, serverID []int64) balancer.CurrentLayout {
 	layout := balancer.CurrentLayout{
+		Config: balancer.CommonBalancePolicyConfig{
+			AllowRebalance:                     true,
+			AllowRebalanceRecoveryLagThreshold: 1 * time.Second,
+			MinRebalanceIntervalThreshold:      1 * time.Second,
+		},
 		Channels:           make(map[channel.ChannelID]types.PChannelInfo),
 		Stats:              make(map[channel.ChannelID]channel.PChannelStatsView),
-		AllNodesInfo:       make(map[int64]types.StreamingNodeInfo),
+		AllNodesInfo:       make(map[int64]types.StreamingNodeStatus),
 		ChannelsToNodes:    make(map[types.ChannelID]int64),
 		ExpectedAccessMode: make(map[channel.ChannelID]types.AccessMode),
 	}
 	for _, id := range serverID {
-		layout.AllNodesInfo[id] = types.StreamingNodeInfo{
-			ServerID: id,
+		layout.AllNodesInfo[id] = types.StreamingNodeStatus{
+			StreamingNodeInfo: types.StreamingNodeInfo{
+				ServerID: id,
+			},
 		}
 	}
 	for c, node := range channels {
@@ -70,14 +70,24 @@ func (p *policy) Balance(currentLayout balancer.CurrentLayout) (layout balancer.
 		}
 	}

-	// 3. Unassign some most unbalanced channel and reassign it, try to make the layout more balanced
+	snapshot := expectedLayout.AssignmentSnapshot()
+	if !currentLayout.Config.AllowRebalance {
+		return balancer.ExpectedLayout{
+			ChannelAssignment: snapshot.Assignments,
+		}, nil
+	}
+
+	// The following code performs the auto balance.
+	// 3. Unassign some most unbalanced channel and reassign it, trying to make the layout more balanced
 	reassignChannelIDs := make([]types.ChannelID, 0, p.cfg.RebalanceMaxStep)
 	for i := 0; i < p.cfg.RebalanceMaxStep; i++ {
 		if len(expectedLayout.Assignments) == 0 {
 			break
 		}
 		channelID := expectedLayout.FindTheLeastUnbalanceScoreIncrementChannel()
 		if channelID.IsZero() {
 			break
 		}
 		expectedLayout.Unassign(channelID)
 		reassignChannelIDs = append(reassignChannelIDs, channelID)
 	}
@@ -3,23 +3,43 @@ package balancer

 import (
 	"sort"
 	"strings"
+	"time"

 	"github.com/samber/lo"

 	"github.com/milvus-io/milvus/internal/streamingcoord/server/balancer/channel"
 	"github.com/milvus-io/milvus/pkg/v2/log"
 	"github.com/milvus-io/milvus/pkg/v2/streaming/util/types"
+	"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
 	"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
 )

 // policiesBuilders is a map of registered balancer policiesBuilders.
 var policiesBuilders typeutil.ConcurrentMap[string, PolicyBuilder]

+// newCommonBalancePolicyConfig returns the common balance policy config.
+func newCommonBalancePolicyConfig() CommonBalancePolicyConfig {
+	params := paramtable.Get()
+	return CommonBalancePolicyConfig{
+		AllowRebalance:                     params.StreamingCfg.WALBalancerPolicyAllowRebalance.GetAsBool(),
+		AllowRebalanceRecoveryLagThreshold: params.StreamingCfg.WALBalancerPolicyAllowRebalanceRecoveryLagThreshold.GetAsDurationByParse(),
+		MinRebalanceIntervalThreshold:      params.StreamingCfg.WALBalancerPolicyMinRebalanceIntervalThreshold.GetAsDurationByParse(),
+	}
+}
+
+// CommonBalancePolicyConfig is the config for balance policy.
+type CommonBalancePolicyConfig struct {
+	AllowRebalance                     bool          // Whether to allow rebalance.
+	AllowRebalanceRecoveryLagThreshold time.Duration // The threshold of recovery lag for balance.
+	MinRebalanceIntervalThreshold      time.Duration // The minimum interval between rebalances.
+}
+
 // CurrentLayout is the full topology of streaming node and pChannel.
 type CurrentLayout struct {
+	Config             CommonBalancePolicyConfig
 	Channels           map[channel.ChannelID]types.PChannelInfo
 	Stats              map[channel.ChannelID]channel.PChannelStatsView
-	AllNodesInfo       map[int64]types.StreamingNodeInfo       // AllNodesInfo is the full information of all available streaming nodes and related pchannels (contains nodes with nothing assigned on them).
+	AllNodesInfo       map[int64]types.StreamingNodeStatus     // AllNodesInfo is the full information of all available streaming nodes and related pchannels (contains nodes with nothing assigned on them).
 	ChannelsToNodes    map[types.ChannelID]int64               // ChannelsToNodes maps assigned channel name to node id.
 	ExpectedAccessMode map[channel.ChannelID]types.AccessMode  // ExpectedAccessMode is the expected access mode of all channels.
 }
@@ -54,6 +74,44 @@ func (layout *CurrentLayout) TotalNodes() int {
 	return len(layout.AllNodesInfo)
 }

+// AllowRebalance returns true if the balance of the pchannel is allowed.
+func (layout *CurrentLayout) AllowRebalance(channelID channel.ChannelID) bool {
+	if !layout.Config.AllowRebalance {
+		// If rebalance is not allowed, return false directly.
+		return false
+	}
+
+	// If the last assign timestamp is too close to the current time, rebalance is not allowed.
+	if time.Since(layout.Stats[channelID].LastAssignTimestamp) < layout.Config.MinRebalanceIntervalThreshold {
+		return false
+	}
+
+	// If the recovery lag threshold is reached, rebalance is not allowed.
+	return !layout.isReachTheRecoveryLagThreshold(channelID)
+}
+
+// isReachTheRecoveryLagThreshold returns true if the recovery lag of the pchannel is greater than the recovery lag threshold.
+func (layout *CurrentLayout) isReachTheRecoveryLagThreshold(channelID channel.ChannelID) bool {
+	balanceAttr := layout.GetWALMetrics(channelID)
+	if balanceAttr == nil {
+		return false
+	}
+	r, ok := balanceAttr.(types.RWWALMetrics)
+	if !ok {
+		return false
+	}
+	return r.RecoveryLag() > layout.Config.AllowRebalanceRecoveryLagThreshold
+}
+
+// GetWALMetrics returns the WAL metrics of the pchannel.
+func (layout *CurrentLayout) GetWALMetrics(channelID channel.ChannelID) types.WALMetrics {
+	node, ok := layout.ChannelsToNodes[channelID]
+	if !ok {
+		return nil
+	}
+	return layout.AllNodesInfo[node].Metrics.WALMetrics[channelID]
+}
+
 // GetAllPChannelsSortedByVChannelCountDesc returns all pchannels sorted by vchannel count in descending order.
 func (layout *CurrentLayout) GetAllPChannelsSortedByVChannelCountDesc() []types.ChannelID {
 	sorter := make(byVChannelCountDesc, 0, layout.TotalChannels())
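A worked example of the gate above, using the exported types from this diff (a sketch; the internal import path means this only compiles inside the Milvus module): a channel assigned two minutes ago with the default 5m minimum interval stays pinned even though rebalance is globally enabled.

package main

import (
	"fmt"
	"time"

	"github.com/milvus-io/milvus/internal/streamingcoord/server/balancer"
	"github.com/milvus-io/milvus/internal/streamingcoord/server/balancer/channel"
)

func main() {
	id := channel.ChannelID{Name: "pchannel-1"}
	layout := balancer.CurrentLayout{
		Config: balancer.CommonBalancePolicyConfig{
			AllowRebalance:                     true,
			MinRebalanceIntervalThreshold:      5 * time.Minute,
			AllowRebalanceRecoveryLagThreshold: time.Second,
		},
		// Assigned two minutes ago: inside the 5m minimum interval.
		Stats: map[channel.ChannelID]channel.PChannelStatsView{
			id: {LastAssignTimestamp: time.Now().Add(-2 * time.Minute)},
		},
	}
	fmt.Println(layout.AllowRebalance(id)) // false: this wal was balanced too recently
}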
@@ -102,7 +102,7 @@ func (c *managerClientImpl) getAllStreamingNodeStatus(ctx context.Context, state
 	}

 	g, _ := errgroup.WithContext(ctx)
-	g.SetLimit(10)
+	g.SetLimit(16)
 	var mu sync.Mutex
 	result := make(map[int64]*types.StreamingNodeStatus, len(state.State.Addresses))
 	for serverID, session := range state.Sessions() {
@@ -113,24 +113,24 @@ func (c *managerClientImpl) getAllStreamingNodeStatus(ctx context.Context, state
 			resp, err := manager.CollectStatus(ctx, &streamingpb.StreamingNodeManagerCollectStatusRequest{})
 			mu.Lock()
 			defer mu.Unlock()

+			if err != nil {
+				log.Warn("collect status failed, skip", zap.Int64("serverID", serverID), zap.Error(err))
+				return err
+			}
 			result[serverID] = &types.StreamingNodeStatus{
 				StreamingNodeInfo: types.StreamingNodeInfo{
 					ServerID: serverID,
 					Address:  address,
 				},
-				Err: err,
-			}
-
-			if err != nil {
-				log.Warn("collect status failed, skip", zap.Int64("serverID", serverID), zap.Error(err))
-				return err
+				Metrics: types.NewStreamingNodeBalanceAttrsFromProto(resp.Metrics),
+				Err:     err,
 			}
 			log.Debug("collect status success", zap.Int64("serverID", serverID), zap.Any("status", resp))
 			return nil
 		})
 	}
 	g.Wait()

 	return result, nil
 }
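The collector fans out one CollectStatus RPC per node with at most 16 in flight, and after this change a node whose RPC fails is skipped rather than recorded with a partial entry. A self-contained sketch of the same bounded fan-out pattern (fakeCollect is a stand-in for the real RPC):

package main

import (
	"context"
	"fmt"
	"sync"

	"golang.org/x/sync/errgroup"
)

// fakeCollect stands in for manager.CollectStatus in this sketch.
func fakeCollect(_ context.Context, id int64) (string, error) {
	return fmt.Sprintf("node-%d: ok", id), nil
}

func main() {
	g, ctx := errgroup.WithContext(context.Background())
	g.SetLimit(16) // at most 16 collection RPCs in flight, as in the diff above

	var mu sync.Mutex
	result := make(map[int64]string)
	for serverID := int64(1); serverID <= 4; serverID++ {
		serverID := serverID // capture loop variable (pre-Go 1.22 semantics)
		g.Go(func() error {
			status, err := fakeCollect(ctx, serverID)
			mu.Lock()
			defer mu.Unlock()
			if err != nil {
				return err // the failed node is skipped: no partial entry in result
			}
			result[serverID] = status
			return nil
		})
	}
	_ = g.Wait() // the real code ignores the aggregate error and returns partial results
	fmt.Println(result)
}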
@@ -50,8 +50,11 @@ func (ms *managerServiceImpl) Remove(ctx context.Context, req *streamingpb.Strea

 // CollectStatus collects the status of all wal instances in these streamingnode.
 func (ms *managerServiceImpl) CollectStatus(ctx context.Context, req *streamingpb.StreamingNodeManagerCollectStatusRequest) (*streamingpb.StreamingNodeManagerCollectStatusResponse, error) {
-	// TODO: collect traffic metric for load balance.
+	metrics, err := ms.walManager.Metrics()
+	if err != nil {
+		return nil, err
+	}
 	return &streamingpb.StreamingNodeManagerCollectStatusResponse{
-		BalanceAttributes: &streamingpb.StreamingNodeBalanceAttributes{},
+		Metrics: types.NewProtoFromStreamingNodeMetrics(*metrics),
 	}, nil
 }
@@ -133,26 +133,6 @@ func (m *managerImpl) Metrics() (*types.StreamingNodeMetrics, error) {
 	}, nil
 }

-// GetAllAvailableChannels returns all available channel info.
-func (m *managerImpl) GetAllAvailableChannels() ([]types.PChannelInfo, error) {
-	// reject operation if manager is closing.
-	if !m.lifetime.AddIf(isGetable) {
-		return nil, errWALManagerClosed
-	}
-	defer m.lifetime.Done()
-
-	// collect all available wal info.
-	infos := make([]types.PChannelInfo, 0)
-	m.wltMap.Range(func(channel string, lt *walLifetime) bool {
-		if l := lt.GetWAL(); l != nil {
-			info := l.Channel()
-			infos = append(infos, info)
-		}
-		return true
-	})
-	return infos, nil
-}
-
 // Close these manager and release all managed WAL.
 func (m *managerImpl) Close() {
 	m.lifetime.SetState(managerRemoveable)
@@ -42,6 +42,7 @@ func TestManager(t *testing.T) {
 		l.EXPECT().IsAvailable().Return(true).Maybe()
 		l.EXPECT().Close().Return()
 		l.EXPECT().IsAvailable().Return(true).Maybe()
+		l.EXPECT().Metrics().Return(types.RWWALMetrics{})
 		return l, nil
 	})
 	opener.EXPECT().Close().Return()
@@ -60,6 +60,7 @@ message PChannelMeta {
   PChannelMetaState state = 3; // state of the channel.
   repeated PChannelAssignmentLog histories =
       4; // keep the meta info assignment log that used to be assigned to.
+  uint64 last_assign_timestamp_seconds = 5; // The last assigned timestamp in seconds.
 }

 // StreamingVersion is the version of the streaming service.
@@ -486,12 +487,28 @@ message StreamingNodeManagerRemoveResponse {}

 message StreamingNodeManagerCollectStatusRequest {}

-message StreamingNodeBalanceAttributes {
-  // TODO: traffic of pchannel or other things.
+message StreamingNodeMetrics {
+  repeated StreamingNodeWALMetrics wals = 1;
+}
+
+message StreamingNodeWALMetrics {
+  PChannelInfo info = 1;
+  oneof metrics {
+    StreamingNodeRWWALMetrics rw = 2;
+    StreamingNodeROWALMetrics ro = 3;
+  };
+}
+
+message StreamingNodeRWWALMetrics {
+  uint64 mvcc_time_tick = 1;     // The mvcc time tick of the pchannel.
+  uint64 recovery_time_tick = 2; // The recovery time tick of the pchannel.
+}
+
+message StreamingNodeROWALMetrics {
 }

 message StreamingNodeManagerCollectStatusResponse {
-  StreamingNodeBalanceAttributes balance_attributes = 1;
+  StreamingNodeMetrics metrics = 1;
 }

 ///
(File diff suppressed because it is too large.)
@@ -74,8 +74,8 @@ func (n StreamingNodeInfo) String() string {
 // StreamingNodeStatus is the information of a streaming node.
 type StreamingNodeStatus struct {
 	StreamingNodeInfo
-	// TODO: balance attributes should added here in future.
-	Err error
+	Metrics StreamingNodeMetrics
+	Err     error
 }

 // IsHealthy returns whether the streaming node is healthy.
@@ -3,6 +3,7 @@ package types
 import (
 	"time"

+	"github.com/milvus-io/milvus/pkg/v2/proto/streamingpb"
 	"github.com/milvus-io/milvus/pkg/v2/util/tsoutil"
 )
@@ -44,3 +45,53 @@ func (ROWALMetrics) isWALMetrics() {}
 type StreamingNodeMetrics struct {
 	WALMetrics map[ChannelID]WALMetrics
 }
+
+func NewStreamingNodeBalanceAttrsFromProto(m *streamingpb.StreamingNodeMetrics) StreamingNodeMetrics {
+	channels := make(map[ChannelID]WALMetrics)
+	for _, l := range m.GetWals() {
+		switch balanceAttr := l.Metrics.(type) {
+		case *streamingpb.StreamingNodeWALMetrics_Ro:
+			channels[ChannelID{Name: l.Info.Name}] = ROWALMetrics{
+				ChannelInfo: NewPChannelInfoFromProto(l.Info),
+			}
+		case *streamingpb.StreamingNodeWALMetrics_Rw:
+			channels[ChannelID{Name: l.Info.Name}] = RWWALMetrics{
+				ChannelInfo:      NewPChannelInfoFromProto(l.Info),
+				MVCCTimeTick:     balanceAttr.Rw.MvccTimeTick,
+				RecoveryTimeTick: balanceAttr.Rw.RecoveryTimeTick,
+			}
+		}
+	}
+	return StreamingNodeMetrics{
+		WALMetrics: channels,
+	}
+}
+
+// NewProtoFromStreamingNodeMetrics converts StreamingNodeMetrics into its proto representation.
+func NewProtoFromStreamingNodeMetrics(info StreamingNodeMetrics) *streamingpb.StreamingNodeMetrics {
+	wals := make([]*streamingpb.StreamingNodeWALMetrics, 0, len(info.WALMetrics))
+	for _, metrics := range info.WALMetrics {
+		switch metrics := metrics.(type) {
+		case RWWALMetrics:
+			wals = append(wals, &streamingpb.StreamingNodeWALMetrics{
+				Info: NewProtoFromPChannelInfo(metrics.ChannelInfo),
+				Metrics: &streamingpb.StreamingNodeWALMetrics_Rw{
+					Rw: &streamingpb.StreamingNodeRWWALMetrics{
+						MvccTimeTick:     metrics.MVCCTimeTick,
+						RecoveryTimeTick: metrics.RecoveryTimeTick,
+					},
+				},
+			})
+		case ROWALMetrics:
+			wals = append(wals, &streamingpb.StreamingNodeWALMetrics{
+				Info: NewProtoFromPChannelInfo(metrics.ChannelInfo),
+				Metrics: &streamingpb.StreamingNodeWALMetrics_Ro{
+					Ro: &streamingpb.StreamingNodeROWALMetrics{},
+				},
+			})
+		}
+	}
+	return &streamingpb.StreamingNodeMetrics{
+		Wals: wals,
+	}
+}
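A minimal round-trip of the two converters above, as a streamingnode would encode its metrics and the coordinator would decode them (a sketch; the field values are illustrative):

package main

import (
	"fmt"

	"github.com/milvus-io/milvus/pkg/v2/streaming/util/types"
)

func main() {
	in := types.StreamingNodeMetrics{
		WALMetrics: map[types.ChannelID]types.WALMetrics{
			{Name: "pchannel-1"}: types.RWWALMetrics{
				ChannelInfo:      types.PChannelInfo{Name: "pchannel-1", Term: 1},
				MVCCTimeTick:     105,
				RecoveryTimeTick: 100,
			},
		},
	}
	// Encode on the streamingnode side, decode on the coordinator side.
	pb := types.NewProtoFromStreamingNodeMetrics(in)
	out := types.NewStreamingNodeBalanceAttrsFromProto(pb)
	fmt.Println(len(out.WALMetrics)) // 1
}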
@@ -5723,12 +5723,15 @@ type streamingConfig struct {
 	WALBalancerOperationTimeout ParamItem `refreshable:"true"`

 	// balancer Policy
-	WALBalancerPolicyName                           ParamItem `refreshable:"true"`
-	WALBalancerPolicyVChannelFairPChannelWeight     ParamItem `refreshable:"true"`
-	WALBalancerPolicyVChannelFairVChannelWeight     ParamItem `refreshable:"true"`
-	WALBalancerPolicyVChannelFairAntiAffinityWeight ParamItem `refreshable:"true"`
-	WALBalancerPolicyVChannelFairRebalanceTolerance ParamItem `refreshable:"true"`
-	WALBalancerPolicyVChannelFairRebalanceMaxStep   ParamItem `refreshable:"true"`
+	WALBalancerPolicyName                               ParamItem `refreshable:"true"`
+	WALBalancerPolicyAllowRebalance                     ParamItem `refreshable:"true"`
+	WALBalancerPolicyMinRebalanceIntervalThreshold      ParamItem `refreshable:"true"`
+	WALBalancerPolicyAllowRebalanceRecoveryLagThreshold ParamItem `refreshable:"true"`
+	WALBalancerPolicyVChannelFairPChannelWeight         ParamItem `refreshable:"true"`
+	WALBalancerPolicyVChannelFairVChannelWeight         ParamItem `refreshable:"true"`
+	WALBalancerPolicyVChannelFairAntiAffinityWeight     ParamItem `refreshable:"true"`
+	WALBalancerPolicyVChannelFairRebalanceTolerance     ParamItem `refreshable:"true"`
+	WALBalancerPolicyVChannelFairRebalanceMaxStep       ParamItem `refreshable:"true"`

 	// broadcaster
 	WALBroadcasterConcurrencyRatio ParamItem `refreshable:"false"`
@@ -5815,6 +5818,35 @@ If the operation exceeds this timeout, it will be canceled.`,
 	}
 	p.WALBalancerPolicyName.Init(base.mgr)

+	p.WALBalancerPolicyAllowRebalance = ParamItem{
+		Key:     "streaming.walBalancer.balancePolicy.allowRebalance",
+		Version: "2.6.0",
+		Doc: `Whether to allow rebalance, true by default.
+If rebalance is not allowed, only lost-wal recovery is executed; rebalancing (moving a pchannel from one node to another) is skipped.`,
+		DefaultValue: "true",
+		Export:       true,
+	}
+	p.WALBalancerPolicyAllowRebalance.Init(base.mgr)
+
+	p.WALBalancerPolicyMinRebalanceIntervalThreshold = ParamItem{
+		Key:          "streaming.walBalancer.balancePolicy.minRebalanceIntervalThreshold",
+		Version:      "2.6.0",
+		Doc:          `The minimum interval between rebalances for each wal, 5m by default.`,
+		DefaultValue: "5m",
+		Export:       true,
+	}
+	p.WALBalancerPolicyMinRebalanceIntervalThreshold.Init(base.mgr)
+
+	p.WALBalancerPolicyAllowRebalanceRecoveryLagThreshold = ParamItem{
+		Key:     "streaming.walBalancer.balancePolicy.allowRebalanceRecoveryLagThreshold",
+		Version: "2.6.0",
+		Doc: `The threshold of recovery lag for rebalance, 1s by default.
+If the recovery lag is greater than this threshold, rebalance of the current pchannel is not allowed.`,
+		DefaultValue: "1s",
+		Export:       true,
+	}
+	p.WALBalancerPolicyAllowRebalanceRecoveryLagThreshold.Init(base.mgr)
+
 	p.WALBalancerPolicyVChannelFairPChannelWeight = ParamItem{
 		Key:     "streaming.walBalancer.balancePolicy.vchannelFair.pchannelWeight",
 		Version: "2.6.0",
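All three items are declared refreshable:"true", so they can be flipped at runtime without a restart; the component-param test below exercises exactly this. A minimal sketch:

package main

import (
	"fmt"

	"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
)

func main() {
	paramtable.Init()
	params := paramtable.Get()
	// Disable moving healthy wals at runtime; lost-wal recovery keeps running.
	params.Save(params.StreamingCfg.WALBalancerPolicyAllowRebalance.Key, "false")
	fmt.Println(params.StreamingCfg.WALBalancerPolicyAllowRebalance.GetAsBool()) // false
}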
@@ -646,6 +646,9 @@ func TestComponentParam(t *testing.T) {
 	assert.Equal(t, 5*time.Second, params.StreamingCfg.WALBalancerBackoffMaxInterval.GetAsDurationByParse())
 	assert.Equal(t, 2.0, params.StreamingCfg.WALBalancerBackoffMultiplier.GetAsFloat())
 	assert.Equal(t, "vchannelFair", params.StreamingCfg.WALBalancerPolicyName.GetValue())
+	assert.Equal(t, true, params.StreamingCfg.WALBalancerPolicyAllowRebalance.GetAsBool())
+	assert.Equal(t, 5*time.Minute, params.StreamingCfg.WALBalancerPolicyMinRebalanceIntervalThreshold.GetAsDurationByParse())
+	assert.Equal(t, 1*time.Second, params.StreamingCfg.WALBalancerPolicyAllowRebalanceRecoveryLagThreshold.GetAsDurationByParse())
 	assert.Equal(t, 0.4, params.StreamingCfg.WALBalancerPolicyVChannelFairPChannelWeight.GetAsFloat())
 	assert.Equal(t, 0.3, params.StreamingCfg.WALBalancerPolicyVChannelFairVChannelWeight.GetAsFloat())
 	assert.Equal(t, 0.01, params.StreamingCfg.WALBalancerPolicyVChannelFairAntiAffinityWeight.GetAsFloat())
@@ -680,6 +683,9 @@ func TestComponentParam(t *testing.T) {
 	params.Save(params.StreamingCfg.WALBalancerPolicyVChannelFairAntiAffinityWeight.Key, "0.02")
 	params.Save(params.StreamingCfg.WALBalancerPolicyVChannelFairRebalanceTolerance.Key, "0.02")
 	params.Save(params.StreamingCfg.WALBalancerPolicyVChannelFairRebalanceMaxStep.Key, "4")
+	params.Save(params.StreamingCfg.WALBalancerPolicyAllowRebalance.Key, "false")
+	params.Save(params.StreamingCfg.WALBalancerPolicyMinRebalanceIntervalThreshold.Key, "10s")
+	params.Save(params.StreamingCfg.WALBalancerPolicyAllowRebalanceRecoveryLagThreshold.Key, "1s")
 	params.Save(params.StreamingCfg.LoggingAppendSlowThreshold.Key, "3s")
 	params.Save(params.StreamingCfg.WALRecoveryGracefulCloseTimeout.Key, "4s")
 	params.Save(params.StreamingCfg.WALRecoveryMaxDirtyMessage.Key, "200")
@@ -694,6 +700,9 @@ func TestComponentParam(t *testing.T) {
 	assert.Equal(t, 3.5, params.StreamingCfg.WALBalancerBackoffMultiplier.GetAsFloat())
 	assert.Equal(t, 1.5, params.StreamingCfg.WALBroadcasterConcurrencyRatio.GetAsFloat())
 	assert.Equal(t, "pchannelFair", params.StreamingCfg.WALBalancerPolicyName.GetValue())
+	assert.Equal(t, false, params.StreamingCfg.WALBalancerPolicyAllowRebalance.GetAsBool())
+	assert.Equal(t, 10*time.Second, params.StreamingCfg.WALBalancerPolicyMinRebalanceIntervalThreshold.GetAsDurationByParse())
+	assert.Equal(t, 1*time.Second, params.StreamingCfg.WALBalancerPolicyAllowRebalanceRecoveryLagThreshold.GetAsDurationByParse())
 	assert.Equal(t, 0.5, params.StreamingCfg.WALBalancerPolicyVChannelFairPChannelWeight.GetAsFloat())
 	assert.Equal(t, 0.4, params.StreamingCfg.WALBalancerPolicyVChannelFairVChannelWeight.GetAsFloat())
 	assert.Equal(t, 0.02, params.StreamingCfg.WALBalancerPolicyVChannelFairAntiAffinityWeight.GetAsFloat())
@@ -53,6 +53,7 @@ func (s *BalanceTestSuit) SetupSuite() {
 	s.WithMilvusConfig(paramtable.Get().QueryCoordCfg.BalanceCheckInterval.Key, "100")
 	s.WithMilvusConfig(paramtable.Get().QueryCoordCfg.AutoBalanceInterval.Key, "100")
 	s.WithMilvusConfig(paramtable.Get().QueryNodeCfg.GracefulStopTimeout.Key, "1")
+	s.WithMilvusConfig(paramtable.Get().StreamingCfg.WALBalancerPolicyMinRebalanceIntervalThreshold.Key, "1ms")

 	// disable compaction
 	s.WithMilvusConfig(paramtable.Get().DataCoordCfg.EnableCompaction.Key, "false")
@@ -53,6 +53,7 @@ func (s *ChannelExclusiveBalanceSuit) SetupSuite() {
 	s.WithMilvusConfig(paramtable.Get().QueryCoordCfg.Balancer.Key, meta.ChannelLevelScoreBalancerName)
 	s.WithMilvusConfig(paramtable.Get().QueryCoordCfg.ChannelExclusiveNodeFactor.Key, "2")
 	s.WithMilvusConfig(paramtable.Get().DataCoordCfg.EnableCompaction.Key, "false")
+	s.WithMilvusConfig(paramtable.Get().StreamingCfg.WALBalancerPolicyMinRebalanceIntervalThreshold.Key, "1ms")

 	s.MiniClusterSuite.SetupSuite()
 }
@@ -650,7 +650,7 @@ func (c *MiniClusterV3) getOptions() []process.Option {
 	env := map[string]string{
 		"QUOTAANDLIMITS_ENABLED": "false", // disable the quota limits by default to avoid rpc failure.
 		"PROXY_IP":               "localhost",
-		"STRAEMINGNODE_IP":       "localhost",
+		"STREAMINGNODE_IP":       "localhost",
 		"QUERYNODE_IP":           "localhost",
 		"DATANODE_IP":            "localhost",
 		"ROOTCOORD_IP":           "localhost",
@@ -36,6 +36,7 @@ func (s *PartialSearchTestSuit) SetupSuite() {
 	s.WithMilvusConfig(paramtable.Get().QueryCoordCfg.AutoBalanceInterval.Key, "10000")
 	s.WithMilvusConfig(paramtable.Get().QueryCoordCfg.BalanceCheckInterval.Key, "10000")
 	s.WithMilvusConfig(paramtable.Get().QueryCoordCfg.TaskExecutionCap.Key, "1")
+	s.WithMilvusConfig(paramtable.Get().StreamingCfg.WALBalancerPolicyMinRebalanceIntervalThreshold.Key, "1ms")

 	s.WithOptions(integration.WithDropAllCollectionsWhenTestTearDown())
 	s.MiniClusterSuite.SetupSuite()
@@ -48,6 +48,8 @@ type ReplicaTestSuit struct {
 func (s *ReplicaTestSuit) SetupSuite() {
 	s.WithMilvusConfig(paramtable.Get().QueryCoordCfg.BalanceCheckInterval.Key, "1000")
 	s.WithMilvusConfig(paramtable.Get().QueryNodeCfg.GracefulStopTimeout.Key, "1")
+	s.WithMilvusConfig(paramtable.Get().StreamingCfg.WALBalancerPolicyMinRebalanceIntervalThreshold.Key, "1ms")

 	s.MiniClusterSuite.SetupSuite()
 }
@@ -57,6 +57,7 @@ func (s *LoadTestSuite) SetupSuite() {
 	s.WithMilvusConfig(paramtable.Get().QueryCoordCfg.BalanceCheckInterval.Key, "100")
 	s.WithMilvusConfig(paramtable.Get().QueryNodeCfg.GracefulStopTimeout.Key, "1")
 	s.WithMilvusConfig(paramtable.Get().QueryCoordCfg.CheckNodeInReplicaInterval.Key, "1")
+	s.WithMilvusConfig(paramtable.Get().StreamingCfg.WALBalancerPolicyMinRebalanceIntervalThreshold.Key, "1ms")

 	s.WithOptions(integration.WithDropAllCollectionsWhenTestTearDown())
 	s.WithOptions(integration.WithoutResetDeploymentWhenTestTearDown())
@@ -45,6 +45,8 @@ type ManualRollingUpgradeSuite struct {
 func (s *ManualRollingUpgradeSuite) SetupSuite() {
 	rand.Seed(time.Now().UnixNano())
 	s.WithMilvusConfig(paramtable.Get().QueryCoordCfg.BalanceCheckInterval.Key, "100")
+	s.WithMilvusConfig(paramtable.Get().StreamingCfg.WALBalancerPolicyMinRebalanceIntervalThreshold.Key, "1ms")

 	s.MiniClusterSuite.SetupSuite()
 }