fix: support upgrading from 2.6.x to 2.6.5 (#45264)

issue: #43897

Signed-off-by: chyezh <chyezh@outlook.com>
This commit is contained in:
Zhen Ye 2025-11-04 18:31:32 +08:00 committed by GitHub
parent 06933c25b8
commit 966ebfbcab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 163 additions and 9 deletions

View File

@ -112,6 +112,7 @@ func initStreamingSystem(t *testing.T) {
<-ctx.Done()
return ctx.Err()
}).Maybe()
b.EXPECT().WaitUntilWALbasedDDLReady(mock.Anything).Return(nil).Maybe()
b.EXPECT().Close().Return().Maybe()
balance.Register(b)
channel.ResetStaticPChannelStatsManager()

View File

@ -573,6 +573,52 @@ func (_c *MockBalancer_UpdateReplicateConfiguration_Call) RunAndReturn(run func(
return _c
}
// WaitUntilWALbasedDDLReady provides a mock function with given fields: ctx
func (_m *MockBalancer) WaitUntilWALbasedDDLReady(ctx context.Context) error {
ret := _m.Called(ctx)
if len(ret) == 0 {
panic("no return value specified for WaitUntilWALbasedDDLReady")
}
var r0 error
if rf, ok := ret.Get(0).(func(context.Context) error); ok {
r0 = rf(ctx)
} else {
r0 = ret.Error(0)
}
return r0
}
// MockBalancer_WaitUntilWALbasedDDLReady_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'WaitUntilWALbasedDDLReady'
type MockBalancer_WaitUntilWALbasedDDLReady_Call struct {
*mock.Call
}
// WaitUntilWALbasedDDLReady is a helper method to define mock.On call
// - ctx context.Context
func (_e *MockBalancer_Expecter) WaitUntilWALbasedDDLReady(ctx interface{}) *MockBalancer_WaitUntilWALbasedDDLReady_Call {
return &MockBalancer_WaitUntilWALbasedDDLReady_Call{Call: _e.mock.On("WaitUntilWALbasedDDLReady", ctx)}
}
func (_c *MockBalancer_WaitUntilWALbasedDDLReady_Call) Run(run func(ctx context.Context)) *MockBalancer_WaitUntilWALbasedDDLReady_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context))
})
return _c
}
func (_c *MockBalancer_WaitUntilWALbasedDDLReady_Call) Return(_a0 error) *MockBalancer_WaitUntilWALbasedDDLReady_Call {
_c.Call.Return(_a0)
return _c
}
func (_c *MockBalancer_WaitUntilWALbasedDDLReady_Call) RunAndReturn(run func(context.Context) error) *MockBalancer_WaitUntilWALbasedDDLReady_Call {
_c.Call.Return(run)
return _c
}
// WatchChannelAssignments provides a mock function with given fields: ctx, cb
func (_m *MockBalancer) WatchChannelAssignments(ctx context.Context, cb balancer.WatchChannelAssignmentsCallback) error {
ret := _m.Called(ctx, cb)

View File

@ -34,12 +34,14 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus-proto/go-api/v2/rgpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/coordinator/snmanager"
"github.com/milvus-io/milvus/internal/distributed/streaming"
"github.com/milvus-io/milvus/internal/json"
etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
"github.com/milvus-io/milvus/internal/metastore"
"github.com/milvus-io/milvus/internal/metastore/kv/querycoord"
"github.com/milvus-io/milvus/internal/mocks/distributed/mock_streaming"
"github.com/milvus-io/milvus/internal/mocks/streamingcoord/server/mock_balancer"
"github.com/milvus-io/milvus/internal/mocks/streamingcoord/server/mock_broadcaster"
"github.com/milvus-io/milvus/internal/querycoordv2/balance"
"github.com/milvus-io/milvus/internal/querycoordv2/checkers"
@ -51,6 +53,8 @@ import (
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/milvus-io/milvus/internal/querycoordv2/task"
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
"github.com/milvus-io/milvus/internal/streamingcoord/server/balancer"
sbalance "github.com/milvus-io/milvus/internal/streamingcoord/server/balancer/balance"
"github.com/milvus-io/milvus/internal/streamingcoord/server/broadcaster/broadcast"
"github.com/milvus-io/milvus/internal/streamingcoord/server/broadcaster/registry"
"github.com/milvus-io/milvus/internal/util/proxyutil"
@ -116,6 +120,16 @@ func initStreamingSystem() {
wal.EXPECT().ControlChannel().Return(funcutil.GetControlChannel("by-dev-rootcoord-dml_0"))
streaming.SetWALForTest(wal)
snmanager.ResetStreamingNodeManager()
b := mock_balancer.NewMockBalancer(t)
b.EXPECT().WaitUntilWALbasedDDLReady(mock.Anything).Return(nil).Maybe()
b.EXPECT().WatchChannelAssignments(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, cb balancer.WatchChannelAssignmentsCallback) error {
<-ctx.Done()
return ctx.Err()
}).Maybe()
b.EXPECT().Close().Return().Maybe()
sbalance.Register(b)
bapi := mock_broadcaster.NewMockBroadcastAPI(t)
bapi.EXPECT().Broadcast(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, msg message.BroadcastMutableMessage) (*types.BroadcastAppendResult, error) {
results := make(map[string]*message.AppendResult)

View File

@ -146,6 +146,7 @@ func initStreamingSystemAndCore(t *testing.T) *Core {
}
return vchannels, nil
}).Maybe()
b.EXPECT().WaitUntilWALbasedDDLReady(mock.Anything).Return(nil).Maybe()
b.EXPECT().WatchChannelAssignments(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, callback balancer.WatchChannelAssignmentsCallback) error {
<-ctx.Done()
return ctx.Err()

View File

@ -44,6 +44,9 @@ type Balancer interface {
// ReplicateRole returns the replicate role of the balancer.
ReplicateRole() replicateutil.Role
// WaitUntilWALbasedDDLReady waits until the WAL based DDL is ready.
WaitUntilWALbasedDDLReady(ctx context.Context) error
// RegisterStreamingEnabledNotifier registers a notifier into the balancer.
// If the error is returned, the balancer is closed.
// Otherwise, the following rules are applied:

View File

@ -26,6 +26,7 @@ import (
const (
versionChecker260 = "<2.6.0-dev"
versionChecker265 = "<2.6.5-dev"
)
// RecoverBalancer recover the balancer working.
@ -110,6 +111,20 @@ func (b *balancerImpl) GetLatestWALLocated(ctx context.Context, pchannel string)
return b.channelMetaManager.GetLatestWALLocated(ctx, pchannel)
}
// WaitUntilWALbasedDDLReady waits until the WAL based DDL is ready.
func (b *balancerImpl) WaitUntilWALbasedDDLReady(ctx context.Context) error {
if b.channelMetaManager.IsWALBasedDDLEnabled() {
return nil
}
if err := b.channelMetaManager.WaitUntilStreamingEnabled(ctx); err != nil {
return err
}
if err := b.blockUntilRoleGreaterThanVersion(ctx, typeutil.StreamingNodeRole, versionChecker265); err != nil {
return err
}
return b.channelMetaManager.MarkWALBasedDDLEnabled(ctx)
}
// WatchChannelAssignments watches the balance result.
func (b *balancerImpl) WatchChannelAssignments(ctx context.Context, cb WatchChannelAssignmentsCallback) error {
if !b.lifetime.Add(typeutil.LifetimeStateWorking) {
@ -335,20 +350,20 @@ func (b *balancerImpl) checkIfRoleGreaterThan260(ctx context.Context, role strin
func (b *balancerImpl) blockUntilAllNodeIsGreaterThan260AtBackground(ctx context.Context) error {
expectedRoles := []string{typeutil.ProxyRole, typeutil.DataNodeRole, typeutil.QueryNodeRole}
for _, role := range expectedRoles {
if err := b.blockUntilRoleGreaterThan260AtBackground(ctx, role); err != nil {
if err := b.blockUntilRoleGreaterThanVersion(ctx, role, versionChecker260); err != nil {
return err
}
}
return b.channelMetaManager.MarkStreamingHasEnabled(ctx)
}
// blockUntilRoleGreaterThan260AtBackground block until the role is greater than 2.6.0 at background.
func (b *balancerImpl) blockUntilRoleGreaterThan260AtBackground(ctx context.Context, role string) error {
// blockUntilRoleGreaterThanVersion block until the role is greater than 2.6.0 at background.
func (b *balancerImpl) blockUntilRoleGreaterThanVersion(ctx context.Context, role string, versionChecker string) error {
doneErr := errors.New("done")
logger := b.Logger().With(zap.String("role", role))
logger.Info("start to wait that the nodes is greater than 2.6.0")
logger.Info("start to wait that the nodes is greater than version", zap.String("version", versionChecker))
// Check if there's any proxy or data node with version < 2.6.0.
rb := resolver.NewSessionBuilder(resource.Resource().ETCD(), sessionutil.GetSessionPrefixByRole(role), versionChecker260)
rb := resolver.NewSessionBuilder(resource.Resource().ETCD(), sessionutil.GetSessionPrefixByRole(role), versionChecker)
defer rb.Close()
r := rb.Resolver()
@ -360,10 +375,10 @@ func (b *balancerImpl) blockUntilRoleGreaterThan260AtBackground(ctx context.Cont
return nil
})
if err != nil && !errors.Is(err, doneErr) {
logger.Info("fail to wait that the nodes is greater than 2.6.0", zap.Error(err))
logger.Info("fail to wait that the nodes is greater than version", zap.String("version", versionChecker), zap.Error(err))
return err
}
logger.Info("all nodes is greater than 2.6.0 when watching")
logger.Info("all nodes is greater than version when watching", zap.String("version", versionChecker))
return nil
}

View File

@ -23,6 +23,11 @@ import (
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
const (
StreamingVersion260 = 1 // streaming version that since 2.6.0, the streaming based WAL is available.
StreamingVersion265 = 2 // streaming version that since 2.6.5, the WAL based DDL is available.
)
var ErrChannelNotExist = errors.New("channel not exist")
type (
@ -184,6 +189,26 @@ func (cm *ChannelManager) IsStreamingEnabledOnce() bool {
return cm.streamingVersion != nil
}
// WaitUntilStreamingEnabled waits until the streaming service is enabled.
func (cm *ChannelManager) WaitUntilStreamingEnabled(ctx context.Context) error {
cm.cond.L.Lock()
for cm.streamingVersion == nil {
if err := cm.cond.Wait(ctx); err != nil {
return err
}
}
cm.cond.L.Unlock()
return nil
}
// IsWALBasedDDLEnabled returns true if the WAL based DDL is enabled.
func (cm *ChannelManager) IsWALBasedDDLEnabled() bool {
cm.cond.L.Lock()
defer cm.cond.L.Unlock()
return cm.streamingVersion != nil && cm.streamingVersion.Version >= StreamingVersion265
}
// ReplicateRole returns the replicate role of the channel manager.
func (cm *ChannelManager) ReplicateRole() replicateutil.Role {
cm.cond.L.Lock()
@ -211,8 +236,12 @@ func (cm *ChannelManager) MarkStreamingHasEnabled(ctx context.Context) error {
cm.cond.L.Lock()
defer cm.cond.L.Unlock()
if cm.streamingVersion != nil {
return nil
}
cm.streamingVersion = &streamingpb.StreamingVersion{
Version: 1,
Version: StreamingVersion260,
}
if err := resource.Resource().StreamingCatalog().SaveVersion(ctx, cm.streamingVersion); err != nil {
@ -232,6 +261,24 @@ func (cm *ChannelManager) MarkStreamingHasEnabled(ctx context.Context) error {
return nil
}
func (cm *ChannelManager) MarkWALBasedDDLEnabled(ctx context.Context) error {
cm.cond.L.Lock()
defer cm.cond.L.Unlock()
if cm.streamingVersion == nil {
return errors.New("streaming service is not enabled, cannot mark WAL based DDL enabled")
}
if cm.streamingVersion.Version >= StreamingVersion265 {
return nil
}
cm.streamingVersion.Version = StreamingVersion265
if err := resource.Resource().StreamingCatalog().SaveVersion(ctx, cm.streamingVersion); err != nil {
cm.Logger().Error("failed to save streaming version", zap.Error(err))
return err
}
return nil
}
// CurrentPChannelsView returns the current view of pchannels.
func (cm *ChannelManager) CurrentPChannelsView() *PChannelView {
cm.cond.L.Lock()

View File

@ -3,6 +3,9 @@ package broadcast
import (
"context"
"github.com/cockroachdb/errors"
"github.com/milvus-io/milvus/internal/streamingcoord/server/balancer/balance"
"github.com/milvus-io/milvus/internal/streamingcoord/server/broadcaster"
"github.com/milvus-io/milvus/pkg/v2/streaming/util/message"
"github.com/milvus-io/milvus/pkg/v2/util/syncutil"
@ -30,6 +33,13 @@ func StartBroadcastWithResourceKeys(ctx context.Context, resourceKeys ...message
if err != nil {
return nil, err
}
b, err := balance.GetWithContext(ctx)
if err != nil {
return nil, err
}
if err := b.WaitUntilWALbasedDDLReady(ctx); err != nil {
return nil, errors.Wrap(err, "failed to wait until WAL based DDL ready")
}
return broadcaster.WithResourceKeys(ctx, resourceKeys...)
}

View File

@ -10,6 +10,7 @@ import (
"github.com/stretchr/testify/mock"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus/internal/coordinator/snmanager"
"github.com/milvus-io/milvus/internal/distributed/streaming"
"github.com/milvus-io/milvus/internal/mocks/distributed/mock_streaming"
"github.com/milvus-io/milvus/internal/mocks/streamingcoord/server/mock_balancer"
@ -37,7 +38,9 @@ func TestAssignmentService(t *testing.T) {
broadcast.ResetBroadcaster()
// Set up the balancer
snmanager.ResetStreamingNodeManager()
b := mock_balancer.NewMockBalancer(t)
b.EXPECT().WaitUntilWALbasedDDLReady(mock.Anything).Return(nil).Maybe()
b.EXPECT().WatchChannelAssignments(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, cb balancer.WatchChannelAssignmentsCallback) error {
<-ctx.Done()
return ctx.Err()

View File

@ -10,7 +10,11 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
"github.com/milvus-io/milvus/internal/coordinator/snmanager"
"github.com/milvus-io/milvus/internal/mocks/streamingcoord/server/mock_balancer"
"github.com/milvus-io/milvus/internal/mocks/streamingcoord/server/mock_broadcaster"
"github.com/milvus-io/milvus/internal/streamingcoord/server/balancer"
"github.com/milvus-io/milvus/internal/streamingcoord/server/balancer/balance"
"github.com/milvus-io/milvus/internal/streamingcoord/server/broadcaster"
"github.com/milvus-io/milvus/internal/streamingcoord/server/broadcaster/broadcast"
"github.com/milvus-io/milvus/pkg/v2/proto/streamingpb"
@ -22,6 +26,16 @@ import (
func TestBroadcastService(t *testing.T) {
broadcast.ResetBroadcaster()
snmanager.ResetStreamingNodeManager()
// Set up the balancer
b := mock_balancer.NewMockBalancer(t)
b.EXPECT().WaitUntilWALbasedDDLReady(mock.Anything).Return(nil).Maybe()
b.EXPECT().WatchChannelAssignments(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, cb balancer.WatchChannelAssignmentsCallback) error {
<-ctx.Done()
return ctx.Err()
})
b.EXPECT().Close().Return().Maybe()
balance.Register(b)
fb := syncutil.NewFuture[broadcaster.Broadcaster]()
mba := mock_broadcaster.NewMockBroadcastAPI(t)

View File

@ -6,5 +6,5 @@ import semver "github.com/blang/semver/v4"
var Version semver.Version
func init() {
Version = semver.MustParse("2.6.3")
Version = semver.MustParse("2.6.5-dev")
}