mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 01:28:27 +08:00
issue: #45728
pr: #45730

When mixcoord is in standby mode and a shutdown is triggered, the ProcessActiveStandBy goroutine may panic if the context is canceled. This happens because the error handling did not check for context.Canceled before panicking.

Changes:
- Add a context cancellation check in mix_coord Register() before the panic
- Check s.ctx.Err() == context.Canceled and exit gracefully
- Remove the unused ForceActiveStandby() function from session_util

This ensures that a standby mixcoord can shut down gracefully, without panicking, when the context is canceled during the standby process.

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
This commit is contained in:
parent
5ba7c4ed35
commit
b2ef076597
@ -121,6 +121,10 @@ func (s *mixCoordImpl) Register() error {
|
|||||||
if s.enableActiveStandBy {
|
if s.enableActiveStandBy {
|
||||||
go func() {
|
go func() {
|
||||||
if err := s.session.ProcessActiveStandBy(s.activateFunc); err != nil {
|
if err := s.session.ProcessActiveStandBy(s.activateFunc); err != nil {
|
||||||
|
if s.ctx.Err() == context.Canceled {
|
||||||
|
log.Info("standby process canceled due to server shutdown")
|
||||||
|
return
|
||||||
|
}
|
||||||
log.Error("failed to activate standby server", zap.Error(err))
|
log.Error("failed to activate standby server", zap.Error(err))
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -69,52 +69,6 @@ func (_c *MockSession_Disconnected_Call) RunAndReturn(run func() bool) *MockSess
|
|||||||
return _c
|
return _c
|
||||||
}
|
}
|
||||||
|
|
||||||
// ForceActiveStandby provides a mock function with given fields: activateFunc
|
|
||||||
func (_m *MockSession) ForceActiveStandby(activateFunc func() error) error {
|
|
||||||
ret := _m.Called(activateFunc)
|
|
||||||
|
|
||||||
if len(ret) == 0 {
|
|
||||||
panic("no return value specified for ForceActiveStandby")
|
|
||||||
}
|
|
||||||
|
|
||||||
var r0 error
|
|
||||||
if rf, ok := ret.Get(0).(func(func() error) error); ok {
|
|
||||||
r0 = rf(activateFunc)
|
|
||||||
} else {
|
|
||||||
r0 = ret.Error(0)
|
|
||||||
}
|
|
||||||
|
|
||||||
return r0
|
|
||||||
}
|
|
||||||
|
|
||||||
// MockSession_ForceActiveStandby_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ForceActiveStandby'
|
|
||||||
type MockSession_ForceActiveStandby_Call struct {
|
|
||||||
*mock.Call
|
|
||||||
}
|
|
||||||
|
|
||||||
// ForceActiveStandby is a helper method to define mock.On call
|
|
||||||
// - activateFunc func() error
|
|
||||||
func (_e *MockSession_Expecter) ForceActiveStandby(activateFunc interface{}) *MockSession_ForceActiveStandby_Call {
|
|
||||||
return &MockSession_ForceActiveStandby_Call{Call: _e.mock.On("ForceActiveStandby", activateFunc)}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (_c *MockSession_ForceActiveStandby_Call) Run(run func(activateFunc func() error)) *MockSession_ForceActiveStandby_Call {
|
|
||||||
_c.Call.Run(func(args mock.Arguments) {
|
|
||||||
run(args[0].(func() error))
|
|
||||||
})
|
|
||||||
return _c
|
|
||||||
}
|
|
||||||
|
|
||||||
func (_c *MockSession_ForceActiveStandby_Call) Return(_a0 error) *MockSession_ForceActiveStandby_Call {
|
|
||||||
_c.Call.Return(_a0)
|
|
||||||
return _c
|
|
||||||
}
|
|
||||||
|
|
||||||
func (_c *MockSession_ForceActiveStandby_Call) RunAndReturn(run func(func() error) error) *MockSession_ForceActiveStandby_Call {
|
|
||||||
_c.Call.Return(run)
|
|
||||||
return _c
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetAddress provides a mock function with no fields
|
// GetAddress provides a mock function with no fields
|
||||||
func (_m *MockSession) GetAddress() string {
|
func (_m *MockSession) GetAddress() string {
|
||||||
ret := _m.Called()
|
ret := _m.Called()
|
||||||
|
|||||||
@ -45,7 +45,6 @@ type SessionInterface interface {
|
|||||||
Disconnected() bool
|
Disconnected() bool
|
||||||
SetEnableActiveStandBy(enable bool)
|
SetEnableActiveStandBy(enable bool)
|
||||||
ProcessActiveStandBy(activateFunc func() error) error
|
ProcessActiveStandBy(activateFunc func() error) error
|
||||||
ForceActiveStandby(activateFunc func() error) error
|
|
||||||
|
|
||||||
GetAddress() string
|
GetAddress() string
|
||||||
GetServerID() int64
|
GetServerID() int64
|
||||||
|
|||||||
@ -1230,73 +1230,6 @@ func (s *Session) ProcessActiveStandBy(activateFunc func() error) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Session) ForceActiveStandby(activateFunc func() error) error {
|
|
||||||
s.activeKey = path.Join(s.metaRoot, DefaultServiceRoot, s.ServerName)
|
|
||||||
|
|
||||||
// force register to the active_key.
|
|
||||||
forceRegisterActiveFn := func() error {
|
|
||||||
log.Info(fmt.Sprintf("try to register as ACTIVE %v service...", s.ServerName))
|
|
||||||
sessionJSON, err := json.Marshal(s)
|
|
||||||
if err != nil {
|
|
||||||
log.Error("json marshal error", zap.Error(err))
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// try to release old session first
|
|
||||||
sessions, _, err := s.GetSessions(s.ServerName)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(sessions) != 0 {
|
|
||||||
activeSess := sessions[s.ServerName]
|
|
||||||
if activeSess == nil || activeSess.LeaseID == nil {
|
|
||||||
// force delete all old sessions
|
|
||||||
s.etcdCli.Delete(s.ctx, s.activeKey)
|
|
||||||
for _, sess := range sessions {
|
|
||||||
if sess.ServerID != s.ServerID {
|
|
||||||
sess.getCompleteKey()
|
|
||||||
key := path.Join(s.metaRoot, DefaultServiceRoot, fmt.Sprintf("%s-%d", sess.ServerName, sess.ServerID))
|
|
||||||
s.etcdCli.Delete(s.ctx, key)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// force release old active session
|
|
||||||
_, _ = s.etcdCli.Revoke(s.ctx, *activeSess.LeaseID)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// then try to register as active
|
|
||||||
resp, err := s.etcdCli.Txn(s.ctx).If(
|
|
||||||
clientv3.Compare(
|
|
||||||
clientv3.Version(s.activeKey),
|
|
||||||
"=",
|
|
||||||
0)).
|
|
||||||
Then(clientv3.OpPut(s.activeKey, string(sessionJSON), clientv3.WithLease(*s.LeaseID))).Commit()
|
|
||||||
|
|
||||||
if err != nil || !resp.Succeeded {
|
|
||||||
msg := fmt.Sprintf("failed to force register ACTIVE %s", s.ServerName)
|
|
||||||
log.Error(msg, zap.Error(err), zap.Any("resp", resp))
|
|
||||||
return errors.New(msg)
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Info(fmt.Sprintf("force register ACTIVE %s", s.ServerName))
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
err := retry.Do(s.ctx, forceRegisterActiveFn, retry.Attempts(uint(s.sessionRetryTimes)))
|
|
||||||
if err != nil {
|
|
||||||
log.Warn(fmt.Sprintf("failed to force register ACTIVE %s", s.ServerName))
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
s.updateStandby(false)
|
|
||||||
log.Info(fmt.Sprintf("serverName: %v quit STANDBY mode, this node will become ACTIVE, ID: %d", s.ServerName, s.ServerID))
|
|
||||||
if activateFunc != nil {
|
|
||||||
return activateFunc()
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func filterEmptyStrings(s []string) []string {
|
func filterEmptyStrings(s []string) []string {
|
||||||
var filtered []string
|
var filtered []string
|
||||||
for _, str := range s {
|
for _, str := range s {
|
||||||
|
|||||||
@ -962,60 +962,6 @@ func (s *SessionSuite) TestRevoke() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *SessionSuite) TestForceActiveWithLeaseID() {
|
|
||||||
ctx := context.Background()
|
|
||||||
role := "test"
|
|
||||||
sess1 := NewSessionWithEtcd(ctx, s.metaRoot, s.client, WithResueNodeID(false))
|
|
||||||
sess1.Init(role, "normal1", false, false)
|
|
||||||
sess1.Register()
|
|
||||||
sess1.ProcessActiveStandBy(nil)
|
|
||||||
|
|
||||||
sess2 := NewSessionWithEtcd(ctx, s.metaRoot, s.client, WithResueNodeID(false))
|
|
||||||
sess2.Init(role, "normal2", false, false)
|
|
||||||
sess2.Register()
|
|
||||||
sess2.ForceActiveStandby(nil)
|
|
||||||
|
|
||||||
defer func() {
|
|
||||||
sess1.Stop()
|
|
||||||
sess2.Stop()
|
|
||||||
}()
|
|
||||||
sessions, _, err := sess2.GetSessions(role)
|
|
||||||
s.NoError(err)
|
|
||||||
s.Len(sessions, 2)
|
|
||||||
sess := sessions[role]
|
|
||||||
s.NotNil(sess)
|
|
||||||
s.Equal(sess.Address, "normal2")
|
|
||||||
s.Equal(sess.ServerID, sess2.ServerID)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SessionSuite) TestForceActiveWithDelete() {
|
|
||||||
ctx := context.Background()
|
|
||||||
role := "test"
|
|
||||||
sess1 := NewSessionWithEtcd(ctx, s.metaRoot, s.client, WithResueNodeID(false))
|
|
||||||
sess1.Init(role, "normal1", false, false)
|
|
||||||
sessionJSON, err := json.Marshal(sess1)
|
|
||||||
s.NoError(err)
|
|
||||||
s.client.Put(ctx, path.Join(s.metaRoot, DefaultServiceRoot, fmt.Sprintf("%s-%d", role, 1)), string(sessionJSON))
|
|
||||||
s.client.Put(ctx, path.Join(s.metaRoot, DefaultServiceRoot, role), string(sessionJSON))
|
|
||||||
|
|
||||||
sess2 := NewSessionWithEtcd(ctx, s.metaRoot, s.client, WithResueNodeID(false))
|
|
||||||
sess2.Init(role, "normal2", false, false)
|
|
||||||
sess2.Register()
|
|
||||||
sess2.ForceActiveStandby(nil)
|
|
||||||
|
|
||||||
defer func() {
|
|
||||||
sess1.Stop()
|
|
||||||
sess2.Stop()
|
|
||||||
}()
|
|
||||||
sessions, _, err := sess2.GetSessions(role)
|
|
||||||
s.NoError(err)
|
|
||||||
s.Len(sessions, 2)
|
|
||||||
sess := sessions[role]
|
|
||||||
s.NotNil(sess)
|
|
||||||
s.Equal(sess.Address, "normal2")
|
|
||||||
s.Equal(sess.ServerID, sess2.ServerID)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SessionSuite) TestKeepAliveRetryActiveCancel() {
|
func (s *SessionSuite) TestKeepAliveRetryActiveCancel() {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
session := NewSessionWithEtcd(ctx, s.metaRoot, s.client)
|
session := NewSessionWithEtcd(ctx, s.metaRoot, s.client)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user