mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-08 10:08:42 +08:00
During the rolling upgrade from 2.5 to 2.6, the 2.5 coordinator detects newly started 2.6 proxies. However, 2.6 proxies do not sync timetick, which leads to timetick delay. This PR ignores 2.6 proxies to prevent ttDelay during the upgrade process. issue: https://github.com/milvus-io/milvus/issues/43518 Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
237 lines
6.8 KiB
Go
237 lines
6.8 KiB
Go
// Licensed to the LF AI & Data foundation under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package proxyutil
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"path"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/blang/semver/v4"
|
|
"go.etcd.io/etcd/api/v3/mvccpb"
|
|
v3rpc "go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
|
|
clientv3 "go.etcd.io/etcd/client/v3"
|
|
"go.uber.org/zap"
|
|
"golang.org/x/sync/errgroup"
|
|
|
|
"github.com/milvus-io/milvus/internal/json"
|
|
"github.com/milvus-io/milvus/internal/util/sessionutil"
|
|
"github.com/milvus-io/milvus/pkg/v2/log"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/lifetime"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
|
|
)
|
|
|
|
type ProxyWatcherInterface interface {
|
|
AddSessionFunc(fns ...func(*sessionutil.Session))
|
|
DelSessionFunc(fns ...func(*sessionutil.Session))
|
|
|
|
WatchProxy(ctx context.Context) error
|
|
Stop()
|
|
}
|
|
|
|
// ProxyWatcher manages proxy clients
|
|
type ProxyWatcher struct {
|
|
wg errgroup.Group
|
|
lock sync.Mutex
|
|
etcdCli *clientv3.Client
|
|
initSessionsFunc []func([]*sessionutil.Session)
|
|
addSessionsFunc []func(*sessionutil.Session)
|
|
delSessionsFunc []func(*sessionutil.Session)
|
|
|
|
closeOnce sync.Once
|
|
closeCh lifetime.SafeChan
|
|
}
|
|
|
|
// NewProxyWatcher helper function to create a proxyWatcher
|
|
// fns are the custom getSessions function list
|
|
func NewProxyWatcher(client *clientv3.Client, fns ...func([]*sessionutil.Session)) *ProxyWatcher {
|
|
p := &ProxyWatcher{
|
|
lock: sync.Mutex{},
|
|
etcdCli: client,
|
|
closeCh: lifetime.NewSafeChan(),
|
|
}
|
|
p.initSessionsFunc = append(p.initSessionsFunc, fns...)
|
|
return p
|
|
}
|
|
|
|
// AddSessionFunc adds functions to addSessions function list
|
|
func (p *ProxyWatcher) AddSessionFunc(fns ...func(*sessionutil.Session)) {
|
|
p.lock.Lock()
|
|
defer p.lock.Unlock()
|
|
p.addSessionsFunc = append(p.addSessionsFunc, fns...)
|
|
}
|
|
|
|
// DelSessionFunc add functions to delSessions function list
|
|
func (p *ProxyWatcher) DelSessionFunc(fns ...func(*sessionutil.Session)) {
|
|
p.lock.Lock()
|
|
defer p.lock.Unlock()
|
|
p.delSessionsFunc = append(p.delSessionsFunc, fns...)
|
|
}
|
|
|
|
// WatchProxy starts a goroutine to watch proxy session changes on etcd
|
|
func (p *ProxyWatcher) WatchProxy(ctx context.Context) error {
|
|
childCtx, cancel := context.WithTimeout(ctx, paramtable.Get().ServiceParam.EtcdCfg.RequestTimeout.GetAsDuration(time.Millisecond))
|
|
defer cancel()
|
|
|
|
sessions, rev, err := p.getSessionsOnEtcd(childCtx)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
log.Info("succeed to init sessions on etcd", zap.Any("sessions", sessions), zap.Int64("revision", rev))
|
|
// all init function should be clear meta firstly.
|
|
for _, f := range p.initSessionsFunc {
|
|
f(sessions)
|
|
}
|
|
|
|
eventCh := p.etcdCli.Watch(
|
|
ctx,
|
|
path.Join(paramtable.Get().EtcdCfg.MetaRootPath.GetValue(), sessionutil.DefaultServiceRoot, typeutil.ProxyRole),
|
|
clientv3.WithPrefix(),
|
|
clientv3.WithCreatedNotify(),
|
|
clientv3.WithPrevKV(),
|
|
clientv3.WithRev(rev+1),
|
|
)
|
|
|
|
p.wg.Go(func() error {
|
|
p.startWatchEtcd(ctx, eventCh)
|
|
return nil
|
|
})
|
|
return nil
|
|
}
|
|
|
|
func (p *ProxyWatcher) startWatchEtcd(ctx context.Context, eventCh clientv3.WatchChan) {
|
|
log.Info("start to watch etcd")
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
log.Warn("stop watching etcd loop")
|
|
return
|
|
|
|
case <-p.closeCh.CloseCh():
|
|
log.Warn("stop watching etcd loop")
|
|
return
|
|
|
|
case event, ok := <-eventCh:
|
|
if !ok {
|
|
log.Warn("stop watching etcd loop due to closed etcd event channel")
|
|
panic("stop watching etcd loop due to closed etcd event channel")
|
|
}
|
|
if err := event.Err(); err != nil {
|
|
if err == v3rpc.ErrCompacted {
|
|
err2 := p.WatchProxy(ctx)
|
|
if err2 != nil {
|
|
log.Error("re watch proxy fails when etcd has a compaction error",
|
|
zap.Error(err), zap.Error(err2))
|
|
panic("failed to handle etcd request, exit..")
|
|
}
|
|
return
|
|
}
|
|
log.Error("Watch proxy service failed", zap.Error(err))
|
|
panic(err)
|
|
}
|
|
for _, e := range event.Events {
|
|
var err error
|
|
switch e.Type {
|
|
case mvccpb.PUT:
|
|
err = p.handlePutEvent(e)
|
|
case mvccpb.DELETE:
|
|
err = p.handleDeleteEvent(e)
|
|
}
|
|
if err != nil {
|
|
log.Warn("failed to handle proxy event", zap.Any("event", e), zap.Error(err))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func (p *ProxyWatcher) handlePutEvent(e *clientv3.Event) error {
|
|
session, err := p.parseSession(e.Kv.Value)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
log.Ctx(context.TODO()).Debug("received proxy put event with session", zap.Any("session", session))
|
|
rangeChecker := semver.MustParseRange(">=2.6.0-dev")
|
|
if rangeChecker(session.Version) {
|
|
log.Info("new proxy with no timetick join, ignored",
|
|
zap.String("version", session.Version.String()),
|
|
zap.Int64("serverID", session.ServerID),
|
|
zap.String("address", session.Address))
|
|
return nil
|
|
}
|
|
for _, f := range p.addSessionsFunc {
|
|
f(session)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (p *ProxyWatcher) handleDeleteEvent(e *clientv3.Event) error {
|
|
session, err := p.parseSession(e.PrevKv.Value)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
log.Ctx(context.TODO()).Debug("received proxy delete event with session", zap.Any("session", session))
|
|
for _, f := range p.delSessionsFunc {
|
|
f(session)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (p *ProxyWatcher) parseSession(value []byte) (*sessionutil.Session, error) {
|
|
session := new(sessionutil.Session)
|
|
err := json.Unmarshal(value, session)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return session, nil
|
|
}
|
|
|
|
func (p *ProxyWatcher) getSessionsOnEtcd(ctx context.Context) ([]*sessionutil.Session, int64, error) {
|
|
resp, err := p.etcdCli.Get(
|
|
ctx,
|
|
path.Join(paramtable.Get().EtcdCfg.MetaRootPath.GetValue(), sessionutil.DefaultServiceRoot, typeutil.ProxyRole),
|
|
clientv3.WithPrefix(),
|
|
clientv3.WithSort(clientv3.SortByKey, clientv3.SortAscend),
|
|
)
|
|
if err != nil {
|
|
return nil, 0, fmt.Errorf("proxy manager failed to watch proxy with error %w", err)
|
|
}
|
|
|
|
var sessions []*sessionutil.Session
|
|
for _, v := range resp.Kvs {
|
|
session, err := p.parseSession(v.Value)
|
|
if err != nil {
|
|
log.Warn("failed to unmarshal session", zap.Error(err))
|
|
return nil, 0, err
|
|
}
|
|
sessions = append(sessions, session)
|
|
}
|
|
|
|
return sessions, resp.Header.Revision, nil
|
|
}
|
|
|
|
// Stop stops the ProxyManager
|
|
func (p *ProxyWatcher) Stop() {
|
|
p.closeOnce.Do(func() {
|
|
p.closeCh.Close()
|
|
p.wg.Wait()
|
|
})
|
|
}
|