mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
This PR mainly improves two items: 1. Target observer should refresh loading status during init time. An uninitialized loading status blocks search/query. Currently, the target observer refreshes every 10 seconds, i.e. we'd need to wait for 10s for no reason. That's also the reason why we constantly see false log "collection unloaded" upon mixcoord restarts. 2. Delete session when service is stopped. So that the new service doesn't need to wait for the previous session to expire (~10s). Item 1 is the major improvement of this PR, which should speed up init time by 10s. Item 2 is not a big concern in most cases as coordinators usually shut down after stop(). In those cases, coordinator restart triggers serverID change which further triggers an existing logic that deletes expired session. This PR only fixes rare cases where serverID doesn't change. integration test: `go test -tags dynamic -v -coverprofile=profile.out -covermode=atomic tests/integration/coordrecovery/coord_recovery_test.go -timeout=20m` Performance after the change: Average init time of coordinators: 10s Hardware: M2 Pro Test setup: 1000 collections with 1000 rows (dim=128) per collection. issue: #29409 Signed-off-by: yiwangdr <yiwangdr@gmail.com>
1106 lines
28 KiB
Go
1106 lines
28 KiB
Go
package sessionutil
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"math/rand"
|
|
"net/url"
|
|
"os"
|
|
"path"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/blang/semver/v4"
|
|
"github.com/cockroachdb/errors"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
"github.com/stretchr/testify/suite"
|
|
"go.etcd.io/etcd/api/v3/mvccpb"
|
|
clientv3 "go.etcd.io/etcd/client/v3"
|
|
"go.etcd.io/etcd/server/v3/embed"
|
|
"go.etcd.io/etcd/server/v3/etcdserver/api/v3client"
|
|
"go.uber.org/atomic"
|
|
"go.uber.org/zap"
|
|
|
|
etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
|
|
"github.com/milvus-io/milvus/pkg/common"
|
|
"github.com/milvus-io/milvus/pkg/log"
|
|
"github.com/milvus-io/milvus/pkg/util/etcd"
|
|
"github.com/milvus-io/milvus/pkg/util/funcutil"
|
|
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
|
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
|
)
|
|
|
|
func TestGetServerIDConcurrently(t *testing.T) {
|
|
ctx := context.Background()
|
|
paramtable.Init()
|
|
params := paramtable.Get()
|
|
|
|
endpoints := params.EtcdCfg.Endpoints.GetValue()
|
|
metaRoot := fmt.Sprintf("%d/%s", rand.Int(), DefaultServiceRoot)
|
|
|
|
etcdEndpoints := strings.Split(endpoints, ",")
|
|
etcdCli, err := etcd.GetRemoteEtcdClient(etcdEndpoints)
|
|
require.NoError(t, err)
|
|
defer etcdCli.Close()
|
|
etcdKV := etcdkv.NewEtcdKV(etcdCli, metaRoot)
|
|
err = etcdKV.RemoveWithPrefix("")
|
|
assert.NoError(t, err)
|
|
|
|
defer etcdKV.Close()
|
|
defer etcdKV.RemoveWithPrefix("")
|
|
|
|
var wg sync.WaitGroup
|
|
muList := sync.Mutex{}
|
|
|
|
s := NewSessionWithEtcd(ctx, metaRoot, etcdCli)
|
|
res := make([]int64, 0)
|
|
|
|
getIDFunc := func() {
|
|
s.checkIDExist()
|
|
id, err := s.getServerID()
|
|
assert.NoError(t, err)
|
|
muList.Lock()
|
|
res = append(res, id)
|
|
muList.Unlock()
|
|
wg.Done()
|
|
}
|
|
|
|
for i := 0; i < 10; i++ {
|
|
wg.Add(1)
|
|
go getIDFunc()
|
|
}
|
|
wg.Wait()
|
|
assert.ElementsMatch(t, []int64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, res)
|
|
}
|
|
|
|
func TestInit(t *testing.T) {
|
|
ctx := context.Background()
|
|
paramtable.Init()
|
|
params := paramtable.Get()
|
|
|
|
endpoints := params.EtcdCfg.Endpoints.GetValue()
|
|
metaRoot := fmt.Sprintf("%d/%s", rand.Int(), DefaultServiceRoot)
|
|
|
|
etcdEndpoints := strings.Split(endpoints, ",")
|
|
etcdCli, err := etcd.GetRemoteEtcdClient(etcdEndpoints)
|
|
require.NoError(t, err)
|
|
etcdKV := etcdkv.NewEtcdKV(etcdCli, metaRoot)
|
|
err = etcdKV.RemoveWithPrefix("")
|
|
assert.NoError(t, err)
|
|
|
|
defer etcdKV.Close()
|
|
defer etcdKV.RemoveWithPrefix("")
|
|
|
|
s := NewSessionWithEtcd(ctx, metaRoot, etcdCli)
|
|
s.Init("inittest", "testAddr", false, false)
|
|
assert.NotEqual(t, int64(0), s.LeaseID)
|
|
assert.NotEqual(t, int64(0), s.ServerID)
|
|
s.Register()
|
|
sessions, _, err := s.GetSessions("inittest")
|
|
assert.NoError(t, err)
|
|
assert.Contains(t, sessions, "inittest-"+strconv.FormatInt(s.ServerID, 10))
|
|
}
|
|
|
|
func TestInitNoArgs(t *testing.T) {
|
|
ctx := context.Background()
|
|
paramtable.Init()
|
|
params := paramtable.Get()
|
|
|
|
endpoints := params.EtcdCfg.Endpoints.GetValue()
|
|
metaRoot := fmt.Sprintf("%d/%s", rand.Int(), DefaultServiceRoot)
|
|
|
|
etcdEndpoints := strings.Split(endpoints, ",")
|
|
etcdCli, err := etcd.GetRemoteEtcdClient(etcdEndpoints)
|
|
require.NoError(t, err)
|
|
etcdKV := etcdkv.NewEtcdKV(etcdCli, metaRoot)
|
|
err = etcdKV.RemoveWithPrefix("")
|
|
assert.NoError(t, err)
|
|
|
|
defer etcdKV.Close()
|
|
defer etcdKV.RemoveWithPrefix("")
|
|
|
|
s := NewSession(ctx)
|
|
s.Init("inittest", "testAddr", false, false)
|
|
assert.NotEqual(t, int64(0), s.LeaseID)
|
|
assert.NotEqual(t, int64(0), s.ServerID)
|
|
s.Register()
|
|
sessions, _, err := s.GetSessions("inittest")
|
|
assert.NoError(t, err)
|
|
assert.Contains(t, sessions, "inittest-"+strconv.FormatInt(s.ServerID, 10))
|
|
}
|
|
|
|
func TestUpdateSessions(t *testing.T) {
|
|
ctx := context.Background()
|
|
paramtable.Init()
|
|
params := paramtable.Get()
|
|
|
|
endpoints := params.EtcdCfg.Endpoints.GetValue()
|
|
etcdEndpoints := strings.Split(endpoints, ",")
|
|
metaRoot := fmt.Sprintf("%d/%s", rand.Int(), DefaultServiceRoot)
|
|
etcdCli, err := etcd.GetRemoteEtcdClient(etcdEndpoints)
|
|
require.NoError(t, err)
|
|
defer etcdCli.Close()
|
|
etcdKV := etcdkv.NewEtcdKV(etcdCli, "")
|
|
|
|
defer etcdKV.Close()
|
|
defer etcdKV.RemoveWithPrefix("")
|
|
|
|
var wg sync.WaitGroup
|
|
muList := sync.Mutex{}
|
|
|
|
s := NewSessionWithEtcd(ctx, metaRoot, etcdCli, WithResueNodeID(false))
|
|
|
|
sessions, rev, err := s.GetSessions("test")
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, len(sessions), 0)
|
|
eventCh := s.WatchServices("test", rev, nil)
|
|
|
|
sList := []*Session{}
|
|
|
|
getIDFunc := func() {
|
|
etcdCli, err := etcd.GetRemoteEtcdClient(etcdEndpoints)
|
|
require.NoError(t, err)
|
|
singleS := NewSessionWithEtcd(ctx, metaRoot, etcdCli, WithResueNodeID(false))
|
|
singleS.Init("test", "testAddr", false, false)
|
|
singleS.Register()
|
|
muList.Lock()
|
|
sList = append(sList, singleS)
|
|
muList.Unlock()
|
|
wg.Done()
|
|
}
|
|
|
|
for i := 0; i < 10; i++ {
|
|
wg.Add(1)
|
|
go getIDFunc()
|
|
}
|
|
wg.Wait()
|
|
|
|
assert.Eventually(t, func() bool {
|
|
sessions, _, _ := s.GetSessions("test")
|
|
return len(sessions) == 10
|
|
}, 10*time.Second, 100*time.Millisecond)
|
|
notExistSessions, _, _ := s.GetSessions("testt")
|
|
assert.Equal(t, len(notExistSessions), 0)
|
|
|
|
etcdKV.RemoveWithPrefix(metaRoot)
|
|
assert.Eventually(t, func() bool {
|
|
sessions, _, _ := s.GetSessions("test")
|
|
return len(sessions) == 0
|
|
}, 10*time.Second, 100*time.Millisecond)
|
|
|
|
sessionEvents := []*SessionEvent{}
|
|
addEventLen := 0
|
|
delEventLen := 0
|
|
eventLength := len(eventCh)
|
|
for i := 0; i < eventLength; i++ {
|
|
sessionEvent := <-eventCh
|
|
if sessionEvent.EventType == SessionAddEvent {
|
|
addEventLen++
|
|
}
|
|
if sessionEvent.EventType == SessionDelEvent {
|
|
delEventLen++
|
|
}
|
|
sessionEvents = append(sessionEvents, sessionEvent)
|
|
}
|
|
assert.Equal(t, len(sessionEvents), 20)
|
|
assert.Equal(t, addEventLen, 10)
|
|
assert.Equal(t, delEventLen, 10)
|
|
}
|
|
|
|
func TestSessionLivenessCheck(t *testing.T) {
|
|
paramtable.Init()
|
|
params := paramtable.Get()
|
|
|
|
endpoints := params.EtcdCfg.Endpoints.GetValue()
|
|
metaRoot := fmt.Sprintf("%d/%s", rand.Int(), DefaultServiceRoot)
|
|
|
|
etcdEndpoints := strings.Split(endpoints, ",")
|
|
etcdCli, err := etcd.GetRemoteEtcdClient(etcdEndpoints)
|
|
require.NoError(t, err)
|
|
s := NewSessionWithEtcd(context.Background(), metaRoot, etcdCli)
|
|
s.Register()
|
|
ch := make(chan struct{})
|
|
s.liveCh = ch
|
|
signal := make(chan struct{}, 1)
|
|
|
|
flag := atomic.NewBool(false)
|
|
s.LivenessCheck(context.Background(), func() {
|
|
flag.Store(true)
|
|
signal <- struct{}{}
|
|
})
|
|
assert.False(t, flag.Load())
|
|
|
|
// test liveCh receive event, liveness won't exit, callback won't trigger
|
|
ch <- struct{}{}
|
|
assert.False(t, flag.Load())
|
|
|
|
// test close liveCh, liveness exit, callback should trigger
|
|
close(ch)
|
|
<-signal
|
|
assert.True(t, flag.Load())
|
|
|
|
// test context done, liveness exit, callback shouldn't trigger
|
|
metaRoot = fmt.Sprintf("%d/%s", rand.Int(), DefaultServiceRoot)
|
|
s1 := NewSessionWithEtcd(context.Background(), metaRoot, etcdCli)
|
|
s1.Register()
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
flag.Store(false)
|
|
|
|
s1.LivenessCheck(ctx, func() {
|
|
flag.Store(true)
|
|
signal <- struct{}{}
|
|
})
|
|
cancel()
|
|
assert.False(t, flag.Load())
|
|
|
|
// test context done, liveness start failed, callback should trigger
|
|
metaRoot = fmt.Sprintf("%d/%s", rand.Int(), DefaultServiceRoot)
|
|
s2 := NewSessionWithEtcd(context.Background(), metaRoot, etcdCli)
|
|
s2.Register()
|
|
ctx, cancel = context.WithCancel(context.Background())
|
|
signal = make(chan struct{}, 1)
|
|
flag.Store(false)
|
|
cancel()
|
|
s2.LivenessCheck(ctx, func() {
|
|
flag.Store(true)
|
|
signal <- struct{}{}
|
|
})
|
|
<-signal
|
|
assert.True(t, flag.Load())
|
|
}
|
|
|
|
func TestWatcherHandleWatchResp(t *testing.T) {
|
|
ctx := context.Background()
|
|
paramtable.Init()
|
|
params := paramtable.Get()
|
|
|
|
endpoints := params.EtcdCfg.Endpoints.GetValue()
|
|
etcdEndpoints := strings.Split(endpoints, ",")
|
|
metaRoot := fmt.Sprintf("%d/%s", rand.Int(), DefaultServiceRoot)
|
|
|
|
etcdCli, err := etcd.GetRemoteEtcdClient(etcdEndpoints)
|
|
require.NoError(t, err)
|
|
defer etcdCli.Close()
|
|
|
|
etcdKV := etcdkv.NewEtcdKV(etcdCli, "/by-dev/session-ut")
|
|
defer etcdKV.Close()
|
|
defer etcdKV.RemoveWithPrefix("/by-dev/session-ut")
|
|
s := NewSessionWithEtcd(ctx, metaRoot, etcdCli)
|
|
defer s.Revoke(time.Second)
|
|
|
|
getWatcher := func(s *Session, rewatch Rewatch) *sessionWatcher {
|
|
return &sessionWatcher{
|
|
s: s,
|
|
prefix: "test",
|
|
rewatch: rewatch,
|
|
eventCh: make(chan *SessionEvent, 10),
|
|
validate: func(*Session) bool { return true },
|
|
}
|
|
}
|
|
|
|
t.Run("handle normal events", func(t *testing.T) {
|
|
w := getWatcher(s, nil)
|
|
wresp := clientv3.WatchResponse{
|
|
Events: []*clientv3.Event{
|
|
{
|
|
Type: mvccpb.PUT,
|
|
Kv: &mvccpb.KeyValue{
|
|
Value: []byte(`{"ServerID": 1, "ServerName": "test1"}`),
|
|
},
|
|
},
|
|
{
|
|
Type: mvccpb.DELETE,
|
|
PrevKv: &mvccpb.KeyValue{
|
|
Value: []byte(`{"ServerID": 2, "ServerName": "test2"}`),
|
|
},
|
|
},
|
|
},
|
|
}
|
|
assert.NotPanics(t, func() {
|
|
w.handleWatchResponse(wresp)
|
|
})
|
|
|
|
assert.Equal(t, 2, len(w.eventCh))
|
|
})
|
|
|
|
t.Run("handle abnormal events", func(t *testing.T) {
|
|
w := getWatcher(s, nil)
|
|
wresp := clientv3.WatchResponse{
|
|
Events: []*clientv3.Event{
|
|
{
|
|
Type: mvccpb.PUT,
|
|
Kv: &mvccpb.KeyValue{
|
|
Value: []byte(``),
|
|
},
|
|
},
|
|
{
|
|
Type: mvccpb.DELETE,
|
|
PrevKv: &mvccpb.KeyValue{
|
|
Value: []byte(``),
|
|
},
|
|
},
|
|
},
|
|
}
|
|
assert.NotPanics(t, func() {
|
|
w.handleWatchResponse(wresp)
|
|
})
|
|
assert.Equal(t, 0, len(w.eventCh))
|
|
})
|
|
|
|
t.Run("err compacted resp, nil Rewatch", func(t *testing.T) {
|
|
w := getWatcher(s, nil)
|
|
wresp := clientv3.WatchResponse{
|
|
CompactRevision: 1,
|
|
}
|
|
assert.NotPanics(t, func() {
|
|
w.handleWatchResponse(wresp)
|
|
})
|
|
})
|
|
|
|
t.Run("err compacted resp, valid Rewatch", func(t *testing.T) {
|
|
w := getWatcher(s, func(sessions map[string]*Session) error {
|
|
return nil
|
|
})
|
|
wresp := clientv3.WatchResponse{
|
|
CompactRevision: 1,
|
|
}
|
|
assert.NotPanics(t, func() {
|
|
w.handleWatchResponse(wresp)
|
|
})
|
|
})
|
|
|
|
t.Run("err canceled", func(t *testing.T) {
|
|
w := getWatcher(s, nil)
|
|
wresp := clientv3.WatchResponse{
|
|
Canceled: true,
|
|
}
|
|
|
|
assert.Panics(t, func() {
|
|
w.handleWatchResponse(wresp)
|
|
})
|
|
})
|
|
|
|
t.Run("err handled but rewatch failed", func(t *testing.T) {
|
|
w := getWatcher(s, func(sessions map[string]*Session) error {
|
|
return errors.New("mocked")
|
|
})
|
|
wresp := clientv3.WatchResponse{
|
|
CompactRevision: 1,
|
|
}
|
|
assert.Panics(t, func() {
|
|
w.handleWatchResponse(wresp)
|
|
})
|
|
})
|
|
|
|
t.Run("err handled but list failed", func(t *testing.T) {
|
|
s := NewSessionWithEtcd(ctx, "/by-dev/session-ut", etcdCli)
|
|
s.etcdCli.Close()
|
|
w := getWatcher(s, func(sessions map[string]*Session) error {
|
|
return nil
|
|
})
|
|
wresp := clientv3.WatchResponse{
|
|
CompactRevision: 1,
|
|
}
|
|
|
|
assert.Panics(t, func() {
|
|
w.handleWatchResponse(wresp)
|
|
})
|
|
})
|
|
}
|
|
|
|
func TestSession_Registered(t *testing.T) {
|
|
session := &Session{}
|
|
session.UpdateRegistered(false)
|
|
assert.False(t, session.Registered())
|
|
session.UpdateRegistered(true)
|
|
assert.True(t, session.Registered())
|
|
}
|
|
|
|
func TestSession_String(t *testing.T) {
|
|
s := &Session{}
|
|
log.Debug("log session", zap.Any("session", s))
|
|
}
|
|
|
|
func TestSesssionMarshal(t *testing.T) {
|
|
s := &Session{
|
|
SessionRaw: SessionRaw{
|
|
ServerID: 1,
|
|
ServerName: "test",
|
|
Address: "localhost",
|
|
},
|
|
Version: common.Version,
|
|
}
|
|
|
|
bs, err := json.Marshal(s)
|
|
require.NoError(t, err)
|
|
|
|
s2 := &Session{}
|
|
err = json.Unmarshal(bs, s2)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, s.ServerID, s2.ServerID)
|
|
assert.Equal(t, s.ServerName, s2.ServerName)
|
|
assert.Equal(t, s.Address, s2.Address)
|
|
assert.Equal(t, s.Version.String(), s2.Version.String())
|
|
}
|
|
|
|
func TestSesssionDelete(t *testing.T) {
|
|
paramtable.Init()
|
|
params := paramtable.Get()
|
|
endpoints := params.EtcdCfg.Endpoints.GetValue()
|
|
etcdEndpoints := strings.Split(endpoints, ",")
|
|
etcdCli, err := etcd.GetRemoteEtcdClient(etcdEndpoints)
|
|
require.NoError(t, err)
|
|
defer etcdCli.Close()
|
|
|
|
// Empty etcdCli
|
|
s := &Session{
|
|
SessionRaw: SessionRaw{
|
|
ServerID: 1,
|
|
ServerName: "test",
|
|
Address: "localhost",
|
|
},
|
|
Version: common.Version,
|
|
}
|
|
res := s.deleteSession()
|
|
assert.False(t, res)
|
|
|
|
// Closed etcdCli
|
|
s = &Session{
|
|
SessionRaw: SessionRaw{
|
|
ServerID: 1,
|
|
ServerName: "test",
|
|
Address: "localhost",
|
|
},
|
|
Version: common.Version,
|
|
}
|
|
s.etcdCli = etcdCli
|
|
etcdCli.Close()
|
|
res = s.deleteSession()
|
|
assert.False(t, res)
|
|
}
|
|
|
|
func TestSessionUnmarshal(t *testing.T) {
|
|
t.Run("json failure", func(t *testing.T) {
|
|
s := &Session{}
|
|
err := json.Unmarshal([]byte("garbage"), s)
|
|
assert.Error(t, err)
|
|
})
|
|
|
|
t.Run("version error", func(t *testing.T) {
|
|
s := &Session{}
|
|
err := json.Unmarshal([]byte(`{"Version": "a.b.c"}`), s)
|
|
assert.Error(t, err)
|
|
})
|
|
}
|
|
|
|
type SessionWithVersionSuite struct {
|
|
suite.Suite
|
|
tmpDir string
|
|
etcdServer *embed.Etcd
|
|
|
|
metaRoot string
|
|
serverName string
|
|
sessions []*Session
|
|
client *clientv3.Client
|
|
}
|
|
|
|
// SetupSuite setup suite env
|
|
func (suite *SessionWithVersionSuite) SetupSuite() {
|
|
dir, err := os.MkdirTemp(os.TempDir(), "milvus_ut")
|
|
suite.Require().NoError(err)
|
|
suite.tmpDir = dir
|
|
suite.T().Log("using tmp dir:", dir)
|
|
|
|
config := embed.NewConfig()
|
|
|
|
config.Dir = os.TempDir()
|
|
config.LogLevel = "warn"
|
|
config.LogOutputs = []string{"default"}
|
|
u, err := url.Parse("http://localhost:0")
|
|
suite.Require().NoError(err)
|
|
|
|
config.LCUrls = []url.URL{*u}
|
|
u, err = url.Parse("http://localhost:0")
|
|
suite.Require().NoError(err)
|
|
config.LPUrls = []url.URL{*u}
|
|
|
|
etcdServer, err := embed.StartEtcd(config)
|
|
suite.Require().NoError(err)
|
|
suite.etcdServer = etcdServer
|
|
}
|
|
|
|
func (suite *SessionWithVersionSuite) TearDownSuite() {
|
|
if suite.etcdServer != nil {
|
|
suite.etcdServer.Close()
|
|
}
|
|
if suite.tmpDir != "" {
|
|
os.RemoveAll(suite.tmpDir)
|
|
}
|
|
}
|
|
|
|
func (suite *SessionWithVersionSuite) SetupTest() {
|
|
client := v3client.New(suite.etcdServer.Server)
|
|
suite.client = client
|
|
|
|
ctx := context.Background()
|
|
suite.metaRoot = "sessionWithVersion"
|
|
suite.serverName = "sessionComp"
|
|
|
|
s1 := NewSessionWithEtcd(ctx, suite.metaRoot, client, WithResueNodeID(false))
|
|
s1.Version.Major, s1.Version.Minor, s1.Version.Patch = 0, 0, 0
|
|
s1.Init(suite.serverName, "s1", false, false)
|
|
s1.Register()
|
|
|
|
suite.sessions = append(suite.sessions, s1)
|
|
|
|
s2 := NewSessionWithEtcd(ctx, suite.metaRoot, client, WithResueNodeID(false))
|
|
s2.Version.Major, s2.Version.Minor, s2.Version.Patch = 2, 1, 0
|
|
s2.Init(suite.serverName, "s2", false, false)
|
|
s2.Register()
|
|
|
|
suite.sessions = append(suite.sessions, s2)
|
|
|
|
s3 := NewSessionWithEtcd(ctx, suite.metaRoot, client, WithResueNodeID(false))
|
|
s3.Version.Major, s3.Version.Minor, s3.Version.Patch = 2, 2, 0
|
|
s3.Version.Build = []string{"dev"}
|
|
s3.Init(suite.serverName, "s3", false, false)
|
|
s3.Register()
|
|
|
|
suite.sessions = append(suite.sessions, s3)
|
|
}
|
|
|
|
func (suite *SessionWithVersionSuite) TearDownTest() {
|
|
for _, s := range suite.sessions {
|
|
s.Revoke(time.Second)
|
|
}
|
|
|
|
suite.sessions = nil
|
|
_, err := suite.client.Delete(context.Background(), suite.metaRoot, clientv3.WithPrefix())
|
|
suite.Require().NoError(err)
|
|
|
|
if suite.client != nil {
|
|
suite.client.Close()
|
|
suite.client = nil
|
|
}
|
|
}
|
|
|
|
func (suite *SessionWithVersionSuite) TestGetSessionsWithRangeVersion() {
|
|
s := NewSessionWithEtcd(context.Background(), suite.metaRoot, suite.client, WithResueNodeID(false))
|
|
|
|
suite.Run(">1.0.0", func() {
|
|
r, err := semver.ParseRange(">1.0.0")
|
|
suite.Require().NoError(err)
|
|
|
|
result, _, err := s.GetSessionsWithVersionRange(suite.serverName, r)
|
|
suite.Require().NoError(err)
|
|
suite.Equal(2, len(result))
|
|
})
|
|
|
|
suite.Run(">2.1.0", func() {
|
|
r, err := semver.ParseRange(">2.1.0")
|
|
suite.Require().NoError(err)
|
|
|
|
result, _, err := s.GetSessionsWithVersionRange(suite.serverName, r)
|
|
suite.Require().NoError(err)
|
|
suite.Equal(1, len(result))
|
|
})
|
|
|
|
suite.Run(">2.2.0", func() {
|
|
r, err := semver.ParseRange(">2.2.0")
|
|
suite.Require().NoError(err)
|
|
|
|
result, _, err := s.GetSessionsWithVersionRange(suite.serverName, r)
|
|
suite.Require().NoError(err)
|
|
suite.Equal(0, len(result))
|
|
suite.T().Log(result)
|
|
})
|
|
|
|
suite.Run(">=0.0.0 with garbage", func() {
|
|
ctx := context.Background()
|
|
r, err := semver.ParseRange(">=0.0.0")
|
|
suite.Require().NoError(err)
|
|
|
|
suite.client.Put(ctx, path.Join(suite.metaRoot, DefaultServiceRoot, suite.serverName, "garbage"), "garbage")
|
|
suite.client.Put(ctx, path.Join(suite.metaRoot, DefaultServiceRoot, suite.serverName, "garbage_1"), `{"Version": "a.b.c"}`)
|
|
|
|
_, _, err = s.GetSessionsWithVersionRange(suite.serverName, r)
|
|
suite.Error(err)
|
|
})
|
|
}
|
|
|
|
func (suite *SessionWithVersionSuite) TestWatchServicesWithVersionRange() {
|
|
s := NewSessionWithEtcd(context.Background(), suite.metaRoot, suite.client, WithResueNodeID(false))
|
|
|
|
suite.Run(">1.0.0 <=2.1.0", func() {
|
|
r, err := semver.ParseRange(">1.0.0 <=2.1.0")
|
|
suite.Require().NoError(err)
|
|
|
|
_, rev, err := s.GetSessionsWithVersionRange(suite.serverName, r)
|
|
suite.Require().NoError(err)
|
|
|
|
ch := s.WatchServicesWithVersionRange(suite.serverName, r, rev, nil)
|
|
|
|
// remove all sessions
|
|
go func() {
|
|
for _, s := range suite.sessions {
|
|
s.Revoke(time.Second)
|
|
}
|
|
}()
|
|
|
|
select {
|
|
case evt := <-ch:
|
|
suite.Equal(suite.sessions[1].ServerID, evt.Session.ServerID)
|
|
case <-time.After(time.Second):
|
|
suite.Fail("no event received, failing")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestSessionWithVersionRange(t *testing.T) {
|
|
suite.Run(t, new(SessionWithVersionSuite))
|
|
}
|
|
|
|
func TestSessionProcessActiveStandBy(t *testing.T) {
|
|
// initial etcd
|
|
paramtable.Init()
|
|
params := paramtable.Get()
|
|
endpoints := params.EtcdCfg.Endpoints.GetValue()
|
|
metaRoot := fmt.Sprintf("%d/%s1", rand.Int(), DefaultServiceRoot)
|
|
|
|
etcdEndpoints := strings.Split(endpoints, ",")
|
|
etcdCli, err := etcd.GetRemoteEtcdClient(etcdEndpoints)
|
|
require.NoError(t, err)
|
|
etcdKV := etcdkv.NewEtcdKV(etcdCli, metaRoot)
|
|
err = etcdKV.RemoveWithPrefix("")
|
|
assert.NoError(t, err)
|
|
|
|
defer etcdKV.Close()
|
|
defer etcdKV.RemoveWithPrefix("")
|
|
|
|
var wg sync.WaitGroup
|
|
signal := make(chan struct{})
|
|
flag := false
|
|
|
|
// register session 1, will be active
|
|
ctx1 := context.Background()
|
|
s1 := NewSessionWithEtcd(ctx1, metaRoot, etcdCli, WithResueNodeID(false))
|
|
s1.Init("inittest", "testAddr", true, true)
|
|
s1.SetEnableActiveStandBy(true)
|
|
s1.Register()
|
|
wg.Add(1)
|
|
s1.ProcessActiveStandBy(func() error {
|
|
log.Debug("Session 1 become active")
|
|
wg.Done()
|
|
return nil
|
|
})
|
|
wg.Wait()
|
|
s1.LivenessCheck(ctx1, func() {
|
|
log.Debug("Session 1 livenessCheck callback")
|
|
flag = true
|
|
close(signal)
|
|
s1.cancelKeepAlive()
|
|
})
|
|
assert.False(t, s1.isStandby.Load().(bool))
|
|
|
|
// register session 2, will be standby
|
|
ctx2 := context.Background()
|
|
s2 := NewSessionWithEtcd(ctx2, metaRoot, etcdCli, WithResueNodeID(false))
|
|
s2.Init("inittest", "testAddr", true, true)
|
|
s2.SetEnableActiveStandBy(true)
|
|
s2.Register()
|
|
wg.Add(1)
|
|
go s2.ProcessActiveStandBy(func() error {
|
|
log.Debug("Session 2 become active")
|
|
wg.Done()
|
|
return nil
|
|
})
|
|
assert.True(t, s2.isStandby.Load().(bool))
|
|
|
|
// assert.True(t, s2.watchingPrimaryKeyLock)
|
|
// stop session 1, session 2 will take over primary service
|
|
log.Debug("Stop session 1, session 2 will take over primary service")
|
|
assert.False(t, flag)
|
|
|
|
s1.safeCloseLiveCh()
|
|
{
|
|
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
|
defer cancel()
|
|
_, _ = s1.etcdCli.Revoke(ctx, *s1.LeaseID)
|
|
}
|
|
select {
|
|
case <-signal:
|
|
log.Debug("receive s1 signal")
|
|
case <-time.After(10 * time.Second):
|
|
log.Debug("wait to fail Liveness Check timeout")
|
|
t.FailNow()
|
|
}
|
|
assert.True(t, flag)
|
|
log.Debug("session s1 stop")
|
|
|
|
wg.Wait()
|
|
log.Debug("session s2 wait done")
|
|
assert.False(t, s2.isStandby.Load().(bool))
|
|
s2.Stop()
|
|
}
|
|
|
|
func TestSessionEventType_String(t *testing.T) {
|
|
tests := []struct {
|
|
name string
|
|
t SessionEventType
|
|
want string
|
|
}{
|
|
{t: SessionNoneEvent, want: ""},
|
|
{t: SessionAddEvent, want: "SessionAddEvent"},
|
|
{t: SessionDelEvent, want: "SessionDelEvent"},
|
|
{t: SessionUpdateEvent, want: "SessionUpdateEvent"},
|
|
}
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
assert.Equalf(t, tt.want, tt.t.String(), "String()")
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestServerInfoOp(t *testing.T) {
|
|
t.Run("test with specified pid", func(t *testing.T) {
|
|
pid := 9999999
|
|
serverID := int64(999)
|
|
|
|
filePath := GetServerInfoFilePath(pid)
|
|
defer os.RemoveAll(filePath)
|
|
|
|
saveServerInfoInternal(typeutil.QueryCoordRole, serverID, pid)
|
|
saveServerInfoInternal(typeutil.DataCoordRole, serverID, pid)
|
|
saveServerInfoInternal(typeutil.ProxyRole, serverID, pid)
|
|
|
|
sessions := GetSessions(pid)
|
|
assert.Equal(t, 3, len(sessions))
|
|
assert.ElementsMatch(t, sessions, []string{
|
|
"querycoord-999",
|
|
"datacoord-999",
|
|
"proxy-999",
|
|
})
|
|
|
|
RemoveServerInfoFile(pid)
|
|
sessions = GetSessions(pid)
|
|
assert.Equal(t, 0, len(sessions))
|
|
})
|
|
|
|
t.Run("test with os pid", func(t *testing.T) {
|
|
serverID := int64(9999)
|
|
filePath := GetServerInfoFilePath(os.Getpid())
|
|
defer os.RemoveAll(filePath)
|
|
|
|
SaveServerInfo(typeutil.QueryCoordRole, serverID)
|
|
sessions := GetSessions(os.Getpid())
|
|
assert.Equal(t, 1, len(sessions))
|
|
})
|
|
}
|
|
|
|
func TestSession_apply(t *testing.T) {
|
|
session := &Session{}
|
|
opts := []SessionOption{WithTTL(100), WithRetryTimes(200)}
|
|
session.apply(opts...)
|
|
assert.Equal(t, int64(100), session.sessionTTL)
|
|
assert.Equal(t, int64(200), session.sessionRetryTimes)
|
|
}
|
|
|
|
func TestIntegrationMode(t *testing.T) {
|
|
ctx := context.Background()
|
|
paramtable.Init()
|
|
params := paramtable.Get()
|
|
params.Save(params.IntegrationTestCfg.IntegrationMode.Key, "true")
|
|
|
|
endpoints := params.EtcdCfg.Endpoints.GetValue()
|
|
metaRoot := fmt.Sprintf("%d/%s", rand.Int(), DefaultServiceRoot)
|
|
|
|
etcdEndpoints := strings.Split(endpoints, ",")
|
|
etcdCli, err := etcd.GetRemoteEtcdClient(etcdEndpoints)
|
|
require.NoError(t, err)
|
|
etcdKV := etcdkv.NewEtcdKV(etcdCli, metaRoot)
|
|
err = etcdKV.RemoveWithPrefix("")
|
|
assert.NoError(t, err)
|
|
|
|
s1 := NewSessionWithEtcd(ctx, metaRoot, etcdCli)
|
|
assert.Equal(t, false, s1.reuseNodeID)
|
|
s2 := NewSessionWithEtcd(ctx, metaRoot, etcdCli)
|
|
assert.Equal(t, false, s2.reuseNodeID)
|
|
s1.Init("inittest1", "testAddr1", false, false)
|
|
s1.Init("inittest2", "testAddr2", false, false)
|
|
assert.NotEqual(t, s1.ServerID, s2.ServerID)
|
|
}
|
|
|
|
type SessionSuite struct {
|
|
suite.Suite
|
|
tmpDir string
|
|
etcdServer *embed.Etcd
|
|
|
|
metaRoot string
|
|
serverName string
|
|
client *clientv3.Client
|
|
}
|
|
|
|
func (s *SessionSuite) SetupSuite() {
|
|
paramtable.Init()
|
|
dir, err := os.MkdirTemp(os.TempDir(), "milvus_ut")
|
|
s.Require().NoError(err)
|
|
s.tmpDir = dir
|
|
s.T().Log("using tmp dir:", dir)
|
|
|
|
config := embed.NewConfig()
|
|
|
|
config.Dir = os.TempDir()
|
|
config.LogLevel = "warn"
|
|
config.LogOutputs = []string{"default"}
|
|
u, err := url.Parse("http://localhost:0")
|
|
s.Require().NoError(err)
|
|
|
|
config.LCUrls = []url.URL{*u}
|
|
u, err = url.Parse("http://localhost:0")
|
|
s.Require().NoError(err)
|
|
config.LPUrls = []url.URL{*u}
|
|
|
|
etcdServer, err := embed.StartEtcd(config)
|
|
s.Require().NoError(err)
|
|
s.etcdServer = etcdServer
|
|
}
|
|
|
|
func (s *SessionSuite) TearDownSuite() {
|
|
if s.etcdServer != nil {
|
|
s.etcdServer.Close()
|
|
}
|
|
if s.tmpDir != "" {
|
|
os.RemoveAll(s.tmpDir)
|
|
}
|
|
}
|
|
|
|
func (s *SessionSuite) SetupTest() {
|
|
client := v3client.New(s.etcdServer.Server)
|
|
s.client = client
|
|
|
|
s.metaRoot = fmt.Sprintf("milvus-ut/session-%s/", funcutil.GenRandomStr())
|
|
}
|
|
|
|
func (s *SessionSuite) TearDownTest() {
|
|
_, err := s.client.Delete(context.Background(), s.metaRoot, clientv3.WithPrefix())
|
|
s.Require().NoError(err)
|
|
|
|
if s.client != nil {
|
|
s.client.Close()
|
|
s.client = nil
|
|
}
|
|
}
|
|
|
|
func (s *SessionSuite) TestDisconnected() {
|
|
st := &Session{}
|
|
st.SetDisconnected(true)
|
|
sf := &Session{}
|
|
sf.SetDisconnected(false)
|
|
|
|
cases := []struct {
|
|
tag string
|
|
input *Session
|
|
expect bool
|
|
}{
|
|
{"not_set", &Session{}, false},
|
|
{"set_true", st, true},
|
|
{"set_false", sf, false},
|
|
}
|
|
|
|
for _, c := range cases {
|
|
s.Run(c.tag, func() {
|
|
s.Equal(c.expect, c.input.Disconnected())
|
|
})
|
|
}
|
|
}
|
|
|
|
func (s *SessionSuite) TestGoingStop() {
|
|
ctx := context.Background()
|
|
sdisconnect := NewSessionWithEtcd(ctx, s.metaRoot, s.client)
|
|
sdisconnect.SetDisconnected(true)
|
|
|
|
sess := NewSessionWithEtcd(ctx, s.metaRoot, s.client)
|
|
sess.Init("test", "normal", false, false)
|
|
sess.Register()
|
|
|
|
cases := []struct {
|
|
tag string
|
|
input *Session
|
|
expectError bool
|
|
}{
|
|
{"nil", nil, true},
|
|
{"not_inited", &Session{}, true},
|
|
{"disconnected", sdisconnect, true},
|
|
{"normal", sess, false},
|
|
}
|
|
|
|
for _, c := range cases {
|
|
s.Run(c.tag, func() {
|
|
err := c.input.GoingStop()
|
|
if c.expectError {
|
|
s.Error(err)
|
|
} else {
|
|
s.NoError(err)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func (s *SessionSuite) TestRevoke() {
|
|
ctx := context.Background()
|
|
disconnected := NewSessionWithEtcd(ctx, s.metaRoot, s.client, WithResueNodeID(false))
|
|
disconnected.Init("test", "disconnected", false, false)
|
|
disconnected.Register()
|
|
disconnected.SetDisconnected(true)
|
|
|
|
sess := NewSessionWithEtcd(ctx, s.metaRoot, s.client, WithResueNodeID(false))
|
|
sess.Init("test", "normal", false, false)
|
|
sess.Register()
|
|
|
|
cases := []struct {
|
|
tag string
|
|
input *Session
|
|
preExist bool
|
|
success bool
|
|
}{
|
|
{"not_inited", &Session{}, false, true},
|
|
{"disconnected", disconnected, true, false},
|
|
{"normal", sess, false, true},
|
|
}
|
|
|
|
for _, c := range cases {
|
|
s.Run(c.tag, func() {
|
|
c.input.Revoke(time.Second)
|
|
resp, err := s.client.Get(ctx, c.input.getCompleteKey())
|
|
s.Require().NoError(err)
|
|
if !c.preExist || c.success {
|
|
s.Equal(0, len(resp.Kvs))
|
|
}
|
|
if c.preExist && !c.success {
|
|
s.Equal(1, len(resp.Kvs))
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func (s *SessionSuite) TestForceActiveWithLeaseID() {
|
|
ctx := context.Background()
|
|
role := "test"
|
|
sess1 := NewSessionWithEtcd(ctx, s.metaRoot, s.client, WithResueNodeID(false))
|
|
sess1.Init(role, "normal1", false, false)
|
|
sess1.Register()
|
|
sess1.ProcessActiveStandBy(nil)
|
|
|
|
sess2 := NewSessionWithEtcd(ctx, s.metaRoot, s.client, WithResueNodeID(false))
|
|
sess2.Init(role, "normal2", false, false)
|
|
sess2.Register()
|
|
sess2.ForceActiveStandby(nil)
|
|
|
|
defer func() {
|
|
sess1.Stop()
|
|
sess2.Stop()
|
|
}()
|
|
sessions, _, err := sess2.GetSessions(role)
|
|
s.NoError(err)
|
|
s.Len(sessions, 2)
|
|
sess := sessions[role]
|
|
s.NotNil(sess)
|
|
s.Equal(sess.Address, "normal2")
|
|
s.Equal(sess.ServerID, sess2.ServerID)
|
|
}
|
|
|
|
func (s *SessionSuite) TestForceActiveWithDelete() {
|
|
ctx := context.Background()
|
|
role := "test"
|
|
sess1 := NewSessionWithEtcd(ctx, s.metaRoot, s.client, WithResueNodeID(false))
|
|
sess1.Init(role, "normal1", false, false)
|
|
sessionJSON, err := json.Marshal(sess1)
|
|
s.NoError(err)
|
|
s.client.Put(ctx, path.Join(s.metaRoot, DefaultServiceRoot, fmt.Sprintf("%s-%d", role, 1)), string(sessionJSON))
|
|
s.client.Put(ctx, path.Join(s.metaRoot, DefaultServiceRoot, role), string(sessionJSON))
|
|
|
|
sess2 := NewSessionWithEtcd(ctx, s.metaRoot, s.client, WithResueNodeID(false))
|
|
sess2.Init(role, "normal2", false, false)
|
|
sess2.Register()
|
|
sess2.ForceActiveStandby(nil)
|
|
|
|
defer func() {
|
|
sess1.Stop()
|
|
sess2.Stop()
|
|
}()
|
|
sessions, _, err := sess2.GetSessions(role)
|
|
s.NoError(err)
|
|
s.Len(sessions, 2)
|
|
sess := sessions[role]
|
|
s.NotNil(sess)
|
|
s.Equal(sess.Address, "normal2")
|
|
s.Equal(sess.ServerID, sess2.ServerID)
|
|
}
|
|
|
|
func (s *SessionSuite) TestKeepAliveRetryActiveCancel() {
|
|
ctx := context.Background()
|
|
session := NewSessionWithEtcd(ctx, s.metaRoot, s.client)
|
|
session.Init("test", "normal", false, false)
|
|
|
|
// Register
|
|
ch, err := session.registerService()
|
|
s.Require().NoError(err)
|
|
session.liveCh = make(chan struct{})
|
|
session.processKeepAliveResponse(ch)
|
|
session.LivenessCheck(ctx, nil)
|
|
// active cancel, should not retry connect
|
|
session.cancelKeepAlive()
|
|
|
|
// wait workers exit
|
|
session.wg.Wait()
|
|
// expected Disconnected = true, means session is closed
|
|
assert.Equal(s.T(), true, session.Disconnected())
|
|
}
|
|
|
|
func (s *SessionSuite) TestKeepAliveRetryChannelClose() {
|
|
ctx := context.Background()
|
|
session := NewSessionWithEtcd(ctx, s.metaRoot, s.client)
|
|
session.Init("test", "normal", false, false)
|
|
|
|
// Register
|
|
_, err := session.registerService()
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
session.liveCh = make(chan struct{})
|
|
closeChan := make(chan *clientv3.LeaseKeepAliveResponse)
|
|
sendChan := (<-chan *clientv3.LeaseKeepAliveResponse)(closeChan)
|
|
session.processKeepAliveResponse(sendChan)
|
|
session.LivenessCheck(ctx, nil)
|
|
// close channel, should retry connect
|
|
close(closeChan)
|
|
|
|
// sleep a while wait goroutine process
|
|
time.Sleep(time.Millisecond * 100)
|
|
// expected Disconnected = false, means session is not closed
|
|
assert.Equal(s.T(), false, session.Disconnected())
|
|
time.Sleep(time.Second * 1)
|
|
// expected Disconnected = false, means session is not closed, keepalive keeps working
|
|
assert.Equal(s.T(), false, session.Disconnected())
|
|
}
|
|
|
|
func (s *SessionSuite) TestSafeCloseLiveCh() {
|
|
ctx := context.Background()
|
|
session := NewSessionWithEtcd(ctx, s.metaRoot, s.client)
|
|
session.Init("test", "normal", false, false)
|
|
session.liveCh = make(chan struct{})
|
|
session.safeCloseLiveCh()
|
|
assert.NotPanics(s.T(), func() {
|
|
session.safeCloseLiveCh()
|
|
})
|
|
}
|
|
|
|
func TestSessionSuite(t *testing.T) {
|
|
suite.Run(t, new(SessionSuite))
|
|
}
|