enhance: Rootcoord's stop may block in quota_center's stop (#31447)

this PR fixed that rootcoord's stop may block in quota_center' stop

---------

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
This commit is contained in:
wei liu 2024-04-01 21:09:18 +08:00 committed by GitHub
parent 4e264003bf
commit 9111ecab82
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 60 additions and 5 deletions

View File

@ -88,6 +88,9 @@ type collectionStates = map[milvuspb.QuotaState]commonpb.ErrorCode
// //
// If necessary, user can also manually force to deny RW requests. // If necessary, user can also manually force to deny RW requests.
type QuotaCenter struct { type QuotaCenter struct {
ctx context.Context
cancel context.CancelFunc
// clients // clients
proxies proxyutil.ProxyClientManagerInterface proxies proxyutil.ProxyClientManagerInterface
queryCoord types.QueryCoordClient queryCoord types.QueryCoordClient
@ -113,11 +116,16 @@ type QuotaCenter struct {
stopOnce sync.Once stopOnce sync.Once
stopChan chan struct{} stopChan chan struct{}
wg sync.WaitGroup
} }
// NewQuotaCenter returns a new QuotaCenter. // NewQuotaCenter returns a new QuotaCenter.
func NewQuotaCenter(proxies proxyutil.ProxyClientManagerInterface, queryCoord types.QueryCoordClient, dataCoord types.DataCoordClient, tsoAllocator tso.Allocator, meta IMetaTable) *QuotaCenter { func NewQuotaCenter(proxies proxyutil.ProxyClientManagerInterface, queryCoord types.QueryCoordClient, dataCoord types.DataCoordClient, tsoAllocator tso.Allocator, meta IMetaTable) *QuotaCenter {
ctx, cancel := context.WithCancel(context.TODO())
return &QuotaCenter{ return &QuotaCenter{
ctx: ctx,
cancel: cancel,
proxies: proxies, proxies: proxies,
queryCoord: queryCoord, queryCoord: queryCoord,
dataCoord: dataCoord, dataCoord: dataCoord,
@ -133,8 +141,15 @@ func NewQuotaCenter(proxies proxyutil.ProxyClientManagerInterface, queryCoord ty
} }
} }
func (q *QuotaCenter) Start() {
q.wg.Add(1)
go q.run()
}
// run starts the service of QuotaCenter. // run starts the service of QuotaCenter.
func (q *QuotaCenter) run() { func (q *QuotaCenter) run() {
defer q.wg.Done()
interval := time.Duration(Params.QuotaConfig.QuotaCenterCollectInterval.GetAsFloat() * float64(time.Second)) interval := time.Duration(Params.QuotaConfig.QuotaCenterCollectInterval.GetAsFloat() * float64(time.Second))
log.Info("Start QuotaCenter", zap.Duration("collectInterval", interval)) log.Info("Start QuotaCenter", zap.Duration("collectInterval", interval))
ticker := time.NewTicker(interval) ticker := time.NewTicker(interval)
@ -166,9 +181,13 @@ func (q *QuotaCenter) run() {
// stop would stop the service of QuotaCenter. // stop would stop the service of QuotaCenter.
func (q *QuotaCenter) stop() { func (q *QuotaCenter) stop() {
log.Info("stop quota center")
q.stopOnce.Do(func() { q.stopOnce.Do(func() {
q.stopChan <- struct{}{} // cancel all blocking request to coord
q.cancel()
close(q.stopChan)
}) })
q.wg.Wait()
} }
// clearMetrics removes all metrics stored in QuotaCenter. // clearMetrics removes all metrics stored in QuotaCenter.
@ -236,7 +255,7 @@ func (q *QuotaCenter) syncMetrics() error {
oldDataNodes := typeutil.NewSet(lo.Keys(q.dataNodeMetrics)...) oldDataNodes := typeutil.NewSet(lo.Keys(q.dataNodeMetrics)...)
oldQueryNodes := typeutil.NewSet(lo.Keys(q.queryNodeMetrics)...) oldQueryNodes := typeutil.NewSet(lo.Keys(q.queryNodeMetrics)...)
q.clearMetrics() q.clearMetrics()
ctx, cancel := context.WithTimeout(context.Background(), GetMetricsTimeout) ctx, cancel := context.WithTimeout(q.ctx, GetMetricsTimeout)
defer cancel() defer cancel()
group := &errgroup.Group{} group := &errgroup.Group{}
@ -862,7 +881,7 @@ func (q *QuotaCenter) checkDiskQuota() {
// setRates notifies Proxies to set rates for different rate types. // setRates notifies Proxies to set rates for different rate types.
func (q *QuotaCenter) setRates() error { func (q *QuotaCenter) setRates() error {
ctx, cancel := context.WithTimeout(context.Background(), SetRatesTimeout) ctx, cancel := context.WithTimeout(q.ctx, SetRatesTimeout)
defer cancel() defer cancel()
toCollectionRate := func(collection int64, currentRates map[internalpb.RateType]ratelimitutil.Limit) *proxypb.CollectionRate { toCollectionRate := func(collection int64, currentRates map[internalpb.RateType]ratelimitutil.Limit) *proxypb.CollectionRate {

View File

@ -28,6 +28,7 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock" "github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite" "github.com/stretchr/testify/suite"
"google.golang.org/grpc"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
@ -67,11 +68,46 @@ func TestQuotaCenter(t *testing.T) {
meta.EXPECT().GetCollectionByID(mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil, merr.ErrCollectionNotFound).Maybe() meta.EXPECT().GetCollectionByID(mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil, merr.ErrCollectionNotFound).Maybe()
quotaCenter := NewQuotaCenter(pcm, qc, dc, core.tsoAllocator, meta) quotaCenter := NewQuotaCenter(pcm, qc, dc, core.tsoAllocator, meta)
go quotaCenter.run() quotaCenter.Start()
time.Sleep(10 * time.Millisecond) time.Sleep(10 * time.Millisecond)
quotaCenter.stop() quotaCenter.stop()
}) })
t.Run("test QuotaCenter stop", func(t *testing.T) {
qc := mocks.NewMockQueryCoordClient(t)
meta := mockrootcoord.NewIMetaTable(t)
paramtable.Get().Save(paramtable.Get().QuotaConfig.QuotaCenterCollectInterval.Key, "1")
defer paramtable.Get().Reset(paramtable.Get().QuotaConfig.QuotaCenterCollectInterval.Key)
qc.ExpectedCalls = nil
// mock query coord stuck for at most 10s
qc.EXPECT().GetMetrics(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, gmr *milvuspb.GetMetricsRequest, co ...grpc.CallOption) (*milvuspb.GetMetricsResponse, error) {
counter := 0
for {
select {
case <-ctx.Done():
return nil, merr.ErrCollectionNotFound
default:
if counter < 10 {
time.Sleep(1 * time.Second)
counter++
}
}
}
})
meta.EXPECT().GetCollectionByID(mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil, merr.ErrCollectionNotFound).Maybe()
quotaCenter := NewQuotaCenter(pcm, qc, dc, core.tsoAllocator, meta)
quotaCenter.Start()
time.Sleep(3 * time.Second)
// assert stop won't stuck more than 5s
start := time.Now()
quotaCenter.stop()
assert.True(t, time.Since(start).Seconds() <= 5)
})
t.Run("test syncMetrics", func(t *testing.T) { t.Run("test syncMetrics", func(t *testing.T) {
qc := mocks.NewMockQueryCoordClient(t) qc := mocks.NewMockQueryCoordClient(t)
meta := mockrootcoord.NewIMetaTable(t) meta := mockrootcoord.NewIMetaTable(t)

View File

@ -668,7 +668,7 @@ func (c *Core) startInternal() error {
} }
if Params.QuotaConfig.QuotaAndLimitsEnabled.GetAsBool() { if Params.QuotaConfig.QuotaAndLimitsEnabled.GetAsBool() {
go c.quotaCenter.run() c.quotaCenter.Start()
} }
c.scheduler.Start() c.scheduler.Start()