mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 17:48:29 +08:00
fix: backoff will retry infinitely after reaching max elapsed time (#40589)
issue: #40588
Signed-off-by: chyezh <chyezh@outlook.com>
This commit is contained in:
parent
e93d53ca28
commit
f6fb4bc442
@ -130,6 +130,7 @@ func (rc *resumableConsumerImpl) createNewConsumer(opts *handler.ConsumerOptions
|
|||||||
backoff := backoff.NewExponentialBackOff()
|
backoff := backoff.NewExponentialBackOff()
|
||||||
backoff.InitialInterval = 100 * time.Millisecond
|
backoff.InitialInterval = 100 * time.Millisecond
|
||||||
backoff.MaxInterval = 10 * time.Second
|
backoff.MaxInterval = 10 * time.Second
|
||||||
|
backoff.MaxElapsedTime = 0
|
||||||
for {
|
for {
|
||||||
// Create a new consumer.
|
// Create a new consumer.
|
||||||
// a underlying stream consumer life time should be equal to the resumable producer.
|
// a underlying stream consumer life time should be equal to the resumable producer.
|
||||||
|
|||||||
@ -152,7 +152,8 @@ func (p *ResumableProducer) waitUntilUnavailable(producer handler.Producer) erro
|
|||||||
func (p *ResumableProducer) createNewProducer() (producer.Producer, error) {
|
func (p *ResumableProducer) createNewProducer() (producer.Producer, error) {
|
||||||
backoff := backoff.NewExponentialBackOff()
|
backoff := backoff.NewExponentialBackOff()
|
||||||
backoff.InitialInterval = 100 * time.Millisecond
|
backoff.InitialInterval = 100 * time.Millisecond
|
||||||
backoff.MaxInterval = 2 * time.Second
|
backoff.MaxInterval = 10 * time.Second
|
||||||
|
backoff.MaxElapsedTime = 0
|
||||||
for {
|
for {
|
||||||
// Create a new producer.
|
// Create a new producer.
|
||||||
// a underlying stream producer life time should be equal to the resumable producer.
|
// a underlying stream producer life time should be equal to the resumable producer.
|
||||||
|
|||||||
@ -27,6 +27,7 @@ import (
|
|||||||
"github.com/cockroachdb/errors"
|
"github.com/cockroachdb/errors"
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/util/streamingutil"
|
||||||
"github.com/milvus-io/milvus/pkg/v2/log"
|
"github.com/milvus-io/milvus/pkg/v2/log"
|
||||||
"github.com/milvus-io/milvus/pkg/v2/metrics"
|
"github.com/milvus-io/milvus/pkg/v2/metrics"
|
||||||
"github.com/milvus-io/milvus/pkg/v2/mq/common"
|
"github.com/milvus-io/milvus/pkg/v2/mq/common"
|
||||||
@ -179,19 +180,23 @@ func newDmlChannels(initCtx context.Context, factory msgstream.Factory, chanName
|
|||||||
}
|
}
|
||||||
|
|
||||||
for i, name := range names {
|
for i, name := range names {
|
||||||
ms, err := factory.NewMsgStream(initCtx)
|
var ms msgstream.MsgStream
|
||||||
if err != nil {
|
if !streamingutil.IsStreamingServiceEnabled() {
|
||||||
log.Ctx(initCtx).Error("Failed to add msgstream",
|
var err error
|
||||||
zap.String("name", name),
|
ms, err = factory.NewMsgStream(initCtx)
|
||||||
zap.Error(err))
|
if err != nil {
|
||||||
panic("Failed to add msgstream")
|
log.Ctx(initCtx).Error("Failed to add msgstream",
|
||||||
}
|
zap.String("name", name),
|
||||||
|
zap.Error(err))
|
||||||
|
panic("Failed to add msgstream")
|
||||||
|
}
|
||||||
|
|
||||||
if params.PreCreatedTopicEnabled.GetAsBool() {
|
if params.PreCreatedTopicEnabled.GetAsBool() {
|
||||||
d.checkPreCreatedTopic(initCtx, factory, name)
|
d.checkPreCreatedTopic(initCtx, factory, name)
|
||||||
}
|
}
|
||||||
|
|
||||||
ms.AsProducer(initCtx, []string{name})
|
ms.AsProducer(initCtx, []string{name})
|
||||||
|
}
|
||||||
dms := &dmlMsgStream{
|
dms := &dmlMsgStream{
|
||||||
ms: ms,
|
ms: ms,
|
||||||
refcnt: 0,
|
refcnt: 0,
|
||||||
|
|||||||
@ -155,6 +155,9 @@ type handlerCreateFunc func(ctx context.Context, assign *types.PChannelInfoAssig
|
|||||||
func (hc *handlerClientImpl) createHandlerAfterStreamingNodeReady(ctx context.Context, logger *log.MLogger, pchannel string, create handlerCreateFunc) (any, error) {
|
func (hc *handlerClientImpl) createHandlerAfterStreamingNodeReady(ctx context.Context, logger *log.MLogger, pchannel string, create handlerCreateFunc) (any, error) {
|
||||||
// TODO: backoff should be configurable.
|
// TODO: backoff should be configurable.
|
||||||
backoff := backoff.NewExponentialBackOff()
|
backoff := backoff.NewExponentialBackOff()
|
||||||
|
backoff.InitialInterval = 100 * time.Millisecond
|
||||||
|
backoff.MaxInterval = 10 * time.Second
|
||||||
|
backoff.MaxElapsedTime = 0
|
||||||
for {
|
for {
|
||||||
assign := hc.watcher.Get(ctx, pchannel)
|
assign := hc.watcher.Get(ctx, pchannel)
|
||||||
if assign != nil {
|
if assign != nil {
|
||||||
|
|||||||
@ -117,6 +117,7 @@ func (m *pchannelCheckpointManager) background(previous message.MessageID) {
|
|||||||
backoff := backoff.NewExponentialBackOff()
|
backoff := backoff.NewExponentialBackOff()
|
||||||
backoff.InitialInterval = 100 * time.Millisecond
|
backoff.InitialInterval = 100 * time.Millisecond
|
||||||
backoff.MaxInterval = 10 * time.Second
|
backoff.MaxInterval = 10 * time.Second
|
||||||
|
backoff.MaxElapsedTime = 0
|
||||||
for {
|
for {
|
||||||
current, err := m.blockUntilCheckpointUpdate(previous)
|
current, err := m.blockUntilCheckpointUpdate(previous)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@ -2,6 +2,7 @@ package lazygrpc
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/cenkalti/backoff/v4"
|
"github.com/cenkalti/backoff/v4"
|
||||||
"github.com/cockroachdb/errors"
|
"github.com/cockroachdb/errors"
|
||||||
@ -50,6 +51,11 @@ type connImpl struct {
|
|||||||
func (c *connImpl) initialize() {
|
func (c *connImpl) initialize() {
|
||||||
defer c.initializationNotifier.Finish(struct{}{})
|
defer c.initializationNotifier.Finish(struct{}{})
|
||||||
|
|
||||||
|
newBackOff := backoff.NewExponentialBackOff()
|
||||||
|
newBackOff.InitialInterval = 100 * time.Millisecond
|
||||||
|
newBackOff.MaxInterval = 10 * time.Second
|
||||||
|
newBackOff.MaxElapsedTime = 0
|
||||||
|
|
||||||
backoff.Retry(func() error {
|
backoff.Retry(func() error {
|
||||||
conn, err := c.dialer(c.initializationNotifier.Context())
|
conn, err := c.dialer(c.initializationNotifier.Context())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -62,7 +68,7 @@ func (c *connImpl) initialize() {
|
|||||||
}
|
}
|
||||||
c.conn.Set(conn)
|
c.conn.Set(conn)
|
||||||
return nil
|
return nil
|
||||||
}, backoff.NewExponentialBackOff())
|
}, newBackOff)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *connImpl) GetConn(ctx context.Context) (*grpc.ClientConn, error) {
|
func (c *connImpl) GetConn(ctx context.Context) (*grpc.ClientConn, error) {
|
||||||
|
|||||||
@ -55,13 +55,11 @@ type BackoffTimer struct {
|
|||||||
func (t *BackoffTimer) EnableBackoff() {
|
func (t *BackoffTimer) EnableBackoff() {
|
||||||
if t.backoff == nil {
|
if t.backoff == nil {
|
||||||
cfg := t.configFetcher.BackoffConfig()
|
cfg := t.configFetcher.BackoffConfig()
|
||||||
defaultInterval := t.configFetcher.DefaultInterval()
|
|
||||||
backoff := backoff.NewExponentialBackOff()
|
backoff := backoff.NewExponentialBackOff()
|
||||||
backoff.InitialInterval = cfg.InitialInterval
|
backoff.InitialInterval = cfg.InitialInterval
|
||||||
backoff.Multiplier = cfg.Multiplier
|
backoff.Multiplier = cfg.Multiplier
|
||||||
backoff.MaxInterval = cfg.MaxInterval
|
backoff.MaxInterval = cfg.MaxInterval
|
||||||
backoff.MaxElapsedTime = defaultInterval
|
backoff.MaxElapsedTime = 0
|
||||||
backoff.Stop = defaultInterval
|
|
||||||
backoff.Reset()
|
backoff.Reset()
|
||||||
t.backoff = backoff
|
t.backoff = backoff
|
||||||
}
|
}
|
||||||
@ -72,14 +70,6 @@ func (t *BackoffTimer) DisableBackoff() {
|
|||||||
t.backoff = nil
|
t.backoff = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsBackoffStopped returns the elapsed time of backoff
|
|
||||||
func (t *BackoffTimer) IsBackoffStopped() bool {
|
|
||||||
if t.backoff != nil {
|
|
||||||
return t.backoff.GetElapsedTime() > t.backoff.MaxElapsedTime
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// NextTimer returns the next timer and the duration of the timer
|
// NextTimer returns the next timer and the duration of the timer
|
||||||
func (t *BackoffTimer) NextTimer() (<-chan time.Time, time.Duration) {
|
func (t *BackoffTimer) NextTimer() (<-chan time.Time, time.Duration) {
|
||||||
nextBackoff := t.NextInterval()
|
nextBackoff := t.NextInterval()
|
||||||
@ -98,13 +88,11 @@ func (t *BackoffTimer) NextInterval() time.Duration {
|
|||||||
// NewBackoffWithInstant creates a new backoff with instant
|
// NewBackoffWithInstant creates a new backoff with instant
|
||||||
func NewBackoffWithInstant(fetcher BackoffTimerConfigFetcher) *BackoffWithInstant {
|
func NewBackoffWithInstant(fetcher BackoffTimerConfigFetcher) *BackoffWithInstant {
|
||||||
cfg := fetcher.BackoffConfig()
|
cfg := fetcher.BackoffConfig()
|
||||||
defaultInterval := fetcher.DefaultInterval()
|
|
||||||
backoff := backoff.NewExponentialBackOff()
|
backoff := backoff.NewExponentialBackOff()
|
||||||
backoff.InitialInterval = cfg.InitialInterval
|
backoff.InitialInterval = cfg.InitialInterval
|
||||||
backoff.Multiplier = cfg.Multiplier
|
backoff.Multiplier = cfg.Multiplier
|
||||||
backoff.MaxInterval = cfg.MaxInterval
|
backoff.MaxInterval = cfg.MaxInterval
|
||||||
backoff.MaxElapsedTime = defaultInterval
|
backoff.MaxElapsedTime = 0
|
||||||
backoff.Stop = defaultInterval
|
|
||||||
backoff.Reset()
|
backoff.Reset()
|
||||||
return &BackoffWithInstant{
|
return &BackoffWithInstant{
|
||||||
backoff: backoff,
|
backoff: backoff,
|
||||||
|
|||||||
@ -21,24 +21,14 @@ func TestBackoffTimer(t *testing.T) {
|
|||||||
assert.Equal(t, time.Second, b.NextInterval())
|
assert.Equal(t, time.Second, b.NextInterval())
|
||||||
assert.Equal(t, time.Second, b.NextInterval())
|
assert.Equal(t, time.Second, b.NextInterval())
|
||||||
assert.Equal(t, time.Second, b.NextInterval())
|
assert.Equal(t, time.Second, b.NextInterval())
|
||||||
assert.True(t, b.IsBackoffStopped())
|
|
||||||
|
|
||||||
b.EnableBackoff()
|
b.EnableBackoff()
|
||||||
assert.False(t, b.IsBackoffStopped())
|
|
||||||
timer, backoff := b.NextTimer()
|
timer, backoff := b.NextTimer()
|
||||||
assert.Less(t, backoff, 200*time.Millisecond)
|
assert.Less(t, backoff, 200*time.Millisecond)
|
||||||
for {
|
<-timer
|
||||||
<-timer
|
_, backoff = b.NextTimer()
|
||||||
if b.IsBackoffStopped() {
|
assert.NotZero(t, backoff)
|
||||||
break
|
|
||||||
}
|
|
||||||
timer, _ = b.NextTimer()
|
|
||||||
}
|
|
||||||
assert.True(t, b.IsBackoffStopped())
|
|
||||||
|
|
||||||
assert.Equal(t, time.Second, b.NextInterval())
|
|
||||||
b.DisableBackoff()
|
b.DisableBackoff()
|
||||||
assert.Equal(t, time.Second, b.NextInterval())
|
assert.Equal(t, time.Second, b.NextInterval())
|
||||||
assert.True(t, b.IsBackoffStopped())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user