fix: backoff will retry infinitely after reaching max elapsed time (#40589)

issue: #40588

Signed-off-by: chyezh <chyezh@outlook.com>
This commit is contained in:
Zhen Ye 2025-03-13 16:24:06 +08:00 committed by GitHub
parent e93d53ca28
commit f6fb4bc442
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 35 additions and 40 deletions

View File

@ -130,6 +130,7 @@ func (rc *resumableConsumerImpl) createNewConsumer(opts *handler.ConsumerOptions
backoff := backoff.NewExponentialBackOff() backoff := backoff.NewExponentialBackOff()
backoff.InitialInterval = 100 * time.Millisecond backoff.InitialInterval = 100 * time.Millisecond
backoff.MaxInterval = 10 * time.Second backoff.MaxInterval = 10 * time.Second
backoff.MaxElapsedTime = 0
for { for {
// Create a new consumer. // Create a new consumer.
// a underlying stream consumer life time should be equal to the resumable producer. // a underlying stream consumer life time should be equal to the resumable producer.

View File

@ -152,7 +152,8 @@ func (p *ResumableProducer) waitUntilUnavailable(producer handler.Producer) erro
func (p *ResumableProducer) createNewProducer() (producer.Producer, error) { func (p *ResumableProducer) createNewProducer() (producer.Producer, error) {
backoff := backoff.NewExponentialBackOff() backoff := backoff.NewExponentialBackOff()
backoff.InitialInterval = 100 * time.Millisecond backoff.InitialInterval = 100 * time.Millisecond
backoff.MaxInterval = 2 * time.Second backoff.MaxInterval = 10 * time.Second
backoff.MaxElapsedTime = 0
for { for {
// Create a new producer. // Create a new producer.
// a underlying stream producer life time should be equal to the resumable producer. // a underlying stream producer life time should be equal to the resumable producer.

View File

@ -27,6 +27,7 @@ import (
"github.com/cockroachdb/errors" "github.com/cockroachdb/errors"
"go.uber.org/zap" "go.uber.org/zap"
"github.com/milvus-io/milvus/internal/util/streamingutil"
"github.com/milvus-io/milvus/pkg/v2/log" "github.com/milvus-io/milvus/pkg/v2/log"
"github.com/milvus-io/milvus/pkg/v2/metrics" "github.com/milvus-io/milvus/pkg/v2/metrics"
"github.com/milvus-io/milvus/pkg/v2/mq/common" "github.com/milvus-io/milvus/pkg/v2/mq/common"
@ -179,19 +180,23 @@ func newDmlChannels(initCtx context.Context, factory msgstream.Factory, chanName
} }
for i, name := range names { for i, name := range names {
ms, err := factory.NewMsgStream(initCtx) var ms msgstream.MsgStream
if err != nil { if !streamingutil.IsStreamingServiceEnabled() {
log.Ctx(initCtx).Error("Failed to add msgstream", var err error
zap.String("name", name), ms, err = factory.NewMsgStream(initCtx)
zap.Error(err)) if err != nil {
panic("Failed to add msgstream") log.Ctx(initCtx).Error("Failed to add msgstream",
} zap.String("name", name),
zap.Error(err))
panic("Failed to add msgstream")
}
if params.PreCreatedTopicEnabled.GetAsBool() { if params.PreCreatedTopicEnabled.GetAsBool() {
d.checkPreCreatedTopic(initCtx, factory, name) d.checkPreCreatedTopic(initCtx, factory, name)
} }
ms.AsProducer(initCtx, []string{name}) ms.AsProducer(initCtx, []string{name})
}
dms := &dmlMsgStream{ dms := &dmlMsgStream{
ms: ms, ms: ms,
refcnt: 0, refcnt: 0,

View File

@ -155,6 +155,9 @@ type handlerCreateFunc func(ctx context.Context, assign *types.PChannelInfoAssig
func (hc *handlerClientImpl) createHandlerAfterStreamingNodeReady(ctx context.Context, logger *log.MLogger, pchannel string, create handlerCreateFunc) (any, error) { func (hc *handlerClientImpl) createHandlerAfterStreamingNodeReady(ctx context.Context, logger *log.MLogger, pchannel string, create handlerCreateFunc) (any, error) {
// TODO: backoff should be configurable. // TODO: backoff should be configurable.
backoff := backoff.NewExponentialBackOff() backoff := backoff.NewExponentialBackOff()
backoff.InitialInterval = 100 * time.Millisecond
backoff.MaxInterval = 10 * time.Second
backoff.MaxElapsedTime = 0
for { for {
assign := hc.watcher.Get(ctx, pchannel) assign := hc.watcher.Get(ctx, pchannel)
if assign != nil { if assign != nil {

View File

@ -117,6 +117,7 @@ func (m *pchannelCheckpointManager) background(previous message.MessageID) {
backoff := backoff.NewExponentialBackOff() backoff := backoff.NewExponentialBackOff()
backoff.InitialInterval = 100 * time.Millisecond backoff.InitialInterval = 100 * time.Millisecond
backoff.MaxInterval = 10 * time.Second backoff.MaxInterval = 10 * time.Second
backoff.MaxElapsedTime = 0
for { for {
current, err := m.blockUntilCheckpointUpdate(previous) current, err := m.blockUntilCheckpointUpdate(previous)
if err != nil { if err != nil {

View File

@ -2,6 +2,7 @@ package lazygrpc
import ( import (
"context" "context"
"time"
"github.com/cenkalti/backoff/v4" "github.com/cenkalti/backoff/v4"
"github.com/cockroachdb/errors" "github.com/cockroachdb/errors"
@ -50,6 +51,11 @@ type connImpl struct {
func (c *connImpl) initialize() { func (c *connImpl) initialize() {
defer c.initializationNotifier.Finish(struct{}{}) defer c.initializationNotifier.Finish(struct{}{})
newBackOff := backoff.NewExponentialBackOff()
newBackOff.InitialInterval = 100 * time.Millisecond
newBackOff.MaxInterval = 10 * time.Second
newBackOff.MaxElapsedTime = 0
backoff.Retry(func() error { backoff.Retry(func() error {
conn, err := c.dialer(c.initializationNotifier.Context()) conn, err := c.dialer(c.initializationNotifier.Context())
if err != nil { if err != nil {
@ -62,7 +68,7 @@ func (c *connImpl) initialize() {
} }
c.conn.Set(conn) c.conn.Set(conn)
return nil return nil
}, backoff.NewExponentialBackOff()) }, newBackOff)
} }
func (c *connImpl) GetConn(ctx context.Context) (*grpc.ClientConn, error) { func (c *connImpl) GetConn(ctx context.Context) (*grpc.ClientConn, error) {

View File

@ -55,13 +55,11 @@ type BackoffTimer struct {
func (t *BackoffTimer) EnableBackoff() { func (t *BackoffTimer) EnableBackoff() {
if t.backoff == nil { if t.backoff == nil {
cfg := t.configFetcher.BackoffConfig() cfg := t.configFetcher.BackoffConfig()
defaultInterval := t.configFetcher.DefaultInterval()
backoff := backoff.NewExponentialBackOff() backoff := backoff.NewExponentialBackOff()
backoff.InitialInterval = cfg.InitialInterval backoff.InitialInterval = cfg.InitialInterval
backoff.Multiplier = cfg.Multiplier backoff.Multiplier = cfg.Multiplier
backoff.MaxInterval = cfg.MaxInterval backoff.MaxInterval = cfg.MaxInterval
backoff.MaxElapsedTime = defaultInterval backoff.MaxElapsedTime = 0
backoff.Stop = defaultInterval
backoff.Reset() backoff.Reset()
t.backoff = backoff t.backoff = backoff
} }
@ -72,14 +70,6 @@ func (t *BackoffTimer) DisableBackoff() {
t.backoff = nil t.backoff = nil
} }
// IsBackoffStopped returns the elapsed time of backoff
func (t *BackoffTimer) IsBackoffStopped() bool {
if t.backoff != nil {
return t.backoff.GetElapsedTime() > t.backoff.MaxElapsedTime
}
return true
}
// NextTimer returns the next timer and the duration of the timer // NextTimer returns the next timer and the duration of the timer
func (t *BackoffTimer) NextTimer() (<-chan time.Time, time.Duration) { func (t *BackoffTimer) NextTimer() (<-chan time.Time, time.Duration) {
nextBackoff := t.NextInterval() nextBackoff := t.NextInterval()
@ -98,13 +88,11 @@ func (t *BackoffTimer) NextInterval() time.Duration {
// NewBackoffWithInstant creates a new backoff with instant // NewBackoffWithInstant creates a new backoff with instant
func NewBackoffWithInstant(fetcher BackoffTimerConfigFetcher) *BackoffWithInstant { func NewBackoffWithInstant(fetcher BackoffTimerConfigFetcher) *BackoffWithInstant {
cfg := fetcher.BackoffConfig() cfg := fetcher.BackoffConfig()
defaultInterval := fetcher.DefaultInterval()
backoff := backoff.NewExponentialBackOff() backoff := backoff.NewExponentialBackOff()
backoff.InitialInterval = cfg.InitialInterval backoff.InitialInterval = cfg.InitialInterval
backoff.Multiplier = cfg.Multiplier backoff.Multiplier = cfg.Multiplier
backoff.MaxInterval = cfg.MaxInterval backoff.MaxInterval = cfg.MaxInterval
backoff.MaxElapsedTime = defaultInterval backoff.MaxElapsedTime = 0
backoff.Stop = defaultInterval
backoff.Reset() backoff.Reset()
return &BackoffWithInstant{ return &BackoffWithInstant{
backoff: backoff, backoff: backoff,

View File

@ -21,24 +21,14 @@ func TestBackoffTimer(t *testing.T) {
assert.Equal(t, time.Second, b.NextInterval()) assert.Equal(t, time.Second, b.NextInterval())
assert.Equal(t, time.Second, b.NextInterval()) assert.Equal(t, time.Second, b.NextInterval())
assert.Equal(t, time.Second, b.NextInterval()) assert.Equal(t, time.Second, b.NextInterval())
assert.True(t, b.IsBackoffStopped())
b.EnableBackoff() b.EnableBackoff()
assert.False(t, b.IsBackoffStopped())
timer, backoff := b.NextTimer() timer, backoff := b.NextTimer()
assert.Less(t, backoff, 200*time.Millisecond) assert.Less(t, backoff, 200*time.Millisecond)
for { <-timer
<-timer _, backoff = b.NextTimer()
if b.IsBackoffStopped() { assert.NotZero(t, backoff)
break
}
timer, _ = b.NextTimer()
}
assert.True(t, b.IsBackoffStopped())
assert.Equal(t, time.Second, b.NextInterval())
b.DisableBackoff() b.DisableBackoff()
assert.Equal(t, time.Second, b.NextInterval()) assert.Equal(t, time.Second, b.NextInterval())
assert.True(t, b.IsBackoffStopped())
} }
} }