diff --git a/internal/datacoord/server.go b/internal/datacoord/server.go index 211640af29..7875963c73 100644 --- a/internal/datacoord/server.go +++ b/internal/datacoord/server.go @@ -55,7 +55,7 @@ import ( ) const ( - connEtcdMaxRetryTime = 100000 + connEtcdMaxRetryTime = 100 allPartitionID = 0 // paritionID means no filtering ) diff --git a/internal/datanode/data_node.go b/internal/datanode/data_node.go index d6c8cb9d21..193dd6315f 100644 --- a/internal/datanode/data_node.go +++ b/internal/datanode/data_node.go @@ -76,7 +76,7 @@ const ( MetricRequestsSuccess = "success" // ConnectEtcdMaxRetryTime is used to limit the max retry time for connection etcd - ConnectEtcdMaxRetryTime = 1000 + ConnectEtcdMaxRetryTime = 100 ) const illegalRequestErrStr = "Illegal request" diff --git a/internal/distributed/rootcoord/service_test.go b/internal/distributed/rootcoord/service_test.go index d9ab361fb7..148fed9952 100644 --- a/internal/distributed/rootcoord/service_test.go +++ b/internal/distributed/rootcoord/service_test.go @@ -948,7 +948,7 @@ func initEtcd(etcdEndpoints []string) (*clientv3.Client, error) { etcdCli = etcd return nil } - err := retry.Do(context.TODO(), connectEtcdFn, retry.Attempts(300)) + err := retry.Do(context.TODO(), connectEtcdFn, retry.Attempts(100)) if err != nil { return nil, err } diff --git a/internal/indexcoord/index_coord.go b/internal/indexcoord/index_coord.go index 090d6e7a55..cdbd9d6387 100644 --- a/internal/indexcoord/index_coord.go +++ b/internal/indexcoord/index_coord.go @@ -183,7 +183,7 @@ func (i *IndexCoord) Init() error { return err } log.Debug("IndexCoord try to connect etcd") - err = retry.Do(i.loopCtx, connectEtcdFn, retry.Attempts(300)) + err = retry.Do(i.loopCtx, connectEtcdFn, retry.Attempts(100)) if err != nil { log.Error("IndexCoord try to connect etcd failed", zap.Error(err)) initErr = err diff --git a/internal/rootcoord/root_coord.go b/internal/rootcoord/root_coord.go index 88557e5e33..ecc12c8944 100644 --- a/internal/rootcoord/root_coord.go +++ b/internal/rootcoord/root_coord.go @@ -1074,7 +1074,7 @@ func (c *Core) Init() error { return nil } log.Debug("RootCoord, Connecting to Etcd", zap.String("kv root", Params.EtcdCfg.KvRootPath), zap.String("meta root", Params.EtcdCfg.MetaRootPath)) - err := retry.Do(c.ctx, connectEtcdFn, retry.Attempts(300)) + err := retry.Do(c.ctx, connectEtcdFn, retry.Attempts(100)) if err != nil { return } diff --git a/internal/storage/minio_chunk_manager.go b/internal/storage/minio_chunk_manager.go index 9d3370ee3f..cdfe9e080f 100644 --- a/internal/storage/minio_chunk_manager.go +++ b/internal/storage/minio_chunk_manager.go @@ -81,7 +81,7 @@ func newMinioChunkManagerWithConfig(ctx context.Context, c *config) (*MinioChunk } return nil } - err = retry.Do(ctx, checkBucketFn, retry.Attempts(300)) + err = retry.Do(ctx, checkBucketFn, retry.Attempts(100)) if err != nil { return nil, err } diff --git a/internal/util/retry/retry.go b/internal/util/retry/retry.go index b3f8e71e80..1aea74d3a6 100644 --- a/internal/util/retry/retry.go +++ b/internal/util/retry/retry.go @@ -15,6 +15,9 @@ import ( "context" "time" + "go.uber.org/zap" + + "github.com/milvus-io/milvus/internal/log" "github.com/milvus-io/milvus/internal/util/errorutil" ) @@ -32,6 +35,9 @@ func Do(ctx context.Context, fn func() error, opts ...Option) error { for i := uint(0); i < c.attempts; i++ { if err := fn(); err != nil { + if i%10 == 0 { + log.Debug("retry func failed", zap.Uint("retry time", i), zap.Error(err)) + } el = append(el, err) diff --git a/internal/util/sessionutil/session_util.go b/internal/util/sessionutil/session_util.go index d3ee112b56..964f7c296f 100644 --- a/internal/util/sessionutil/session_util.go +++ b/internal/util/sessionutil/session_util.go @@ -92,7 +92,7 @@ func NewSession(ctx context.Context, metaRoot string, client *clientv3.Client) * session.etcdCli = client return nil } - err := retry.Do(ctx, connectEtcdFn, retry.Attempts(300)) + err := retry.Do(ctx, connectEtcdFn, retry.Attempts(100)) if err != nil { log.Warn("failed to initialize session", zap.Error(err)) @@ -241,7 +241,7 @@ func (s *Session) registerService() (<-chan *clientv3.LeaseKeepAliveResponse, er log.Debug("Session register successfully", zap.Int64("ServerID", s.ServerID)) return nil } - err := retry.Do(s.ctx, registerFn, retry.Attempts(DefaultRetryTimes), retry.Sleep(500*time.Millisecond)) + err := retry.Do(s.ctx, registerFn, retry.Attempts(DefaultRetryTimes)) if err != nil { return nil, err }