From da41a5b51e950a1c21d04d9dde7a7ac4ab712331 Mon Sep 17 00:00:00 2001 From: wei liu Date: Tue, 7 Nov 2023 11:54:18 +0800 Subject: [PATCH] fix check grpc error logic (#28182) Signed-off-by: Wei Liu --- internal/util/grpcclient/client.go | 33 ++++++++++++------------- internal/util/grpcclient/client_test.go | 23 ++++++++++++++--- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/internal/util/grpcclient/client.go b/internal/util/grpcclient/client.go index cc33d5b562..d9b83a067b 100644 --- a/internal/util/grpcclient/client.go +++ b/internal/util/grpcclient/client.go @@ -361,21 +361,22 @@ func (c *ClientBase[T]) needResetCancel() (needReset bool) { return false } -func (c *ClientBase[T]) checkErr(ctx context.Context, err error) (needRetry, needReset bool, retErr error) { +func (c *ClientBase[T]) checkGrpcErr(ctx context.Context, err error) (needRetry, needReset bool, retErr error) { log := log.Ctx(ctx).With(zap.String("clientRole", c.GetRole())) - switch { - case funcutil.IsGrpcErr(err): - // grpc err - log.Warn("call received grpc error", zap.Error(err)) - if funcutil.IsGrpcErr(err, codes.Canceled, codes.DeadlineExceeded) { - // canceled or deadline exceeded - return true, c.needResetCancel(), err - } + // Unknown err + if !funcutil.IsGrpcErr(err) { + log.Warn("fail to grpc call because of unknown error", zap.Error(err)) + return false, false, err + } - if funcutil.IsGrpcErr(err, codes.Unimplemented) { - return false, false, merr.WrapErrServiceUnimplemented(err) - } - return true, true, err + // grpc err + log.Warn("call received grpc error", zap.Error(err)) + switch { + case funcutil.IsGrpcErr(err, codes.Canceled, codes.DeadlineExceeded): + // canceled or deadline exceeded + return true, c.needResetCancel(), err + case funcutil.IsGrpcErr(err, codes.Unimplemented): + return false, false, merr.WrapErrServiceUnimplemented(err) case IsServerIDMismatchErr(err): if ok, err := c.checkNodeSessionExist(ctx); !ok { // if session doesn't exist, no need to retry for datanode/indexnode/querynode @@ -385,9 +386,7 @@ func (c *ClientBase[T]) checkErr(ctx context.Context, err error) (needRetry, nee case IsCrossClusterRoutingErr(err): return true, true, err default: - log.Warn("fail to grpc call because of unknown error", zap.Error(err)) - // Unknown err - return false, false, err + return true, true, err } } @@ -443,7 +442,7 @@ func (c *ClientBase[T]) call(ctx context.Context, caller func(client T) (any, er ret, err = caller(client) if err != nil { var needRetry, needReset bool - needRetry, needReset, err = c.checkErr(ctx, err) + needRetry, needReset, err = c.checkGrpcErr(ctx, err) if !needRetry { // stop retry err = retry.Unrecoverable(err) diff --git a/internal/util/grpcclient/client_test.go b/internal/util/grpcclient/client_test.go index 32cd123c53..cb27f3d639 100644 --- a/internal/util/grpcclient/client_test.go +++ b/internal/util/grpcclient/client_test.go @@ -121,7 +121,7 @@ func TestClientBase_NodeSessionNotExist(t *testing.T) { base.grpcClient = &mockClient{} base.grpcClientMtx.Unlock() _, err = base.Call(ctx, func(client *mockClient) (any, error) { - return struct{}{}, merr.ErrNodeNotMatch + return struct{}{}, status.Errorf(codes.Unknown, merr.ErrNodeNotMatch.Error()) }) assert.True(t, errors.Is(err, merr.ErrNodeNotFound)) } @@ -343,19 +343,34 @@ func TestClientBase_Recall(t *testing.T) { }) } -func TestClientBase_CheckError(t *testing.T) { +func TestClientBase_CheckGrpcError(t *testing.T) { base := ClientBase[*mockClient]{} base.grpcClient = &mockClient{} base.MaxAttempts = 1 ctx := context.Background() - retry, reset, _ := base.checkErr(ctx, status.Errorf(codes.Canceled, "fake context canceled")) + retry, reset, _ := base.checkGrpcErr(ctx, status.Errorf(codes.Canceled, "fake context canceled")) assert.True(t, retry) assert.True(t, reset) - retry, reset, _ = base.checkErr(ctx, status.Errorf(codes.Unimplemented, "fake context canceled")) + retry, reset, _ = base.checkGrpcErr(ctx, status.Errorf(codes.Unimplemented, "fake context canceled")) assert.False(t, retry) assert.False(t, reset) + + // test serverId mismatch + retry, reset, _ = base.checkGrpcErr(ctx, status.Errorf(codes.Unknown, merr.ErrNodeNotMatch.Error())) + assert.True(t, retry) + assert.True(t, reset) + + // test cross cluster + retry, reset, _ = base.checkGrpcErr(ctx, status.Errorf(codes.Unknown, merr.ErrServiceCrossClusterRouting.Error())) + assert.True(t, retry) + assert.True(t, reset) + + // test default + retry, reset, _ = base.checkGrpcErr(ctx, status.Errorf(codes.Unknown, merr.ErrNodeNotFound.Error())) + assert.True(t, retry) + assert.True(t, reset) } type server struct {