fix grpc client retry on node server not match error (#28169)

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
This commit is contained in:
wei liu 2023-11-03 23:42:16 +08:00 committed by GitHub
parent 3a81636ab0
commit 68a86471ba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 29 additions and 9 deletions

View File

@ -377,7 +377,11 @@ func (c *ClientBase[T]) checkErr(ctx context.Context, err error) (needRetry, nee
}
return true, true, err
case IsServerIDMismatchErr(err):
fallthrough
if ok, err := c.checkNodeSessionExist(ctx); !ok {
// if session doesn't exist, no need to retry for datanode/indexnode/querynode
return false, false, err
}
return true, true, err
case IsCrossClusterRoutingErr(err):
return true, true, err
default:
@ -387,6 +391,19 @@ func (c *ClientBase[T]) checkErr(ctx context.Context, err error) (needRetry, nee
}
}
// checkNodeSessionExist reports whether the caller may keep retrying against
// the target node.
//
// The session is only verified when the client role is DataNode, IndexNode or
// QueryNode; for every other role the function returns (true, nil) without
// calling verifySession.
//
// It returns (false, err) only when verifySession fails with an error that
// matches merr.ErrNodeNotFound. Any other verifySession error is ignored and
// (true, nil) is returned.
func (c *ClientBase[T]) checkNodeSessionExist(ctx context.Context) (bool, error) {
	role := c.GetRole()
	needsSessionCheck := role == typeutil.DataNodeRole ||
		role == typeutil.IndexNodeRole ||
		role == typeutil.QueryNodeRole
	if !needsSessionCheck {
		return true, nil
	}

	verifyErr := c.verifySession(ctx)
	if verifyErr == nil || !errors.Is(verifyErr, merr.ErrNodeNotFound) {
		// Session verified, or the failure is not a "node not found" error:
		// leave the retry decision unchanged.
		return true, nil
	}

	log.Warn("failed to verify node session", zap.Error(verifyErr))
	// stop retry
	return false, verifyErr
}
func (c *ClientBase[T]) call(ctx context.Context, caller func(client T) (any, error)) (any, error) {
log := log.Ctx(ctx).With(zap.String("client_role", c.GetRole()))
var (
@ -412,15 +429,9 @@ func (c *ClientBase[T]) call(ctx context.Context, caller func(client T) (any, er
defer cancel()
err := retry.Do(ctx, func() error {
if generic.IsZero(client) {
switch c.GetRole() {
case typeutil.DataNodeRole, typeutil.IndexNodeRole, typeutil.QueryNodeRole:
if ok, err := c.checkNodeSessionExist(ctx); !ok {
// if session doesn't exist, no need to reset connection for datanode/indexnode/querynode
err := c.verifySession(ctx)
if err != nil && errors.Is(err, merr.ErrNodeNotFound) {
log.Warn("failed to verify node session", zap.Error(err))
// stop retry
return retry.Unrecoverable(err)
}
return retry.Unrecoverable(err)
}
err := errors.Wrap(clientErr, "empty grpc client")

View File

@ -115,6 +115,15 @@ func TestClientBase_NodeSessionNotExist(t *testing.T) {
return struct{}{}, nil
})
assert.True(t, errors.Is(err, merr.ErrNodeNotFound))
// test the case where the node is already down, but a new node has started up with the same ip and port
base.grpcClientMtx.Lock()
base.grpcClient = &mockClient{}
base.grpcClientMtx.Unlock()
_, err = base.Call(ctx, func(client *mockClient) (any, error) {
return struct{}{}, merr.ErrNodeNotMatch
})
assert.True(t, errors.Is(err, merr.ErrNodeNotFound))
}
func TestClientBase_Call(t *testing.T) {