fix: delete by expr failed at retry progress (#35241) (#35421)

issue: #35240
pr: #35241
delete by expr shard the same err object between channels, so if one
channel's query failed, it will fail all channel, which will break
channel level retry policy, and make delete operation failed.

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
This commit is contained in:
wei liu 2024-08-12 16:16:20 +08:00 committed by GitHub
parent 8a7c00d2c7
commit bfd20d5f99
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -247,7 +247,6 @@ type deleteRunner struct {
ts uint64
lb LBPolicy
count atomic.Int64
err error
// task queue
queue *dmTaskQueue
@ -430,7 +429,11 @@ func (dr *deleteRunner) getStreamingQueryAndDelteFunc(plan *planpb.PlanNode) exe
}
taskCh := make(chan *deleteTask, 256)
go dr.receiveQueryResult(ctx, client, taskCh)
var receiveErr error
go func() {
receiveErr = dr.receiveQueryResult(ctx, client, taskCh)
close(taskCh)
}()
// wait all task finish
for task := range taskCh {
err := task.WaitToFinish()
@ -441,50 +444,42 @@ func (dr *deleteRunner) getStreamingQueryAndDelteFunc(plan *planpb.PlanNode) exe
}
// query or produce task failed
if dr.err != nil {
return dr.err
if receiveErr != nil {
return receiveErr
}
return nil
}
}
func (dr *deleteRunner) receiveQueryResult(ctx context.Context, client querypb.QueryNode_QueryStreamClient, taskCh chan *deleteTask) {
defer func() {
close(taskCh)
}()
func (dr *deleteRunner) receiveQueryResult(ctx context.Context, client querypb.QueryNode_QueryStreamClient, taskCh chan *deleteTask) error {
for {
result, err := client.Recv()
if err != nil {
if err == io.EOF {
log.Debug("query stream for delete finished", zap.Int64("msgID", dr.msgID))
return
return nil
}
dr.err = err
return
return err
}
err = merr.Error(result.GetStatus())
if err != nil {
dr.err = err
log.Warn("query stream for delete get error status", zap.Int64("msgID", dr.msgID), zap.Error(err))
return
return err
}
if dr.limiter != nil {
err := dr.limiter.Alloc(ctx, []int64{dr.collectionID}, internalpb.RateType_DMLDelete, proto.Size(result.GetIds()))
if err != nil {
dr.err = err
log.Warn("query stream for delete failed because rate limiter", zap.Int64("msgID", dr.msgID), zap.Error(err))
return
return err
}
}
task, err := dr.produce(ctx, result.GetIds())
if err != nil {
dr.err = err
log.Warn("produce delete task failed", zap.Error(err))
return
return err
}
taskCh <- task