fix: Fix import fileStats incorrectly set to nil (#43463)

1. Ensure that tasks in the InProgress state return valid fileStats.
2. Enhance import logging.

issue: https://github.com/milvus-io/milvus/issues/43387

---------

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
yihao.dai 2025-07-22 12:37:01 +08:00 committed by GitHub
parent 563e2935c5
commit 5124ed9758
9 changed files with 116 additions and 62 deletions


@@ -228,7 +228,7 @@ func (c *importChecker) checkPendingJob(job ImportJob) {
             log.Warn("add preimport task failed", WrapTaskLog(t, zap.Error(err))...)
             return
         }
-        log.Info("add new preimport task", WrapTaskLog(t)...)
+        log.Info("add new preimport task", WrapTaskLog(t, zap.Any("fileStats", t.GetFileStats()))...)
     }
     err = c.importMeta.UpdateJob(c.ctx, job.GetJobID(), UpdateJobState(internalpb.ImportJobState_PreImporting))
@@ -300,7 +300,7 @@ func (c *importChecker) checkPreImportingJob(job ImportJob) {
             updateJobState(internalpb.ImportJobState_Failed, UpdateJobReason(err.Error()))
             return
         }
-        log.Info("add new import task", WrapTaskLog(t)...)
+        log.Info("add new import task", WrapTaskLog(t, zap.Any("fileStats", t.GetFileStats()))...)
     }
     updateJobState(internalpb.ImportJobState_Importing, UpdateRequestedDiskSize(requestSize))


@@ -183,23 +183,26 @@ func (t *importTask) QueryTaskOnWorker(cluster session.Cluster) {
         dbName = collInfo.DatabaseName
     }
-    for _, info := range resp.GetImportSegmentsInfo() {
-        segment := t.meta.GetSegment(context.TODO(), info.GetSegmentID())
-        if info.GetImportedRows() <= segment.GetNumOfRows() {
-            continue // rows not changed, no need to update
-        }
-        diff := info.GetImportedRows() - segment.GetNumOfRows()
-        op := UpdateImportedRows(info.GetSegmentID(), info.GetImportedRows())
-        err = t.meta.UpdateSegmentsInfo(context.TODO(), op)
-        if err != nil {
-            log.Warn("update import segment rows failed", WrapTaskLog(t, zap.Error(err))...)
-            return
-        }
-        metrics.DataCoordBulkVectors.WithLabelValues(
-            dbName,
-            strconv.FormatInt(t.GetCollectionID(), 10),
-        ).Add(float64(diff))
+    if resp.GetState() == datapb.ImportTaskStateV2_InProgress || resp.GetState() == datapb.ImportTaskStateV2_Completed {
+        for _, info := range resp.GetImportSegmentsInfo() {
+            segment := t.meta.GetSegment(context.TODO(), info.GetSegmentID())
+            if info.GetImportedRows() <= segment.GetNumOfRows() {
+                continue // rows not changed, no need to update
+            }
+            diff := info.GetImportedRows() - segment.GetNumOfRows()
+            op := UpdateImportedRows(info.GetSegmentID(), info.GetImportedRows())
+            err = t.meta.UpdateSegmentsInfo(context.TODO(), op)
+            if err != nil {
+                log.Warn("update import segment rows failed", WrapTaskLog(t, zap.Error(err))...)
+                return
+            }
+            log.Info("update import segment rows done", WrapTaskLog(t, zap.Int64("segmentID", info.GetSegmentID()), zap.Int64("importedRows", info.GetImportedRows()))...)
+            metrics.DataCoordBulkVectors.WithLabelValues(
+                dbName,
+                strconv.FormatInt(t.GetCollectionID(), 10),
+            ).Add(float64(diff))
+        }
     }
     if resp.GetState() == datapb.ImportTaskStateV2_Completed {
         for _, info := range resp.GetImportSegmentsInfo() {
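Why the new state gate matters: a poll whose state is Failed or Retry carries no meaningful segment info, so applying it would churn segment metadata for nothing. Note also that the row update is idempotent: rows are applied only when the reported count grew, and the metric is bumped by the delta, so re-polling the same report never double-counts. A minimal sketch of that pattern, with hypothetical `seg`/`applyRows` names rather than the Milvus types:

```go
package main

import "fmt"

// seg stands in for stored segment metadata.
type seg struct{ id, numRows int64 }

// applyRows raises the stored row count monotonically and returns the delta
// to add to a metric, so repeated polls of the same report count nothing twice.
func applyRows(s *seg, importedRows int64) int64 {
	if importedRows <= s.numRows {
		return 0 // rows not changed, no need to update
	}
	diff := importedRows - s.numRows
	s.numRows = importedRows
	return diff
}

func main() {
	s := &seg{id: 1, numRows: 100}
	fmt.Println(applyRows(s, 150)) // 50: first report adds the new rows
	fmt.Println(applyRows(s, 150)) // 0: re-polling is a no-op
}
```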


@@ -152,14 +152,19 @@ func (p *preImportTask) QueryTaskOnWorker(cluster session.Cluster) {
         log.Warn("preimport failed", WrapTaskLog(p, zap.String("reason", resp.GetReason()))...)
         return
     }
-    actions := []UpdateAction{UpdateFileStats(resp.GetFileStats())}
+    actions := []UpdateAction{}
+    if resp.GetState() == datapb.ImportTaskStateV2_InProgress || resp.GetState() == datapb.ImportTaskStateV2_Completed {
+        actions = append(actions, UpdateFileStats(resp.GetFileStats()))
+    }
     if resp.GetState() == datapb.ImportTaskStateV2_Completed {
         actions = append(actions, UpdateState(datapb.ImportTaskStateV2_Completed))
     }
-    err = p.importMeta.UpdateTask(context.TODO(), p.GetTaskID(), actions...)
-    if err != nil {
-        log.Warn("update preimport task failed", WrapTaskLog(p, zap.Error(err))...)
-        return
+    if len(actions) > 0 {
+        err = p.importMeta.UpdateTask(context.TODO(), p.GetTaskID(), actions...)
+        if err != nil {
+            log.Warn("update preimport task failed", WrapTaskLog(p, zap.Error(err))...)
+            return
+        }
     }
     log.Info("query preimport", WrapTaskLog(p, zap.String("respState", resp.GetState().String()),
         zap.Any("fileStats", resp.GetFileStats()))...)


@@ -220,9 +220,9 @@ func (c *cluster) QueryCompaction(nodeID int64, in *datapb.CompactionStateReques
         return nil, err
     }
     switch state {
-    case taskcommon.None, taskcommon.Init, taskcommon.InProgress, taskcommon.Retry:
+    case taskcommon.None, taskcommon.Init, taskcommon.Retry:
         return &datapb.CompactionPlanResult{State: taskcommon.ToCompactionState(state)}, nil
-    case taskcommon.Finished, taskcommon.Failed:
+    case taskcommon.InProgress, taskcommon.Finished, taskcommon.Failed:
         result := &datapb.CompactionStateResponse{}
         err = proto.Unmarshal(resp.GetPayload(), result)
         if err != nil {
@@ -290,9 +290,9 @@ func (c *cluster) QueryPreImport(nodeID int64, in *datapb.QueryPreImportRequest)
     }
     reason := resProperties.GetTaskReason()
     switch state {
-    case taskcommon.None, taskcommon.Init, taskcommon.InProgress, taskcommon.Retry:
+    case taskcommon.None, taskcommon.Init, taskcommon.Retry:
         return &datapb.QueryPreImportResponse{State: taskcommon.ToImportState(state), Reason: reason}, nil
-    case taskcommon.Finished, taskcommon.Failed:
+    case taskcommon.InProgress, taskcommon.Finished, taskcommon.Failed:
         result := &datapb.QueryPreImportResponse{}
         err = proto.Unmarshal(resp.GetPayload(), result)
         if err != nil {
@@ -321,9 +321,9 @@ func (c *cluster) QueryImport(nodeID int64, in *datapb.QueryImportRequest) (*dat
     }
     reason := resProperties.GetTaskReason()
     switch state {
-    case taskcommon.None, taskcommon.Init, taskcommon.InProgress, taskcommon.Retry:
+    case taskcommon.None, taskcommon.Init, taskcommon.Retry:
         return &datapb.QueryImportResponse{State: taskcommon.ToImportState(state), Reason: reason}, nil
-    case taskcommon.Finished, taskcommon.Failed:
+    case taskcommon.InProgress, taskcommon.Finished, taskcommon.Failed:
         result := &datapb.QueryImportResponse{}
         err = proto.Unmarshal(resp.GetPayload(), result)
         if err != nil {
@@ -371,7 +371,7 @@ func (c *cluster) QueryIndex(nodeID int64, in *workerpb.QueryJobsRequest) (*work
     }
     reason := resProperties.GetTaskReason()
     switch state {
-    case taskcommon.None, taskcommon.Init, taskcommon.InProgress, taskcommon.Retry:
+    case taskcommon.None, taskcommon.Init, taskcommon.Retry:
         return &workerpb.IndexJobResults{
             Results: []*workerpb.IndexTaskInfo{
                 {
@@ -381,7 +381,7 @@ func (c *cluster) QueryIndex(nodeID int64, in *workerpb.QueryJobsRequest) (*work
                 },
             },
         }, nil
-    case taskcommon.Finished, taskcommon.Failed:
+    case taskcommon.InProgress, taskcommon.Finished, taskcommon.Failed:
         result := &workerpb.QueryJobsV2Response{}
         err = proto.Unmarshal(resp.GetPayload(), result)
         if err != nil {
@@ -430,7 +430,7 @@ func (c *cluster) QueryStats(nodeID int64, in *workerpb.QueryJobsRequest) (*work
     }
     reason := resProperties.GetTaskReason()
     switch state {
-    case taskcommon.None, taskcommon.Init, taskcommon.InProgress, taskcommon.Retry:
+    case taskcommon.None, taskcommon.Init, taskcommon.Retry:
         return &workerpb.StatsResults{
             Results: []*workerpb.StatsResult{
                 {
@@ -440,7 +440,7 @@ func (c *cluster) QueryStats(nodeID int64, in *workerpb.QueryJobsRequest) (*work
                 },
             },
         }, nil
-    case taskcommon.Finished, taskcommon.Failed:
+    case taskcommon.InProgress, taskcommon.Finished, taskcommon.Failed:
         result := &workerpb.QueryJobsV2Response{}
         err = proto.Unmarshal(resp.GetPayload(), result)
         if err != nil {
@@ -487,7 +487,7 @@ func (c *cluster) QueryAnalyze(nodeID int64, in *workerpb.QueryJobsRequest) (*wo
     }
     reason := resProperties.GetTaskReason()
     switch state {
-    case taskcommon.None, taskcommon.Init, taskcommon.InProgress, taskcommon.Retry:
+    case taskcommon.None, taskcommon.Init, taskcommon.Retry:
         return &workerpb.AnalyzeResults{
             Results: []*workerpb.AnalyzeResult{
                 {
@@ -497,7 +497,7 @@ func (c *cluster) QueryAnalyze(nodeID int64, in *workerpb.QueryJobsRequest) (*wo
                 },
             },
         }, nil
-    case taskcommon.Finished, taskcommon.Failed:
+    case taskcommon.InProgress, taskcommon.Finished, taskcommon.Failed:
         result := &workerpb.QueryJobsV2Response{}
         err = proto.Unmarshal(resp.GetPayload(), result)
         if err != nil {
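These switch changes are the worker-side root cause. `InProgress` used to share the first branch, which builds a response from the task state alone and never unmarshals the payload, so payload-only fields such as fileStats (or segment info, index results, and so on) came back nil for any task that was still running. Moving `InProgress` into the decode branch returns whatever the worker has accumulated so far. A minimal sketch of the dispatch, with hypothetical `state`/`resp`/`query` names standing in for the taskcommon and proto types:

```go
package main

import "fmt"

type state int

const (
	initial state = iota
	inProgress
	finished
	failed
)

type resp struct {
	st        state
	fileStats []string
}

// decode stands in for proto.Unmarshal of the worker's payload.
func decode(payload []string) *resp { return &resp{fileStats: payload} }

// query mimics the coordinator-side dispatch: only the second branch decodes
// the payload, so any state routed to the first branch reports nil fileStats.
func query(st state, payload []string) *resp {
	switch st {
	case initial: // before the fix, inProgress was listed here too
		return &resp{st: st} // state only; fileStats stays nil
	case inProgress, finished, failed:
		r := decode(payload)
		r.st = st
		return r
	}
	return nil
}

func main() {
	fmt.Println(query(inProgress, []string{"a.json: 1000 rows"}).fileStats)
}
```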


@@ -130,6 +130,10 @@ func (t *ImportTask) Clone() Task {
         cancel:       cancel,
         segmentsInfo: infos,
         req:          t.req,
+        allocator:    t.allocator,
+        manager:      t.manager,
+        syncMgr:      t.syncMgr,
+        cm:           t.cm,
         metaCaches:   t.metaCaches,
     }
 }
@@ -139,7 +143,9 @@ func (t *ImportTask) Execute() []*conc.Future[any] {
     log.Info("start to import", WrapLogFields(t,
         zap.Int("bufferSize", bufferSize),
         zap.Int64("taskSlot", t.GetSlots()),
-        zap.Any("schema", t.GetSchema()))...)
+        zap.Any("files", t.GetFileStats()),
+        zap.Any("schema", t.GetSchema()),
+    )...)
     t.manager.Update(t.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_InProgress))
     req := t.req
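This `Clone()` fix, and the three like it below, all patch the same hazard: a hand-written struct literal silently drops any field it does not list, so cloned tasks came back with nil `allocator`/`manager`/`syncMgr`/`cm` handles. A sketch of a less fragile idiom, shallow-copying the receiver and overwriting only what must be fresh; this is a general Go pattern, not what the patch itself does:

```go
package main

import (
	"context"
	"fmt"
)

type manager struct{ name string }

type task struct {
	ctx     context.Context
	cancel  context.CancelFunc
	manager *manager // a dependency that a hand-written field list can miss
}

// clone copies the whole struct, then replaces only what must be fresh (the
// context pair), so fields added later are carried over by default.
func (t *task) clone() *task {
	cp := *t // every field, present and future, is copied
	cp.ctx, cp.cancel = context.WithCancel(context.Background())
	return &cp
}

func main() {
	orig := &task{manager: &manager{name: "m"}}
	fmt.Println(orig.clone().manager.name) // "m": the dependency survives
}
```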


@@ -127,6 +127,10 @@ func (t *L0ImportTask) Clone() Task {
         cancel:       cancel,
         segmentsInfo: infos,
         req:          t.req,
+        allocator:    t.allocator,
+        manager:      t.manager,
+        syncMgr:      t.syncMgr,
+        cm:           t.cm,
         metaCaches:   t.metaCaches,
     }
 }
@@ -136,7 +140,9 @@ func (t *L0ImportTask) Execute() []*conc.Future[any] {
     log.Info("start to import l0", WrapLogFields(t,
         zap.Int("bufferSize", bufferSize),
         zap.Int64("taskSlot", t.GetSlots()),
-        zap.Any("schema", t.GetSchema()))...)
+        zap.Any("files", t.GetFileStats()),
+        zap.Any("schema", t.GetSchema()),
+    )...)
     t.manager.Update(t.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_InProgress))
     req := t.req


@@ -117,6 +117,9 @@ func (t *L0PreImportTask) Clone() Task {
         partitionIDs: t.GetPartitionIDs(),
         vchannels:    t.GetVchannels(),
         schema:       t.GetSchema(),
+        req:          t.req,
+        manager:      t.manager,
+        cm:           t.cm,
     }
 }
@@ -125,7 +128,9 @@ func (t *L0PreImportTask) Execute() []*conc.Future[any] {
     log.Info("start to preimport l0", WrapLogFields(t,
         zap.Int("bufferSize", bufferSize),
         zap.Int64("taskSlot", t.GetSlots()),
-        zap.Any("schema", t.GetSchema()))...)
+        zap.Any("files", t.GetFileStats()),
+        zap.Any("schema", t.GetSchema()),
+    )...)
     t.manager.Update(t.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_InProgress))
     files := lo.Map(t.GetFileStats(),
         func(fileStat *datapb.ImportFileStats, _ int) *internalpb.ImportFile {


@@ -125,6 +125,9 @@ func (t *PreImportTask) Clone() Task {
         vchannels: t.GetVchannels(),
         schema:    t.GetSchema(),
         options:   t.options,
+        req:       t.req,
+        manager:   t.manager,
+        cm:        t.cm,
     }
 }
@@ -133,7 +136,9 @@ func (t *PreImportTask) Execute() []*conc.Future[any] {
     log.Info("start to preimport", WrapLogFields(t,
         zap.Int("bufferSize", bufferSize),
         zap.Int64("taskSlot", t.GetSlots()),
-        zap.Any("schema", t.GetSchema()))...)
+        zap.Any("files", t.GetFileStats()),
+        zap.Any("schema", t.GetSchema()),
+    )...)
     t.manager.Update(t.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_InProgress))
     files := lo.Map(t.GetFileStats(),
         func(fileStat *datapb.ImportFileStats, _ int) *internalpb.ImportFile {


@@ -372,8 +372,7 @@ func (node *DataNode) ImportV2(ctx context.Context, req *datapb.ImportRequest) (
 }
 
 func (node *DataNode) QueryPreImport(ctx context.Context, req *datapb.QueryPreImportRequest) (*datapb.QueryPreImportResponse, error) {
-    log := log.Ctx(ctx).With(zap.Int64("taskID", req.GetTaskID()),
-        zap.Int64("jobID", req.GetJobID()))
+    log := log.Ctx(ctx).WithRateGroup("datanode.QueryPreImport", 1, 60)
 
     if err := merr.CheckHealthy(node.GetStateCode()); err != nil {
         return &datapb.QueryPreImportResponse{Status: merr.Status(err)}, nil
@@ -384,22 +383,34 @@ func (node *DataNode) QueryPreImport(ctx context.Context, req *datapb.QueryPreIm
             Status: merr.Status(importv2.WrapTaskNotFoundError(req.GetTaskID())),
         }, nil
     }
-    log.RatedInfo(10, "datanode query preimport", zap.String("state", task.GetState().String()),
-        zap.String("reason", task.GetReason()))
+    fileStats := task.(interface {
+        GetFileStats() []*datapb.ImportFileStats
+    }).GetFileStats()
+    logFields := []zap.Field{
+        zap.Int64("taskID", task.GetTaskID()),
+        zap.Int64("jobID", task.GetJobID()),
+        zap.String("state", task.GetState().String()),
+        zap.String("reason", task.GetReason()),
+        zap.Int64("nodeID", node.GetNodeID()),
+        zap.Any("fileStats", fileStats),
+    }
+    if task.GetState() == datapb.ImportTaskStateV2_InProgress {
+        log.RatedInfo(30, "datanode query preimport", logFields...)
+    } else {
+        log.Info("datanode query preimport", logFields...)
+    }
     return &datapb.QueryPreImportResponse{
         Status: merr.Success(),
         TaskID: task.GetTaskID(),
         State:  task.GetState(),
         Reason: task.GetReason(),
-        FileStats: task.(interface {
-            GetFileStats() []*datapb.ImportFileStats
-        }).GetFileStats(),
+        FileStats: fileStats,
     }, nil
 }
 
 func (node *DataNode) QueryImport(ctx context.Context, req *datapb.QueryImportRequest) (*datapb.QueryImportResponse, error) {
-    log := log.Ctx(ctx).With(zap.Int64("taskID", req.GetTaskID()),
-        zap.Int64("jobID", req.GetJobID()))
+    log := log.Ctx(ctx).WithRateGroup("datanode.QueryImport", 1, 60)
 
     if err := merr.CheckHealthy(node.GetStateCode()); err != nil {
         return &datapb.QueryImportResponse{Status: merr.Status(err)}, nil
@@ -420,22 +431,35 @@ func (node *DataNode) QueryImport(ctx context.Context, req *datapb.QueryImportRe
             Status: merr.Status(importv2.WrapTaskNotFoundError(req.GetTaskID())),
         }, nil
     }
-    log.RatedInfo(10, "datanode query import", zap.String("state", task.GetState().String()),
-        zap.String("reason", task.GetReason()))
+    segmentsInfo := task.(interface {
+        GetSegmentsInfo() []*datapb.ImportSegmentInfo
+    }).GetSegmentsInfo()
+    logFields := []zap.Field{
+        zap.Int64("taskID", task.GetTaskID()),
+        zap.Int64("jobID", task.GetJobID()),
+        zap.String("state", task.GetState().String()),
+        zap.String("reason", task.GetReason()),
+        zap.Int64("nodeID", node.GetNodeID()),
+        zap.Any("segmentsInfo", segmentsInfo),
+    }
+    if task.GetState() == datapb.ImportTaskStateV2_InProgress {
+        log.RatedInfo(30, "datanode query import", logFields...)
+    } else {
+        log.Info("datanode query import", logFields...)
+    }
     return &datapb.QueryImportResponse{
         Status: merr.Success(),
         TaskID: task.GetTaskID(),
         State:  task.GetState(),
         Reason: task.GetReason(),
-        ImportSegmentsInfo: task.(interface {
-            GetSegmentsInfo() []*datapb.ImportSegmentInfo
-        }).GetSegmentsInfo(),
+        ImportSegmentsInfo: segmentsInfo,
     }, nil
 }
 
 func (node *DataNode) DropImport(ctx context.Context, req *datapb.DropImportRequest) (*commonpb.Status, error) {
     log := log.Ctx(ctx).With(zap.Int64("taskID", req.GetTaskID()),
-        zap.Int64("jobID", req.GetJobID()))
+        zap.Int64("jobID", req.GetJobID()),
+        zap.Int64("nodeID", node.GetNodeID()))
 
     if err := merr.CheckHealthy(node.GetStateCode()); err != nil {
         return merr.Status(err), nil
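A note on the logging changes above: `WithRateGroup(name, creditsPerSecond, maxCredits)` and `RatedInfo(cost, msg, fields...)` are helpers from Milvus's log package, and with a group of (1, 60) and a cost of 30 the InProgress line should surface roughly once every 30 seconds instead of on every poll, while terminal states still log unconditionally. Outside the repo, a similar effect can be had with stock zap plus `golang.org/x/time/rate`; a minimal sketch, with the `ratedLogger` type being hypothetical:

```go
package main

import (
	"go.uber.org/zap"
	"golang.org/x/time/rate"
)

// ratedLogger drops Info lines beyond a sustained rate, the same idea as
// WithRateGroup/RatedInfo: hot poll paths log a sample, not every call.
type ratedLogger struct {
	*zap.Logger
	limiter *rate.Limiter
}

func newRatedLogger(l *zap.Logger, perSec float64, burst int) *ratedLogger {
	return &ratedLogger{Logger: l, limiter: rate.NewLimiter(rate.Limit(perSec), burst)}
}

// RatedInfo logs only when the limiter has budget left.
func (r *ratedLogger) RatedInfo(msg string, fields ...zap.Field) {
	if r.limiter.Allow() {
		r.Info(msg, fields...)
	}
}

func main() {
	logger, _ := zap.NewDevelopment()
	defer logger.Sync()
	rl := newRatedLogger(logger, 1, 1) // at most ~1 line per second
	for i := 0; i < 5; i++ {
		rl.RatedInfo("datanode query import", zap.Int("iteration", i))
	}
}
```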