fix: [2.5] Fix duplicate autoID between import and insert (#42520)
Remove the unlimited logID mechanism and switch to redundantly allocating a large number of IDs.

issue: https://github.com/milvus-io/milvus/issues/42518
pr: https://github.com/milvus-io/milvus/pull/42519

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
Parent: a0a6510db9
Commit: fdfb78b9e5
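Reviewer note: as I read this diff together with the linked issue, the problem was that DataCoord reserved only totalRows + 1 IDs per import task, while the DataNode's task-local allocator was created with an end of math.MaxInt64, so IDs consumed for logIDs (and autoIDs) could run past the reserved block into ID space that the global allocator later hands to insert requests as autoIDs. The toy program below is my own sketch of that failure mode, not Milvus code; every name in it is hypothetical.

package main

import "fmt"

// globalAllocator is a toy stand-in for the cluster-wide ID allocator:
// it hands out contiguous, never-reused blocks of IDs.
type globalAllocator struct{ next int64 }

func (g *globalAllocator) allocN(n int64) (begin, end int64) {
	begin = g.next
	g.next += n
	return begin, g.next
}

func main() {
	global := &globalAllocator{next: 1000}

	// Old behaviour: the import task reserves only totalRows+1 IDs...
	totalRows := int64(100)
	reservedBegin, reservedEnd := global.allocN(totalRows + 1)

	// ...but its local allocator is unbounded above, so IDs consumed
	// during the import silently run past the reserved block.
	localNext := reservedBegin
	localNext += totalRows + 1 // rowIDs/autoIDs plus the first logID
	localNext += 500           // further logIDs, already beyond reservedEnd

	// Meanwhile an insert request gets its autoIDs from the global allocator.
	insertBegin, insertEnd := global.allocN(50)

	fmt.Printf("import reserved [%d, %d) but consumed up to %d\n", reservedBegin, reservedEnd, localNext)
	fmt.Printf("insert autoIDs [%d, %d) -> overlap with import: %v\n", insertBegin, insertEnd, localNext > insertBegin)
}

The fix attacks both ends: DataCoord over-provisions the reserved block (first hunk below), and the DataNode's local allocator is now capped at the end of that block (later hunks), so exhaustion surfaces as an error instead of a silent overlap.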
@@ -296,14 +296,21 @@ func AssembleImportRequest(task ImportTask, job ImportJob, meta *meta, alloc all
 		return stat.GetTotalRows()
 	})
 
-	// Allocated IDs are used for rowID and the BEGINNING of the logID.
-	allocNum := totalRows + 1
+	// Pre-allocate IDs for autoIDs and logIDs.
+	preAllocIDNum := (totalRows + 1) * paramtable.Get().DataCoordCfg.ImportPreAllocIDExpansionFactor.GetAsInt64()
 
-	idBegin, idEnd, err := alloc.AllocN(allocNum)
+	idBegin, idEnd, err := alloc.AllocN(preAllocIDNum)
 	if err != nil {
 		return nil, err
 	}
+
+	log.Info("pre-allocate ids and ts for import task", WrapTaskLog(task,
+		zap.Int64("totalRows", totalRows),
+		zap.Int64("idBegin", idBegin),
+		zap.Int64("idEnd", idEnd),
+		zap.Uint64("ts", ts))...,
+	)
 
 	importFiles := lo.Map(task.GetFileStats(), func(fileStat *datapb.ImportFileStats, _ int) *internalpb.ImportFile {
 		return fileStat.GetImportFile()
 	})
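With the hunk above, the per-task reservation grows from totalRows + 1 to (totalRows + 1) times the expansion factor (default 10, defined in the paramtable hunk at the end). A quick sanity check of the arithmetic with a hypothetical row count:

package main

import "fmt"

func main() {
	const totalRows = 1_000_000 // hypothetical import size
	const factor = 10           // default expansion factor from the config hunk below
	// totalRows+1 covers rowIDs/autoIDs plus the first logID; the extra
	// multiple is headroom for the remaining logIDs.
	preAllocIDNum := int64(totalRows+1) * factor
	fmt.Println(preAllocIDNum) // 10000010
}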
@@ -19,7 +19,6 @@ package importv2
 import (
 	"context"
 	"io"
-	"math"
 	"time"
 
 	"github.com/cockroachdb/errors"
@@ -66,8 +65,8 @@ func NewImportTask(req *datapb.ImportRequest,
 	if importutilv2.IsBackup(req.GetOptions()) {
 		UnsetAutoID(req.GetSchema())
 	}
-	// Setting end as math.MaxInt64 to incrementally allocate logID.
-	alloc := allocator.NewLocalAllocator(req.GetIDRange().GetBegin(), math.MaxInt64)
+	// Allocator for autoIDs and logIDs.
+	alloc := allocator.NewLocalAllocator(req.GetIDRange().GetBegin(), req.GetIDRange().GetEnd())
 	task := &ImportTask{
 		ImportTaskV2: &datapb.ImportTaskV2{
 			JobID: req.GetJobID(),
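Note that the allocator end above is now the real end of the pre-allocated range instead of math.MaxInt64. The snippet below is a minimal bounded allocator of my own, not the milvus allocator package, just to illustrate the intended semantics: once the reserved range is used up, allocation fails loudly rather than drifting into IDs that other writers may receive.

package main

import (
	"errors"
	"fmt"
)

// boundedAllocator is a toy stand-in for a task-local ID allocator that is
// confined to a pre-reserved [begin, end) range.
type boundedAllocator struct {
	next, end int64
}

var errExhausted = errors.New("pre-allocated ID range exhausted")

// allocN returns n consecutive IDs, or an error if the range cannot cover them.
func (a *boundedAllocator) allocN(n int64) (int64, int64, error) {
	if a.next+n > a.end {
		return 0, 0, errExhausted
	}
	begin := a.next
	a.next += n
	return begin, a.next, nil
}

func main() {
	a := &boundedAllocator{next: 1000, end: 1010} // 10 reserved IDs
	b, e, err := a.allocN(8)
	fmt.Println(b, e, err) // 1000 1008 <nil>
	_, _, err = a.allocN(5)
	fmt.Println(err) // range exhausted: never spills past 1010
}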
@@ -20,7 +20,6 @@ import (
 	"context"
 	"fmt"
 	"io"
-	"math"
 	"time"
 
 	"github.com/cockroachdb/errors"
@@ -61,8 +60,8 @@ func NewL0ImportTask(req *datapb.ImportRequest,
 	cm storage.ChunkManager,
 ) Task {
 	ctx, cancel := context.WithCancel(context.Background())
-	// Setting end as math.MaxInt64 to incrementally allocate logID.
-	alloc := allocator.NewLocalAllocator(req.GetIDRange().GetBegin(), math.MaxInt64)
+	// Allocator for autoIDs and logIDs.
+	alloc := allocator.NewLocalAllocator(req.GetIDRange().GetBegin(), req.GetIDRange().GetEnd())
 	task := &L0ImportTask{
 		ImportTaskV2: &datapb.ImportTaskV2{
 			JobID: req.GetJobID(),
@@ -461,6 +461,9 @@ func (node *DataNode) ImportV2(ctx context.Context, req *datapb.ImportRequest) (
 		zap.Int64("collectionID", req.GetCollectionID()),
 		zap.Int64s("partitionIDs", req.GetPartitionIDs()),
 		zap.Strings("vchannels", req.GetVchannels()),
+		zap.Uint64("ts", req.GetTs()),
+		zap.Int64("idBegin", req.GetIDRange().GetBegin()),
+		zap.Int64("idEnd", req.GetIDRange().GetEnd()),
 		zap.Any("segments", req.GetRequestSegments()),
 		zap.Any("files", req.GetFiles()))
 
@@ -3629,15 +3629,16 @@ type dataCoordConfig struct {
 	CheckAutoBalanceConfigInterval ParamItem `refreshable:"false"`
 
 	// import
 	FilesPerPreImportTask ParamItem `refreshable:"true"`
 	ImportTaskRetention ParamItem `refreshable:"true"`
 	MaxSizeInMBPerImportTask ParamItem `refreshable:"true"`
 	ImportScheduleInterval ParamItem `refreshable:"true"`
 	ImportCheckIntervalHigh ParamItem `refreshable:"true"`
 	ImportCheckIntervalLow ParamItem `refreshable:"true"`
 	MaxFilesPerImportReq ParamItem `refreshable:"true"`
 	MaxImportJobNum ParamItem `refreshable:"true"`
 	WaitForIndex ParamItem `refreshable:"true"`
+	ImportPreAllocIDExpansionFactor ParamItem `refreshable:"true"`
 
 	GracefulStopTimeout ParamItem `refreshable:"true"`
 
@@ -4540,6 +4541,14 @@ if param targetVecIndexVersion is not set, the default value is -1, which means
 	}
 	p.WaitForIndex.Init(base.mgr)
 
+	p.ImportPreAllocIDExpansionFactor = ParamItem{
+		Key:          "dataCoord.import.preAllocateIDExpansionFactor",
+		Version:      "2.5.13",
+		DefaultValue: "10",
+		Doc:          `The expansion factor for pre-allocating IDs during import.`,
+	}
+	p.ImportPreAllocIDExpansionFactor.Init(base.mgr)
+
 	p.GracefulStopTimeout = ParamItem{
 		Key:     "dataCoord.gracefulStopTimeout",
 		Version: "2.3.7",
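Since the parameter is declared refreshable:"true" and is read from paramtable at the call site in the first hunk, a changed factor should apply to import tasks assembled after the refresh, with no restart needed. The toy below only illustrates that read-at-use pattern; the map-based config is a stand-in, not the paramtable API.

package main

import "fmt"

// toy stand-in for a refreshable config source; not the milvus paramtable API
var config = map[string]int64{"dataCoord.import.preAllocateIDExpansionFactor": 10}

func preAllocFor(totalRows int64) int64 {
	// read at use time, so a refreshed value affects the next task, not a restart
	factor := config["dataCoord.import.preAllocateIDExpansionFactor"]
	return (totalRows + 1) * factor
}

func main() {
	fmt.Println(preAllocFor(1000)) // 10010 with the default factor
	config["dataCoord.import.preAllocateIDExpansionFactor"] = 20 // runtime refresh
	fmt.Println(preAllocFor(1000)) // 20020 for tasks assembled afterwards
}

Over-provisioning looks cheap here: IDs drawn from the global allocator that a task never uses simply become gaps in the ID space, which is presumably why the commit message describes the allocation as redundant.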