mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 01:28:27 +08:00
fix: Fix uneven import segment sizes (#33605)
The data coordinator computes the appropriate number of import segments in advance, so the data node can simply pick a segment at random when importing. issue: https://github.com/milvus-io/milvus/issues/33604 --------- Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
This commit is contained in:
parent
545d4725fb
commit
bbdf99a45e
@ -224,7 +224,6 @@ func (t *ImportTask) sync(task *ImportTask, hashedData HashedData) ([]*conc.Futu
|
||||
log.Info("start to sync import data", WrapLogFields(task)...)
|
||||
futures := make([]*conc.Future[struct{}], 0)
|
||||
syncTasks := make([]syncmgr.Task, 0)
|
||||
segmentImportedSizes := make(map[int64]int)
|
||||
for channelIdx, datas := range hashedData {
|
||||
channel := task.GetVchannels()[channelIdx]
|
||||
for partitionIdx, data := range datas {
|
||||
@ -232,13 +231,11 @@ func (t *ImportTask) sync(task *ImportTask, hashedData HashedData) ([]*conc.Futu
|
||||
continue
|
||||
}
|
||||
partitionID := task.GetPartitionIDs()[partitionIdx]
|
||||
size := data.GetMemorySize()
|
||||
segmentID := PickSegment(task, segmentImportedSizes, channel, partitionID, size)
|
||||
segmentID := PickSegment(task, channel, partitionID)
|
||||
syncTask, err := NewSyncTask(task.ctx, task, segmentID, partitionID, channel, data)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
segmentImportedSizes[segmentID] += size
|
||||
future := t.syncMgr.SyncData(task.ctx, syncTask)
|
||||
futures = append(futures, future)
|
||||
syncTasks = append(syncTasks, syncTask)
|
||||
|
||||
@ -19,7 +19,9 @@ package importv2
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/samber/lo"
|
||||
"go.uber.org/zap"
|
||||
@ -34,7 +36,6 @@ import (
|
||||
"github.com/milvus-io/milvus/pkg/common"
|
||||
"github.com/milvus-io/milvus/pkg/log"
|
||||
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
||||
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
||||
)
|
||||
|
||||
@ -99,28 +100,13 @@ func NewImportSegmentInfo(syncTask syncmgr.Task, task *ImportTask) (*datapb.Impo
|
||||
}, nil
|
||||
}
|
||||
|
||||
func PickSegment(task *ImportTask, segmentImportedSizes map[int64]int, vchannel string, partitionID int64, sizeToImport int) int64 {
|
||||
func PickSegment(task *ImportTask, vchannel string, partitionID int64) int64 {
|
||||
candidates := lo.Filter(task.req.GetRequestSegments(), func(info *datapb.ImportRequestSegment, _ int) bool {
|
||||
return info.GetVchannel() == vchannel && info.GetPartitionID() == partitionID
|
||||
})
|
||||
|
||||
segmentMaxSize := paramtable.Get().DataCoordCfg.SegmentMaxSize.GetAsInt() * 1024 * 1024
|
||||
|
||||
for _, candidate := range candidates {
|
||||
sizeImported := segmentImportedSizes[candidate.GetSegmentID()]
|
||||
if sizeImported+sizeToImport <= segmentMaxSize {
|
||||
return candidate.GetSegmentID()
|
||||
}
|
||||
}
|
||||
segmentID := lo.MinBy(task.GetSegmentsInfo(), func(s1, s2 *datapb.ImportSegmentInfo) bool {
|
||||
return segmentImportedSizes[s1.GetSegmentID()] < segmentImportedSizes[s2.GetSegmentID()]
|
||||
}).GetSegmentID()
|
||||
log.Warn("failed to pick an appropriate segment, opt for the smallest one instead",
|
||||
WrapLogFields(task, zap.Int64("segmentID", segmentID),
|
||||
zap.Int("sizeToImport", sizeToImport),
|
||||
zap.Int("sizeImported", segmentImportedSizes[segmentID]),
|
||||
zap.Int("segmentMaxSize", segmentMaxSize))...)
|
||||
return segmentID
|
||||
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||
return candidates[r.Intn(len(candidates))].GetSegmentID()
|
||||
}
|
||||
|
||||
func CheckRowsEqual(schema *schemapb.CollectionSchema, data *storage.InsertData) error {
|
||||
|
||||
@ -113,3 +113,57 @@ func Test_UnsetAutoID(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Test_PickSegment(t *testing.T) {
|
||||
const (
|
||||
vchannel = "ch-0"
|
||||
partitionID = 10
|
||||
)
|
||||
task := &ImportTask{
|
||||
req: &datapb.ImportRequest{
|
||||
RequestSegments: []*datapb.ImportRequestSegment{
|
||||
{
|
||||
SegmentID: 100,
|
||||
PartitionID: partitionID,
|
||||
Vchannel: vchannel,
|
||||
},
|
||||
{
|
||||
SegmentID: 101,
|
||||
PartitionID: partitionID,
|
||||
Vchannel: vchannel,
|
||||
},
|
||||
{
|
||||
SegmentID: 102,
|
||||
PartitionID: partitionID,
|
||||
Vchannel: vchannel,
|
||||
},
|
||||
{
|
||||
SegmentID: 103,
|
||||
PartitionID: partitionID,
|
||||
Vchannel: vchannel,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
importedSize := map[int64]int{}
|
||||
|
||||
totalSize := 8 * 1024 * 1024 * 1024
|
||||
batchSize := 16 * 1024 * 1024
|
||||
|
||||
for totalSize > 0 {
|
||||
picked := PickSegment(task, vchannel, partitionID)
|
||||
importedSize[picked] += batchSize
|
||||
totalSize -= batchSize
|
||||
}
|
||||
expectSize := 2 * 1024 * 1024 * 1024
|
||||
fn := func(actual int) {
|
||||
t.Logf("actual=%d, expect*0.8=%f, expect*1.2=%f", actual, float64(expectSize)*0.9, float64(expectSize)*1.1)
|
||||
assert.True(t, float64(actual) > float64(expectSize)*0.8)
|
||||
assert.True(t, float64(actual) < float64(expectSize)*1.2)
|
||||
}
|
||||
fn(importedSize[int64(100)])
|
||||
fn(importedSize[int64(101)])
|
||||
fn(importedSize[int64(102)])
|
||||
fn(importedSize[int64(103)])
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user