fix: Fix uneven import segment sizes (#33605)

The data coordinator has already computed the appropriate number of import segments, so when importing, the data node can simply select a segment at random for each batch and the segment sizes even out in expectation.

issue: https://github.com/milvus-io/milvus/issues/33604

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
Commit bbdf99a45e (parent 545d4725fb)
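The fix rests on a simple expectation argument: when every batch is routed to one of the N pre-allocated segments uniformly at random, each segment receives total/N bytes in expectation, so sizes even out without any bookkeeping. A minimal, self-contained sketch of that behavior (illustration only, not code from this commit):

package main

import (
	"fmt"
	"math/rand"
)

func main() {
	const (
		numSegments = 4
		numBatches  = 512              // e.g. 8 GiB split into 16 MiB batches
		batchSize   = 16 * 1024 * 1024 // bytes per batch
	)
	// Place each batch into a uniformly random segment, as the new
	// PickSegment does, and report the resulting per-segment totals.
	sizes := make([]int, numSegments)
	for i := 0; i < numBatches; i++ {
		sizes[rand.Intn(numSegments)] += batchSize
	}
	for id, s := range sizes {
		fmt.Printf("segment %d: %d MiB\n", id, s/(1024*1024))
	}
}

Each run lands every segment near 2048 MiB, the uniform share.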
@@ -224,7 +224,6 @@ func (t *ImportTask) sync(task *ImportTask, hashedData HashedData) ([]*conc.Futu
 	log.Info("start to sync import data", WrapLogFields(task)...)
 	futures := make([]*conc.Future[struct{}], 0)
 	syncTasks := make([]syncmgr.Task, 0)
-	segmentImportedSizes := make(map[int64]int)
 	for channelIdx, datas := range hashedData {
 		channel := task.GetVchannels()[channelIdx]
 		for partitionIdx, data := range datas {
@@ -232,13 +231,11 @@ func (t *ImportTask) sync(task *ImportTask, hashedData HashedData) ([]*conc.Futu
 				continue
 			}
 			partitionID := task.GetPartitionIDs()[partitionIdx]
-			size := data.GetMemorySize()
-			segmentID := PickSegment(task, segmentImportedSizes, channel, partitionID, size)
+			segmentID := PickSegment(task, channel, partitionID)
 			syncTask, err := NewSyncTask(task.ctx, task, segmentID, partitionID, channel, data)
 			if err != nil {
 				return nil, nil, err
 			}
-			segmentImportedSizes[segmentID] += size
 			future := t.syncMgr.SyncData(task.ctx, syncTask)
 			futures = append(futures, future)
 			syncTasks = append(syncTasks, syncTask)
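With the random pick, the data node no longer tracks per-segment imported sizes during sync; evenness is guaranteed by the coordinator having pre-allocated enough segments. A hedged sketch of that coordinator-side sizing as the commit message describes it (the name and the ceil-division rule are illustrative assumptions, not the coordinator's actual code):

// segmentsNeeded is an illustrative helper, not from this commit: it
// mirrors the pre-allocation the commit message attributes to the data
// coordinator, so that an even random split keeps each segment at or
// under the configured cap.
func segmentsNeeded(totalImportSize, segmentMaxSize int64) int64 {
	return (totalImportSize + segmentMaxSize - 1) / segmentMaxSize // ceil(total/cap)
}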
@@ -19,7 +19,9 @@ package importv2
 import (
 	"context"
 	"fmt"
+	"math/rand"
 	"strconv"
+	"time"
 
 	"github.com/samber/lo"
 	"go.uber.org/zap"
@@ -34,7 +36,6 @@ import (
 	"github.com/milvus-io/milvus/pkg/common"
 	"github.com/milvus-io/milvus/pkg/log"
 	"github.com/milvus-io/milvus/pkg/util/merr"
-	"github.com/milvus-io/milvus/pkg/util/paramtable"
 	"github.com/milvus-io/milvus/pkg/util/typeutil"
 )
 
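The import changes follow directly from the new implementation below: math/rand and time come in for the random pick, and paramtable goes away because PickSegment no longer consults DataCoordCfg.SegmentMaxSize on the data node.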
@@ -99,28 +100,13 @@ func NewImportSegmentInfo(syncTask syncmgr.Task, task *ImportTask) (*datapb.Impo
 	}, nil
 }
 
-func PickSegment(task *ImportTask, segmentImportedSizes map[int64]int, vchannel string, partitionID int64, sizeToImport int) int64 {
+func PickSegment(task *ImportTask, vchannel string, partitionID int64) int64 {
 	candidates := lo.Filter(task.req.GetRequestSegments(), func(info *datapb.ImportRequestSegment, _ int) bool {
 		return info.GetVchannel() == vchannel && info.GetPartitionID() == partitionID
 	})
 
-	segmentMaxSize := paramtable.Get().DataCoordCfg.SegmentMaxSize.GetAsInt() * 1024 * 1024
-	for _, candidate := range candidates {
-		sizeImported := segmentImportedSizes[candidate.GetSegmentID()]
-		if sizeImported+sizeToImport <= segmentMaxSize {
-			return candidate.GetSegmentID()
-		}
-	}
-
-	segmentID := lo.MinBy(task.GetSegmentsInfo(), func(s1, s2 *datapb.ImportSegmentInfo) bool {
-		return segmentImportedSizes[s1.GetSegmentID()] < segmentImportedSizes[s2.GetSegmentID()]
-	}).GetSegmentID()
-	log.Warn("failed to pick an appropriate segment, opt for the smallest one instead",
-		WrapLogFields(task, zap.Int64("segmentID", segmentID),
-			zap.Int("sizeToImport", sizeToImport),
-			zap.Int("sizeImported", segmentImportedSizes[segmentID]),
-			zap.Int("segmentMaxSize", segmentMaxSize))...)
-	return segmentID
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+	return candidates[r.Intn(len(candidates))].GetSegmentID()
 }
 
 func CheckRowsEqual(schema *schemapb.CollectionSchema, data *storage.InsertData) error {
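Note the design choice: PickSegment builds a fresh rand.Rand seeded from time.Now().UnixNano() on each call rather than using the mutex-protected global source. The allocation is negligible next to a sync operation; on Go 1.20+, where the global generator is auto-seeded, a plain rand.Intn(len(candidates)) would be an equivalent alternative.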
@@ -113,3 +113,57 @@ func Test_UnsetAutoID(t *testing.T) {
 		}
 	}
 }
+
+func Test_PickSegment(t *testing.T) {
+	const (
+		vchannel    = "ch-0"
+		partitionID = 10
+	)
+	task := &ImportTask{
+		req: &datapb.ImportRequest{
+			RequestSegments: []*datapb.ImportRequestSegment{
+				{
+					SegmentID:   100,
+					PartitionID: partitionID,
+					Vchannel:    vchannel,
+				},
+				{
+					SegmentID:   101,
+					PartitionID: partitionID,
+					Vchannel:    vchannel,
+				},
+				{
+					SegmentID:   102,
+					PartitionID: partitionID,
+					Vchannel:    vchannel,
+				},
+				{
+					SegmentID:   103,
+					PartitionID: partitionID,
+					Vchannel:    vchannel,
+				},
+			},
+		},
+	}
+
+	importedSize := map[int64]int{}
+
+	totalSize := 8 * 1024 * 1024 * 1024
+	batchSize := 16 * 1024 * 1024
+
+	for totalSize > 0 {
+		picked := PickSegment(task, vchannel, partitionID)
+		importedSize[picked] += batchSize
+		totalSize -= batchSize
+	}
+	expectSize := 2 * 1024 * 1024 * 1024
+	fn := func(actual int) {
+		t.Logf("actual=%d, expect*0.8=%f, expect*1.2=%f", actual, float64(expectSize)*0.8, float64(expectSize)*1.2)
+		assert.True(t, float64(actual) > float64(expectSize)*0.8)
+		assert.True(t, float64(actual) < float64(expectSize)*1.2)
+	}
+	fn(importedSize[int64(100)])
+	fn(importedSize[int64(101)])
+	fn(importedSize[int64(102)])
+	fn(importedSize[int64(103)])
+}
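The test's arithmetic: 8 GiB in 16 MiB batches is 512 picks, so with four candidate segments each segment expects 128 batches, i.e. the 2 GiB expectSize. Per-segment counts are Binomial(512, 1/4) with a standard deviation of sqrt(512 * 0.25 * 0.75) ≈ 9.8 batches, so the ±20% band (±25.6 batches) sits at about 2.6 standard deviations and the assertions hold with high probability.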