mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
Improve load performance (#17273)
Load segment's fields concurrently Signed-off-by: yah01 <yang.cen@zilliz.com>
This commit is contained in:
parent
7c52a8c535
commit
4e83f37899
@ -339,7 +339,7 @@ func loadIndexForSegment(ctx context.Context, node *QueryNode, segmentID UniqueI
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
err = loader.loadSegment(req, segmentTypeSealed)
|
err = loader.LoadSegment(req, segmentTypeSealed)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
@ -71,28 +71,27 @@ func (loader *segmentLoader) getFieldType(segment *Segment, fieldID FieldID) (sc
|
|||||||
return coll.getFieldType(fieldID)
|
return coll.getFieldType(fieldID)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (loader *segmentLoader) loadSegment(req *querypb.LoadSegmentsRequest, segmentType segmentType) error {
|
func (loader *segmentLoader) LoadSegment(req *querypb.LoadSegmentsRequest, segmentType segmentType) error {
|
||||||
if req.Base == nil {
|
if req.Base == nil {
|
||||||
return fmt.Errorf("nil base message when load segment, collectionID = %d", req.CollectionID)
|
return fmt.Errorf("nil base message when load segment, collectionID = %d", req.CollectionID)
|
||||||
}
|
}
|
||||||
|
|
||||||
// no segment needs to load, return
|
// no segment needs to load, return
|
||||||
if len(req.Infos) == 0 {
|
segmentNum := len(req.Infos)
|
||||||
|
|
||||||
|
if segmentNum == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Info("segmentLoader start loading...",
|
log.Info("segmentLoader start loading...",
|
||||||
zap.Any("collectionID", req.CollectionID),
|
zap.Any("collectionID", req.CollectionID),
|
||||||
zap.Any("numOfSegments", len(req.Infos)),
|
zap.Any("segmentNum", segmentNum),
|
||||||
zap.Any("loadType", segmentType),
|
zap.Any("loadType", segmentType))
|
||||||
)
|
|
||||||
// check memory limit
|
// check memory limit
|
||||||
concurrencyLevel := loader.cpuPool.Cap()
|
concurrencyLevel := loader.cpuPool.Cap()
|
||||||
if len(req.Infos) > 0 && len(req.Infos[0].BinlogPaths) > 0 {
|
if concurrencyLevel > segmentNum {
|
||||||
concurrencyLevel /= len(req.Infos[0].BinlogPaths)
|
concurrencyLevel = segmentNum
|
||||||
if concurrencyLevel <= 0 {
|
|
||||||
concurrencyLevel = 1
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
for ; concurrencyLevel > 1; concurrencyLevel /= 2 {
|
for ; concurrencyLevel > 1; concurrencyLevel /= 2 {
|
||||||
err := loader.checkSegmentSize(req.CollectionID, req.Infos, concurrencyLevel)
|
err := loader.checkSegmentSize(req.CollectionID, req.Infos, concurrencyLevel)
|
||||||
@ -166,9 +165,13 @@ func (loader *segmentLoader) loadSegment(req *querypb.LoadSegmentsRequest, segme
|
|||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// start to load
|
// start to load
|
||||||
// Make sure we can always benefit from concurrency, and not spawn too many idle goroutines
|
// Make sure we can always benefit from concurrency, and not spawn too many idle goroutines
|
||||||
err = funcutil.ProcessFuncParallel(len(req.Infos),
|
log.Debug("start to load segments in parallel",
|
||||||
|
zap.Int("segmentNum", segmentNum),
|
||||||
|
zap.Int("concurrencyLevel", concurrencyLevel))
|
||||||
|
err = funcutil.ProcessFuncParallel(segmentNum,
|
||||||
concurrencyLevel,
|
concurrencyLevel,
|
||||||
loadSegmentFunc, "loadSegmentFunc")
|
loadSegmentFunc, "loadSegmentFunc")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -217,8 +220,10 @@ func (loader *segmentLoader) loadSegmentInternal(segment *Segment,
|
|||||||
if segment.getType() == segmentTypeSealed {
|
if segment.getType() == segmentTypeSealed {
|
||||||
fieldID2IndexInfo := make(map[int64]*querypb.FieldIndexInfo)
|
fieldID2IndexInfo := make(map[int64]*querypb.FieldIndexInfo)
|
||||||
for _, indexInfo := range loadInfo.IndexInfos {
|
for _, indexInfo := range loadInfo.IndexInfos {
|
||||||
fieldID := indexInfo.FieldID
|
if indexInfo != nil && indexInfo.EnableIndex {
|
||||||
fieldID2IndexInfo[fieldID] = indexInfo
|
fieldID := indexInfo.FieldID
|
||||||
|
fieldID2IndexInfo[fieldID] = indexInfo
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
indexedFieldInfos := make(map[int64]*IndexedFieldInfo)
|
indexedFieldInfos := make(map[int64]*IndexedFieldInfo)
|
||||||
@ -239,12 +244,14 @@ func (loader *segmentLoader) loadSegmentInternal(segment *Segment,
|
|||||||
if err := loader.loadIndexedFieldData(segment, indexedFieldInfos); err != nil {
|
if err := loader.loadIndexedFieldData(segment, indexedFieldInfos); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
if err := loader.loadSealedSegmentFields(segment, fieldBinlogs); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
fieldBinlogs = loadInfo.BinlogPaths
|
fieldBinlogs = loadInfo.BinlogPaths
|
||||||
}
|
if err := loader.loadGrowingSegmentFields(segment, fieldBinlogs); err != nil {
|
||||||
|
return err
|
||||||
if err := loader.loadFiledBinlogData(segment, fieldBinlogs); err != nil {
|
}
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if pkFieldID == common.InvalidFieldID {
|
if pkFieldID == common.InvalidFieldID {
|
||||||
@ -275,7 +282,7 @@ func (loader *segmentLoader) filterPKStatsBinlogs(fieldBinlogs []*datapb.FieldBi
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
func (loader *segmentLoader) loadFiledBinlogData(segment *Segment, fieldBinlogs []*datapb.FieldBinlog) error {
|
func (loader *segmentLoader) loadGrowingSegmentFields(segment *Segment, fieldBinlogs []*datapb.FieldBinlog) error {
|
||||||
if len(fieldBinlogs) <= 0 {
|
if len(fieldBinlogs) <= 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -290,7 +297,7 @@ func (loader *segmentLoader) loadFiledBinlogData(segment *Segment, fieldBinlogs
|
|||||||
loadFutures = append(loadFutures, futures...)
|
loadFutures = append(loadFutures, futures...)
|
||||||
}
|
}
|
||||||
|
|
||||||
// wait for async load result
|
// wait for async load results
|
||||||
blobs := make([]*storage.Blob, len(loadFutures))
|
blobs := make([]*storage.Blob, len(loadFutures))
|
||||||
for index, future := range loadFutures {
|
for index, future := range loadFutures {
|
||||||
if !future.OK() {
|
if !future.OK() {
|
||||||
@ -328,14 +335,64 @@ func (loader *segmentLoader) loadFiledBinlogData(segment *Segment, fieldBinlogs
|
|||||||
}
|
}
|
||||||
|
|
||||||
return loader.loadGrowingSegments(segment, rowIDData.(*storage.Int64FieldData).Data, utss, insertData)
|
return loader.loadGrowingSegments(segment, rowIDData.(*storage.Int64FieldData).Data, utss, insertData)
|
||||||
case segmentTypeSealed:
|
|
||||||
return loader.loadSealedSegments(segment, insertData)
|
|
||||||
default:
|
default:
|
||||||
err := errors.New(fmt.Sprintln("illegal segment type when load segment, collectionID = ", segment.collectionID))
|
err := fmt.Errorf("illegal segmentType=%v when load segment, collectionID=%v", segmentType, segment.collectionID)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (loader *segmentLoader) loadSealedSegmentFields(segment *Segment, fields []*datapb.FieldBinlog) error {
|
||||||
|
// Load fields concurrently
|
||||||
|
futures := make([]*concurrency.Future, 0, len(fields))
|
||||||
|
for _, field := range fields {
|
||||||
|
future := loader.loadSealedFieldAsync(segment, field)
|
||||||
|
|
||||||
|
futures = append(futures, future)
|
||||||
|
}
|
||||||
|
|
||||||
|
err := concurrency.AwaitAll(futures...)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Info("log field binlogs done",
|
||||||
|
zap.Int64("collection", segment.collectionID),
|
||||||
|
zap.Int64("segment", segment.segmentID),
|
||||||
|
zap.Any("fields", fields))
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// async load field of sealed segment
|
||||||
|
func (loader *segmentLoader) loadSealedFieldAsync(segment *Segment, field *datapb.FieldBinlog) *concurrency.Future {
|
||||||
|
iCodec := storage.InsertCodec{}
|
||||||
|
|
||||||
|
// Avoid consuming too much memory if no CPU worker ready,
|
||||||
|
// acquire a CPU worker before load field binlogs
|
||||||
|
return loader.cpuPool.Submit(func() (interface{}, error) {
|
||||||
|
futures := loader.loadFieldBinlogsAsync(field)
|
||||||
|
|
||||||
|
blobs := make([]*storage.Blob, len(futures))
|
||||||
|
for index, future := range futures {
|
||||||
|
if !future.OK() {
|
||||||
|
return nil, future.Err()
|
||||||
|
}
|
||||||
|
|
||||||
|
blob := future.Value().(*storage.Blob)
|
||||||
|
blobs[index] = blob
|
||||||
|
}
|
||||||
|
|
||||||
|
_, _, insertData, err := iCodec.Deserialize(blobs)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn(err.Error())
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, loader.loadSealedSegments(segment, insertData)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// Load binlogs concurrently into memory from KV storage asyncly
|
// Load binlogs concurrently into memory from KV storage asyncly
|
||||||
func (loader *segmentLoader) loadFieldBinlogsAsync(field *datapb.FieldBinlog) []*concurrency.Future {
|
func (loader *segmentLoader) loadFieldBinlogsAsync(field *datapb.FieldBinlog) []*concurrency.Future {
|
||||||
futures := make([]*concurrency.Future, 0, len(field.Binlogs))
|
futures := make([]*concurrency.Future, 0, len(field.Binlogs))
|
||||||
@ -361,21 +418,13 @@ func (loader *segmentLoader) loadFieldBinlogsAsync(field *datapb.FieldBinlog) []
|
|||||||
|
|
||||||
func (loader *segmentLoader) loadIndexedFieldData(segment *Segment, vecFieldInfos map[int64]*IndexedFieldInfo) error {
|
func (loader *segmentLoader) loadIndexedFieldData(segment *Segment, vecFieldInfos map[int64]*IndexedFieldInfo) error {
|
||||||
for fieldID, fieldInfo := range vecFieldInfos {
|
for fieldID, fieldInfo := range vecFieldInfos {
|
||||||
if fieldInfo.indexInfo == nil || !fieldInfo.indexInfo.EnableIndex {
|
indexInfo := fieldInfo.indexInfo
|
||||||
fieldBinlog := fieldInfo.fieldBinlog
|
err := loader.loadFieldIndexData(segment, indexInfo)
|
||||||
err := loader.loadFiledBinlogData(segment, []*datapb.FieldBinlog{fieldBinlog})
|
if err != nil {
|
||||||
if err != nil {
|
return err
|
||||||
return err
|
|
||||||
}
|
|
||||||
log.Debug("load vector field's binlog data done", zap.Int64("segmentID", segment.ID()), zap.Int64("fieldID", fieldID))
|
|
||||||
} else {
|
|
||||||
indexInfo := fieldInfo.indexInfo
|
|
||||||
err := loader.loadFieldIndexData(segment, indexInfo)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
log.Debug("load field's index data done", zap.Int64("segmentID", segment.ID()), zap.Int64("fieldID", fieldID))
|
|
||||||
}
|
}
|
||||||
|
log.Debug("load field's index data done", zap.Int64("segmentID", segment.ID()), zap.Int64("fieldID", fieldID))
|
||||||
|
|
||||||
segment.setIndexedFieldInfo(fieldID, fieldInfo)
|
segment.setIndexedFieldInfo(fieldID, fieldInfo)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -767,7 +816,11 @@ func newSegmentLoader(
|
|||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
ioPool, err := concurrency.NewPool(cpuNum*2, ants.WithPreAlloc(true))
|
ioPoolSize := cpuNum * 2
|
||||||
|
if ioPoolSize < 32 {
|
||||||
|
ioPoolSize = 32
|
||||||
|
}
|
||||||
|
ioPool, err := concurrency.NewPool(ioPoolSize, ants.WithPreAlloc(true))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error("failed to create goroutine pool for segment loader",
|
log.Error("failed to create goroutine pool for segment loader",
|
||||||
zap.Error(err))
|
zap.Error(err))
|
||||||
|
|||||||
@ -70,7 +70,7 @@ func TestSegmentLoader_loadSegment(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
err = loader.loadSegment(req, segmentTypeSealed)
|
err = loader.LoadSegment(req, segmentTypeSealed)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -101,7 +101,7 @@ func TestSegmentLoader_loadSegment(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
err = loader.loadSegment(req, segmentTypeSealed)
|
err = loader.LoadSegment(req, segmentTypeSealed)
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -114,7 +114,7 @@ func TestSegmentLoader_loadSegment(t *testing.T) {
|
|||||||
|
|
||||||
req := &querypb.LoadSegmentsRequest{}
|
req := &querypb.LoadSegmentsRequest{}
|
||||||
|
|
||||||
err = loader.loadSegment(req, segmentTypeSealed)
|
err = loader.LoadSegment(req, segmentTypeSealed)
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -182,7 +182,7 @@ func TestSegmentLoader_loadSegmentFieldsData(t *testing.T) {
|
|||||||
binlog, _, err := saveBinLog(ctx, defaultCollectionID, defaultPartitionID, defaultSegmentID, defaultMsgLength, schema)
|
binlog, _, err := saveBinLog(ctx, defaultCollectionID, defaultPartitionID, defaultSegmentID, defaultMsgLength, schema)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
err = loader.loadFiledBinlogData(segment, binlog)
|
err = loader.loadSealedSegmentFields(segment, binlog)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -235,7 +235,7 @@ func TestSegmentLoader_invalid(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
err = loader.loadSegment(req, segmentTypeSealed)
|
err = loader.LoadSegment(req, segmentTypeSealed)
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -273,7 +273,7 @@ func TestSegmentLoader_invalid(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
err = loader.loadSegment(req, segmentTypeSealed)
|
err = loader.LoadSegment(req, segmentTypeSealed)
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -298,7 +298,7 @@ func TestSegmentLoader_invalid(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
err = loader.loadSegment(req, commonpb.SegmentState_Dropped)
|
err = loader.LoadSegment(req, commonpb.SegmentState_Dropped)
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -416,7 +416,7 @@ func TestSegmentLoader_testLoadGrowingAndSealed(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
err = loader.loadSegment(req1, segmentTypeSealed)
|
err = loader.LoadSegment(req1, segmentTypeSealed)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
segment1, err := loader.metaReplica.getSegmentByID(segmentID1, segmentTypeSealed)
|
segment1, err := loader.metaReplica.getSegmentByID(segmentID1, segmentTypeSealed)
|
||||||
@ -442,7 +442,7 @@ func TestSegmentLoader_testLoadGrowingAndSealed(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
err = loader.loadSegment(req2, segmentTypeSealed)
|
err = loader.LoadSegment(req2, segmentTypeSealed)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
segment2, err := loader.metaReplica.getSegmentByID(segmentID2, segmentTypeSealed)
|
segment2, err := loader.metaReplica.getSegmentByID(segmentID2, segmentTypeSealed)
|
||||||
@ -476,7 +476,7 @@ func TestSegmentLoader_testLoadGrowingAndSealed(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
err = loader.loadSegment(req1, segmentTypeGrowing)
|
err = loader.LoadSegment(req1, segmentTypeGrowing)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
segment1, err := loader.metaReplica.getSegmentByID(segmentID1, segmentTypeGrowing)
|
segment1, err := loader.metaReplica.getSegmentByID(segmentID1, segmentTypeGrowing)
|
||||||
@ -502,7 +502,7 @@ func TestSegmentLoader_testLoadGrowingAndSealed(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
err = loader.loadSegment(req2, segmentTypeGrowing)
|
err = loader.LoadSegment(req2, segmentTypeGrowing)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
segment2, err := loader.metaReplica.getSegmentByID(segmentID2, segmentTypeGrowing)
|
segment2, err := loader.metaReplica.getSegmentByID(segmentID2, segmentTypeGrowing)
|
||||||
@ -562,7 +562,7 @@ func TestSegmentLoader_testLoadSealedSegmentWithIndex(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
err = loader.loadSegment(req, segmentTypeSealed)
|
err = loader.LoadSegment(req, segmentTypeSealed)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
segment, err := node.metaReplica.getSegmentByID(segmentID, segmentTypeSealed)
|
segment, err := node.metaReplica.getSegmentByID(segmentID, segmentTypeSealed)
|
||||||
|
|||||||
@ -218,7 +218,7 @@ func (w *watchDmChannelsTask) Execute(ctx context.Context) (err error) {
|
|||||||
zap.Int64("collectionID", collectionID),
|
zap.Int64("collectionID", collectionID),
|
||||||
zap.Int64s("unFlushedSegmentIDs", unFlushedSegmentIDs),
|
zap.Int64s("unFlushedSegmentIDs", unFlushedSegmentIDs),
|
||||||
)
|
)
|
||||||
err = w.node.loader.loadSegment(req, segmentTypeGrowing)
|
err = w.node.loader.LoadSegment(req, segmentTypeGrowing)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn(err.Error())
|
log.Warn(err.Error())
|
||||||
return err
|
return err
|
||||||
@ -524,7 +524,7 @@ func (l *loadSegmentsTask) PreExecute(ctx context.Context) error {
|
|||||||
func (l *loadSegmentsTask) Execute(ctx context.Context) error {
|
func (l *loadSegmentsTask) Execute(ctx context.Context) error {
|
||||||
// TODO: support db
|
// TODO: support db
|
||||||
log.Info("LoadSegmentTask Execute start", zap.Int64("msgID", l.req.Base.MsgID))
|
log.Info("LoadSegmentTask Execute start", zap.Int64("msgID", l.req.Base.MsgID))
|
||||||
err := l.node.loader.loadSegment(l.req, segmentTypeSealed)
|
err := l.node.loader.LoadSegment(l.req, segmentTypeSealed)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn(err.Error())
|
log.Warn(err.Error())
|
||||||
return err
|
return err
|
||||||
|
|||||||
@ -46,8 +46,8 @@ func (future *Future) Value() interface{} {
|
|||||||
return future.value
|
return future.value
|
||||||
}
|
}
|
||||||
|
|
||||||
// True if error occurred,
|
// False if error occurred,
|
||||||
// false otherwise.
|
// true otherwise.
|
||||||
func (future *Future) OK() bool {
|
func (future *Future) OK() bool {
|
||||||
<-future.ch
|
<-future.ch
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user