mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
enhance: [StorageV2] Pass args for avg size split policy (#44301)
Related to #44257 This PR - Pass column stats for avg size split policy - Add param items for policy configuration --------- Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
parent
e821468d2a
commit
fc968ff1c2
@ -1010,6 +1010,13 @@ common:
|
||||
maxWLockConditionalWaitTime: 600 # maximum seconds for waiting wlock conditional
|
||||
storage:
|
||||
enablev2: true
|
||||
stv2:
|
||||
splitSystemColumn:
|
||||
enabled: true # enable split system column policy in storage v2
|
||||
includePK: true # whether split system column policy include pk field
|
||||
splitByAvgSize:
|
||||
enabled: false # enable split by average size policy in storage v2
|
||||
threshold: 1024 # split by average size policy threshold(in bytes) in storage v2
|
||||
# Whether to disable the internal time messaging mechanism for the system.
|
||||
# If disabled (set to false), the system will not allow DML operations, including insertion, deletion, queries, and searches.
|
||||
# This helps Milvus-CDC synchronize incremental data
|
||||
|
||||
@ -239,7 +239,28 @@ func (t *SyncTask) getColumnGroups(segmentInfo *metacache.SegmentInfo) []storage
|
||||
|
||||
// TODO calculate field stats
|
||||
policies := storagecommon.DefaultPolicies()
|
||||
return storagecommon.SplitColumns(allFields, map[int64]storagecommon.ColumnStats{}, policies...)
|
||||
return storagecommon.SplitColumns(allFields, t.calcColumnStats(), policies...)
|
||||
}
|
||||
|
||||
func (t *SyncTask) calcColumnStats() map[int64]storagecommon.ColumnStats {
|
||||
result := make(map[int64]storagecommon.ColumnStats)
|
||||
|
||||
memorySizes := make(map[int64]int64)
|
||||
rowNums := make(map[int64]int64)
|
||||
for _, data := range t.pack.insertData {
|
||||
for fieldID, fieldData := range data.Data {
|
||||
memorySizes[fieldID] += int64(fieldData.GetMemorySize())
|
||||
rowNums[fieldID] += int64(fieldData.RowNum())
|
||||
}
|
||||
}
|
||||
for fieldID, rowNum := range rowNums {
|
||||
if rowNum > 0 {
|
||||
result[fieldID] = storagecommon.ColumnStats{
|
||||
AvgSize: memorySizes[fieldID] / rowNum,
|
||||
}
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// writeMeta updates segments via meta writer in option.
|
||||
|
||||
@ -351,7 +351,7 @@ func (pw *PackedBinlogRecordWriter) initWriters(r Record) error {
|
||||
if pw.writer == nil {
|
||||
if len(pw.columnGroups) == 0 {
|
||||
allFields := typeutil.GetAllFieldSchemas(pw.schema)
|
||||
pw.columnGroups = storagecommon.SplitColumns(allFields, map[int64]storagecommon.ColumnStats{}, storagecommon.DefaultPolicies()...)
|
||||
pw.columnGroups = storagecommon.SplitColumns(allFields, pw.getColumnStatsFromRecord(r, allFields), storagecommon.DefaultPolicies()...)
|
||||
}
|
||||
logIdStart, _, err := pw.allocator.Alloc(uint32(len(pw.columnGroups)))
|
||||
if err != nil {
|
||||
@ -371,6 +371,18 @@ func (pw *PackedBinlogRecordWriter) initWriters(r Record) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (pw *PackedBinlogRecordWriter) getColumnStatsFromRecord(r Record, allFields []*schemapb.FieldSchema) map[int64]storagecommon.ColumnStats {
|
||||
result := make(map[int64]storagecommon.ColumnStats)
|
||||
for _, field := range allFields {
|
||||
if arr := r.Column(field.FieldID); arr != nil {
|
||||
result[field.FieldID] = storagecommon.ColumnStats{
|
||||
AvgSize: int64(arr.Data().SizeInBytes()) / int64(arr.Len()),
|
||||
}
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func (pw *PackedBinlogRecordWriter) GetWrittenUncompressed() uint64 {
|
||||
return pw.writtenUncompressed
|
||||
}
|
||||
|
||||
@ -20,6 +20,7 @@ import (
|
||||
"sort"
|
||||
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
|
||||
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
|
||||
)
|
||||
|
||||
@ -46,11 +47,18 @@ func SplitColumns(fields []*schemapb.FieldSchema, stats map[int64]ColumnStats, p
|
||||
}
|
||||
|
||||
func DefaultPolicies() []ColumnGroupSplitPolicy {
|
||||
return []ColumnGroupSplitPolicy{
|
||||
NewSystemColumnPolicy(true),
|
||||
NewSelectedDataTypePolicy(),
|
||||
NewRemanentShortPolicy(-1),
|
||||
paramtable.Init()
|
||||
result := make([]ColumnGroupSplitPolicy, 0, 4)
|
||||
if paramtable.Get().CommonCfg.Stv2SplitSystemColumn.GetAsBool() {
|
||||
result = append(result, NewSystemColumnPolicy(paramtable.Get().CommonCfg.Stv2SystemColumnIncludePK.GetAsBool()))
|
||||
}
|
||||
if paramtable.Get().CommonCfg.Stv2SplitByAvgSize.GetAsBool() {
|
||||
result = append(result, NewAvgSizePolicy(paramtable.Get().CommonCfg.Stv2SplitAvgSizeThreshold.GetAsInt64()))
|
||||
}
|
||||
result = append(result,
|
||||
NewSelectedDataTypePolicy(),
|
||||
NewRemanentShortPolicy(-1))
|
||||
return result
|
||||
}
|
||||
|
||||
func IsVectorDataType(dataType schemapb.DataType) bool {
|
||||
|
||||
@ -279,7 +279,13 @@ type commonConfig struct {
|
||||
LockSlowLogWarnThreshold ParamItem `refreshable:"true"`
|
||||
MaxWLockConditionalWaitTime ParamItem `refreshable:"true"`
|
||||
|
||||
// storage v2
|
||||
EnableStorageV2 ParamItem `refreshable:"false"`
|
||||
Stv2SplitSystemColumn ParamItem `refreshable:"true"`
|
||||
Stv2SystemColumnIncludePK ParamItem `refreshable:"true"`
|
||||
Stv2SplitByAvgSize ParamItem `refreshable:"true"`
|
||||
Stv2SplitAvgSizeThreshold ParamItem `refreshable:"true"`
|
||||
|
||||
StoragePathPrefix ParamItem `refreshable:"false"`
|
||||
StorageZstdConcurrency ParamItem `refreshable:"false"`
|
||||
TTMsgEnabled ParamItem `refreshable:"true"`
|
||||
@ -926,6 +932,42 @@ Large numeric passwords require double quotes to avoid yaml parsing precision is
|
||||
}
|
||||
p.EnableStorageV2.Init(base.mgr)
|
||||
|
||||
p.Stv2SplitSystemColumn = ParamItem{
|
||||
Key: "common.storage.stv2.splitSystemColumn.enabled",
|
||||
Version: "2.6.2",
|
||||
DefaultValue: "true",
|
||||
Doc: "enable split system column policy in storage v2",
|
||||
Export: true,
|
||||
}
|
||||
p.Stv2SplitSystemColumn.Init(base.mgr)
|
||||
|
||||
p.Stv2SystemColumnIncludePK = ParamItem{
|
||||
Key: "common.storage.stv2.splitSystemColumn.includePK",
|
||||
Version: "2.6.2",
|
||||
DefaultValue: "true",
|
||||
Doc: "whether split system column policy include pk field",
|
||||
Export: true,
|
||||
}
|
||||
p.Stv2SystemColumnIncludePK.Init(base.mgr)
|
||||
|
||||
p.Stv2SplitByAvgSize = ParamItem{
|
||||
Key: "common.storage.stv2.splitByAvgSize.enabled",
|
||||
Version: "2.6.2",
|
||||
DefaultValue: "false",
|
||||
Doc: "enable split by average size policy in storage v2",
|
||||
Export: true,
|
||||
}
|
||||
p.Stv2SplitByAvgSize.Init(base.mgr)
|
||||
|
||||
p.Stv2SplitAvgSizeThreshold = ParamItem{
|
||||
Key: "common.storage.stv2.splitByAvgSize.threshold",
|
||||
Version: "2.6.2",
|
||||
DefaultValue: "1024",
|
||||
Doc: "split by average size policy threshold(in bytes) in storage v2",
|
||||
Export: true,
|
||||
}
|
||||
p.Stv2SplitAvgSizeThreshold.Init(base.mgr)
|
||||
|
||||
p.StoragePathPrefix = ParamItem{
|
||||
Key: "common.storage.pathPrefix",
|
||||
Version: "2.3.4",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user