enhance: add param to modify dump snapshot batch size (#44215)

issue: #44216

Signed-off-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
zhagnlu 2025-09-05 14:29:54 +08:00 committed by GitHub
parent db6595d7a5
commit d67f1ea0ab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 75 additions and 28 deletions

View File

@ -506,6 +506,7 @@ queryNode:
# If set to 0, time based eviction is disabled. # If set to 0, time based eviction is disabled.
cacheTtl: 0 cacheTtl: 0
knowhereScoreConsistency: false # Enable knowhere strong consistency score computation logic knowhereScoreConsistency: false # Enable knowhere strong consistency score computation logic
deleteDumpBatchSize: 10000 # Batch size for delete snapshot dump in segcore.
loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments
enableDisk: false # enable querynode load disk index, and search on disk index enableDisk: false # enable querynode load disk index, and search on disk index
maxDiskUsagePercentage: 95 maxDiskUsagePercentage: 95

View File

@ -23,6 +23,7 @@ namespace milvus {
std::atomic<int64_t> FILE_SLICE_SIZE(DEFAULT_INDEX_FILE_SLICE_SIZE); std::atomic<int64_t> FILE_SLICE_SIZE(DEFAULT_INDEX_FILE_SLICE_SIZE);
std::atomic<int64_t> EXEC_EVAL_EXPR_BATCH_SIZE( std::atomic<int64_t> EXEC_EVAL_EXPR_BATCH_SIZE(
DEFAULT_EXEC_EVAL_EXPR_BATCH_SIZE); DEFAULT_EXEC_EVAL_EXPR_BATCH_SIZE);
std::atomic<int64_t> DELETE_DUMP_BATCH_SIZE(DEFAULT_DELETE_DUMP_BATCH_SIZE);
std::atomic<bool> OPTIMIZE_EXPR_ENABLED(DEFAULT_OPTIMIZE_EXPR_ENABLED); std::atomic<bool> OPTIMIZE_EXPR_ENABLED(DEFAULT_OPTIMIZE_EXPR_ENABLED);
std::atomic<bool> GROWING_JSON_KEY_STATS_ENABLED( std::atomic<bool> GROWING_JSON_KEY_STATS_ENABLED(
@ -43,6 +44,13 @@ SetDefaultExecEvalExprBatchSize(int64_t val) {
EXEC_EVAL_EXPR_BATCH_SIZE.load()); EXEC_EVAL_EXPR_BATCH_SIZE.load());
} }
// Updates the process-wide batch size used when dumping delete-record
// snapshots (DELETE_DUMP_BATCH_SIZE).
//
// @param val  new batch size in entries; must be strictly positive.
//
// Non-positive values are ignored and the current setting is kept: the
// snapshot dump loop compares the number of pending entries against this
// value and advances its cursor by it, so a value <= 0 would prevent the
// loop from ever making progress.
void
SetDefaultDeleteDumpBatchSize(int64_t val) {
    if (val <= 0) {
        LOG_INFO(
            "ignore invalid delete dump batch size: {}, keep current value: {}",
            val,
            DELETE_DUMP_BATCH_SIZE.load());
        return;
    }
    DELETE_DUMP_BATCH_SIZE.store(val);
    LOG_INFO("set default delete dump batch size: {}",
             DELETE_DUMP_BATCH_SIZE.load());
}
void void
SetDefaultOptimizeExprEnable(bool val) { SetDefaultOptimizeExprEnable(bool val) {
OPTIMIZE_EXPR_ENABLED.store(val); OPTIMIZE_EXPR_ENABLED.store(val);

View File

@ -27,6 +27,7 @@ namespace milvus {
extern std::atomic<int64_t> FILE_SLICE_SIZE; extern std::atomic<int64_t> FILE_SLICE_SIZE;
extern std::atomic<int64_t> EXEC_EVAL_EXPR_BATCH_SIZE; extern std::atomic<int64_t> EXEC_EVAL_EXPR_BATCH_SIZE;
extern std::atomic<int64_t> DELETE_DUMP_BATCH_SIZE;
extern std::atomic<bool> OPTIMIZE_EXPR_ENABLED; extern std::atomic<bool> OPTIMIZE_EXPR_ENABLED;
extern std::atomic<bool> GROWING_JSON_KEY_STATS_ENABLED; extern std::atomic<bool> GROWING_JSON_KEY_STATS_ENABLED;
extern std::atomic<bool> CONFIG_PARAM_TYPE_CHECK_ENABLED; extern std::atomic<bool> CONFIG_PARAM_TYPE_CHECK_ENABLED;
@ -37,6 +38,9 @@ SetIndexSliceSize(const int64_t size);
void void
SetDefaultExecEvalExprBatchSize(int64_t val); SetDefaultExecEvalExprBatchSize(int64_t val);
void
SetDefaultDeleteDumpBatchSize(int64_t val);
void void
SetDefaultOptimizeExprEnable(bool val); SetDefaultOptimizeExprEnable(bool val);

View File

@ -77,6 +77,8 @@ const int64_t DEFAULT_INDEX_FILE_SLICE_SIZE = 16 << 20; // bytes
const int64_t DEFAULT_EXEC_EVAL_EXPR_BATCH_SIZE = 8192; const int64_t DEFAULT_EXEC_EVAL_EXPR_BATCH_SIZE = 8192;
const int64_t DEFAULT_DELETE_DUMP_BATCH_SIZE = 10000;
constexpr const char* RADIUS = knowhere::meta::RADIUS; constexpr const char* RADIUS = knowhere::meta::RADIUS;
constexpr const char* RANGE_FILTER = knowhere::meta::RANGE_FILTER; constexpr const char* RANGE_FILTER = knowhere::meta::RANGE_FILTER;

View File

@ -55,6 +55,11 @@ SetDefaultExprEvalBatchSize(int64_t val) {
milvus::SetDefaultExecEvalExprBatchSize(val); milvus::SetDefaultExecEvalExprBatchSize(val);
} }
// C-linkage entry point for configuring the delete snapshot dump batch size.
// Forwards `val` unchanged to milvus::SetDefaultDeleteDumpBatchSize.
void
SetDefaultDeleteDumpBatchSize(int64_t val) {
milvus::SetDefaultDeleteDumpBatchSize(val);
}
void void
SetDefaultOptimizeExprEnable(bool val) { SetDefaultOptimizeExprEnable(bool val) {
milvus::SetDefaultOptimizeExprEnable(val); milvus::SetDefaultOptimizeExprEnable(val);

View File

@ -42,6 +42,9 @@ SetLowPriorityThreadCoreCoefficient(const float);
void void
SetDefaultExprEvalBatchSize(int64_t val); SetDefaultExprEvalBatchSize(int64_t val);
void
SetDefaultDeleteDumpBatchSize(int64_t val);
void void
SetDefaultOptimizeExprEnable(bool val); SetDefaultOptimizeExprEnable(bool val);

View File

@ -47,7 +47,6 @@ struct Comparator {
using SortedDeleteList = using SortedDeleteList =
folly::ConcurrentSkipList<std::pair<Timestamp, Offset>, Comparator>; folly::ConcurrentSkipList<std::pair<Timestamp, Offset>, Comparator>;
static int32_t DUMP_BATCH_SIZE = 10000;
static int32_t DELETE_PAIR_SIZE = sizeof(std::pair<Timestamp, Offset>); static int32_t DELETE_PAIR_SIZE = sizeof(std::pair<Timestamp, Offset>);
template <bool is_sealed = false> template <bool is_sealed = false>
@ -105,15 +104,7 @@ class DeletedRecord {
bool can_dump = timestamps[0] >= max_load_timestamp_; bool can_dump = timestamps[0] >= max_load_timestamp_;
if (can_dump) { if (can_dump) {
auto start_time = std::chrono::steady_clock::now();
DumpSnapshot(); DumpSnapshot();
auto end_time = std::chrono::steady_clock::now();
auto duration =
std::chrono::duration_cast<std::chrono::microseconds>(
end_time - start_time);
LOG_INFO("dump delete record snapshot cost: {}ms for segment: {}",
duration.count() / 1000,
segment_id_);
} }
} }
@ -241,11 +232,11 @@ class DeletedRecord {
void void
DumpSnapshot() { DumpSnapshot() {
std::unique_lock<std::shared_mutex> lock(snap_lock_);
SortedDeleteList::Accessor accessor(deleted_lists_); SortedDeleteList::Accessor accessor(deleted_lists_);
int total_size = accessor.size(); int total_size = accessor.size();
while (total_size - dumped_entry_count_.load() > DUMP_BATCH_SIZE) { while (total_size - dumped_entry_count_.load() >
DELETE_DUMP_BATCH_SIZE) {
int32_t bitsize = 0; int32_t bitsize = 0;
if constexpr (is_sealed) { if constexpr (is_sealed) {
bitsize = sealed_row_count_; bitsize = sealed_row_count_;
@ -263,30 +254,34 @@ class DeletedRecord {
snapshots_.back().second.size()); snapshots_.back().second.size());
} }
while (total_size - dumped_entry_count_.load() > DUMP_BATCH_SIZE && while (total_size - dumped_entry_count_.load() >
DELETE_DUMP_BATCH_SIZE &&
it != accessor.end()) { it != accessor.end()) {
Timestamp dump_ts = 0; Timestamp dump_ts = 0;
for (auto size = 0; size < DUMP_BATCH_SIZE; ++it, ++size) { for (auto size = 0;
size < DELETE_DUMP_BATCH_SIZE && it != accessor.end();
++it, ++size) {
bitmap.set(it->second); bitmap.set(it->second);
if (size == DUMP_BATCH_SIZE - 1) { dump_ts = it->first;
dump_ts = it->first; }
{
std::unique_lock<std::shared_mutex> lock(snap_lock_);
if (dump_ts == last_dump_ts) {
// only update
snapshots_.back().second = std::move(bitmap.clone());
snap_next_iter_.back() = it;
} else {
// add new snapshot
snapshots_.push_back(
std::make_pair(dump_ts, bitmap.clone()));
Assert(it != accessor.end() && it.good());
snap_next_iter_.push_back(it);
} }
} }
if (dump_ts == last_dump_ts) { dumped_entry_count_.fetch_add(DELETE_DUMP_BATCH_SIZE);
// only update
snapshots_.back().second = std::move(bitmap.clone());
snap_next_iter_.back() = it;
} else {
// add new snapshot
snapshots_.push_back(
std::make_pair(dump_ts, bitmap.clone()));
Assert(it != accessor.end() && it.good());
snap_next_iter_.push_back(it);
}
dumped_entry_count_.fetch_add(DUMP_BATCH_SIZE);
LOG_INFO( LOG_INFO(
"dump delete record snapshot at ts: {}, cursor: {}, " "dump delete record snapshot at ts: {}, cursor: {}, "
"total size:{} " "total size:{} "

View File

@ -300,6 +300,9 @@ func (node *QueryNode) InitSegcore() error {
cExprBatchSize := C.int64_t(paramtable.Get().QueryNodeCfg.ExprEvalBatchSize.GetAsInt64()) cExprBatchSize := C.int64_t(paramtable.Get().QueryNodeCfg.ExprEvalBatchSize.GetAsInt64())
C.SetDefaultExprEvalBatchSize(cExprBatchSize) C.SetDefaultExprEvalBatchSize(cExprBatchSize)
cDeleteDumpBatchSize := C.int64_t(paramtable.Get().QueryNodeCfg.DeleteDumpBatchSize.GetAsInt64())
C.SetDefaultDeleteDumpBatchSize(cDeleteDumpBatchSize)
cOptimizeExprEnabled := C.bool(paramtable.Get().CommonCfg.EnabledOptimizeExpr.GetAsBool()) cOptimizeExprEnabled := C.bool(paramtable.Get().CommonCfg.EnabledOptimizeExpr.GetAsBool())
C.SetDefaultOptimizeExprEnable(cOptimizeExprEnabled) C.SetDefaultOptimizeExprEnable(cOptimizeExprEnabled)

View File

@ -493,6 +493,15 @@ func SetupCoreConfigChangelCallback() {
return nil return nil
}) })
// Propagate changes of the delete snapshot dump batch size to segcore.
// The new value is parsed as a decimal integer; a parse failure is returned
// to the caller and nothing is updated.
// NOTE(review): the parsed size is forwarded as-is, no range check (e.g. > 0)
// is performed here.
// NOTE(review): the DeleteDumpBatchSize field is tagged `refreshable:"false"`
// in queryNodeConfig while this callback applies updates at runtime — confirm
// which of the two is intended.
paramtable.Get().QueryNodeCfg.DeleteDumpBatchSize.RegisterCallback(func(ctx context.Context, key, oldValue, newValue string) error {
size, err := strconv.Atoi(newValue)
if err != nil {
return err
}
UpdateDefaultDeleteDumpBatchSize(size)
return nil
})
paramtable.Get().QueryNodeCfg.ExprResCacheEnabled.RegisterCallback(func(ctx context.Context, key, oldValue, newValue string) error { paramtable.Get().QueryNodeCfg.ExprResCacheEnabled.RegisterCallback(func(ctx context.Context, key, oldValue, newValue string) error {
enable, err := strconv.ParseBool(newValue) enable, err := strconv.ParseBool(newValue)
if err != nil { if err != nil {

View File

@ -60,6 +60,10 @@ func UpdateDefaultExprEvalBatchSize(size int) {
C.SetDefaultExprEvalBatchSize(C.int64_t(size)) C.SetDefaultExprEvalBatchSize(C.int64_t(size))
} }
// UpdateDefaultDeleteDumpBatchSize passes the delete snapshot dump batch size
// to the C side by calling SetDefaultDeleteDumpBatchSize with size converted
// to int64_t.
func UpdateDefaultDeleteDumpBatchSize(size int) {
C.SetDefaultDeleteDumpBatchSize(C.int64_t(size))
}
func UpdateDefaultOptimizeExprEnable(enable bool) { func UpdateDefaultOptimizeExprEnable(enable bool) {
C.SetDefaultOptimizeExprEnable(C.bool(enable)) C.SetDefaultOptimizeExprEnable(C.bool(enable))
} }

View File

@ -3041,6 +3041,10 @@ type queryNodeConfig struct {
EnableWorkerSQCostMetrics ParamItem `refreshable:"true"` EnableWorkerSQCostMetrics ParamItem `refreshable:"true"`
ExprEvalBatchSize ParamItem `refreshable:"false"` ExprEvalBatchSize ParamItem `refreshable:"false"`
// delete snapshot dump batch size
DeleteDumpBatchSize ParamItem `refreshable:"false"`
// expr cache // expr cache
ExprResCacheEnabled ParamItem `refreshable:"false"` ExprResCacheEnabled ParamItem `refreshable:"false"`
ExprResCacheCapacityBytes ParamItem `refreshable:"false"` ExprResCacheCapacityBytes ParamItem `refreshable:"false"`
@ -4058,6 +4062,15 @@ user-task-polling:
} }
p.ExprEvalBatchSize.Init(base.mgr) p.ExprEvalBatchSize.Init(base.mgr)
// Batch size for the delete snapshot dump in segcore. Exported to the
// generated config under queryNode.segcore.deleteDumpBatchSize with a default
// of 10000 entries.
p.DeleteDumpBatchSize = ParamItem{
Key: "queryNode.segcore.deleteDumpBatchSize",
Version: "2.6.2",
DefaultValue: "10000",
Doc: "Batch size for delete snapshot dump in segcore.",
Export: true,
}
p.DeleteDumpBatchSize.Init(base.mgr)
// expr cache // expr cache
p.ExprResCacheEnabled = ParamItem{ p.ExprResCacheEnabled = ParamItem{
Key: "queryNode.exprCache.enabled", Key: "queryNode.exprCache.enabled",