fix:fix undefined behavior for dump snapshot (#45611)

#45610

this fix add a little cost for execute:
=== Lower Bound Overhead (isolated) ===
Position 1 (list len = 90000): 39 ns per lower_bound
Position 2 (list len =180000): 45 ns per lower_bound
Position 3 (list len =270000): 46 ns per lower_bound
Position 4 (list len =360000): 38 ns per lower_bound
Position 5 (list len =450000): 42 ns per lower_bound
Position 6 (list len =540000): 55 ns per lower_bound
Position 7 (list len =630000): 56 ns per lower_bound
Position 8 (list len =720000): 49 ns per lower_bound
Position 9 (list len =810000): 48 ns per lower_bound

Signed-off-by: luzhang <luzhang@zilliz.com>
Co-authored-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
zhagnlu 2025-11-25 14:17:07 +08:00 committed by GitHub
parent 929cb42fcc
commit 346449d87f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -198,7 +198,8 @@ class DeletedRecord {
loc--; loc--;
} }
if (loc >= 0) { if (loc >= 0) {
next_iter = snap_next_iter_[loc]; // use lower_bound to relocate the iterator in current accessor
next_iter = accessor.lower_bound(snap_next_pos_[loc]);
auto or_size = auto or_size =
std::min(snapshots_[loc].second.size(), bitset.size()); std::min(snapshots_[loc].second.size(), bitset.size());
bitset.inplace_or_with_count(snapshots_[loc].second, bitset.inplace_or_with_count(snapshots_[loc].second,
@ -251,8 +252,7 @@ class DeletedRecord {
auto it = accessor.begin(); auto it = accessor.begin();
Timestamp last_dump_ts = 0; Timestamp last_dump_ts = 0;
if (!snapshots_.empty()) { if (!snapshots_.empty()) {
it = snap_next_iter_.back(); it = accessor.lower_bound(snap_next_pos_.back());
last_dump_ts = snapshots_.back().first;
bitmap.inplace_or_with_count(snapshots_.back().second, bitmap.inplace_or_with_count(snapshots_.back().second,
snapshots_.back().second.size()); snapshots_.back().second.size());
} }
@ -274,13 +274,14 @@ class DeletedRecord {
if (dump_ts == last_dump_ts) { if (dump_ts == last_dump_ts) {
// only update // only update
snapshots_.back().second = std::move(bitmap.clone()); snapshots_.back().second = std::move(bitmap.clone());
snap_next_iter_.back() = it; Assert(it != accessor.end() && it.good());
snap_next_pos_.back() = *it;
} else { } else {
// add new snapshot // add new snapshot
snapshots_.push_back( snapshots_.push_back(
std::make_pair(dump_ts, bitmap.clone())); std::make_pair(dump_ts, bitmap.clone()));
Assert(it != accessor.end() && it.good()); Assert(it != accessor.end() && it.good());
snap_next_iter_.push_back(it); snap_next_pos_.push_back(*it);
} }
} }
@ -346,8 +347,9 @@ class DeletedRecord {
// dump snapshot low frequency // dump snapshot low frequency
mutable std::shared_mutex snap_lock_; mutable std::shared_mutex snap_lock_;
std::vector<std::pair<Timestamp, BitsetType>> snapshots_; std::vector<std::pair<Timestamp, BitsetType>> snapshots_;
// next delete record iterator that follows every snapshot // next delete record position that follows every snapshot
std::vector<SortedDeleteList::iterator> snap_next_iter_; // store position (timestamp, offset)
std::vector<std::pair<Timestamp, Offset>> snap_next_pos_;
// total number of delete entries that have been incorporated into snapshots // total number of delete entries that have been incorporated into snapshots
std::atomic<int64_t> dumped_entry_count_{0}; std::atomic<int64_t> dumped_entry_count_{0};
// estimated memory size of DeletedRecord, only used for sealed segment // estimated memory size of DeletedRecord, only used for sealed segment