From 6c19f9baf8d809ee42a3bb1331628383c2a2c194 Mon Sep 17 00:00:00 2001
From: Chun Han <116052805+MrPresent-Han@users.noreply.github.com>
Date: Mon, 22 Jul 2024 20:57:43 +0800
Subject: [PATCH] enhance: optimize search reduce perf(#32507) (#34607)

related: #32507

Signed-off-by: MrPresent-Han
Co-authored-by: MrPresent-Han
---
 internal/core/src/segcore/reduce/Reduce.cpp | 28 +++++++++------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/internal/core/src/segcore/reduce/Reduce.cpp b/internal/core/src/segcore/reduce/Reduce.cpp
index b0086d6901..0c26608ace 100644
--- a/internal/core/src/segcore/reduce/Reduce.cpp
+++ b/internal/core/src/segcore/reduce/Reduce.cpp
@@ -91,17 +91,21 @@ ReduceHelper::FilterInvalidSearchResult(SearchResult* search_result) {
     auto& offsets = search_result->seg_offsets_;
     auto& distances = search_result->distances_;
 
+    int segment_row_count = segment->get_row_count();
+    //1. for sealed segment, segment_row_count will not change as delete records will take effect as bitset
+    //2. for growing segment, segment_row_count is the minimum position acknowledged, which will only increase after
+    //the time at which the search operation is executed, so it's safe here to keep this value inside stack
     for (auto i = 0; i < nq; ++i) {
         for (auto j = 0; j < topK; ++j) {
             auto index = i * topK + j;
             if (offsets[index] != INVALID_SEG_OFFSET) {
                 AssertInfo(0 <= offsets[index] &&
-                               offsets[index] < segment->get_row_count(),
+                               offsets[index] < segment_row_count,
                            fmt::format("invalid offset {}, segment {} with "
                                        "rows num {}, data or index corruption",
                                        offsets[index],
                                        segment->get_segment_id(),
-                                       segment->get_row_count()));
+                                       segment_row_count));
                 real_topks[i]++;
                 offsets[valid_index] = offsets[index];
                 distances[valid_index] = distances[index];
@@ -160,27 +164,19 @@ void
 ReduceHelper::RefreshSingleSearchResult(SearchResult* search_result,
                                         int seg_res_idx,
                                         std::vector& real_topks) {
-    uint32_t size = 0;
-    for (int j = 0; j < total_nq_; j++) {
-        size += final_search_records_[seg_res_idx][j].size();
-    }
-    std::vector primary_keys(size);
-    std::vector distances(size);
-    std::vector seg_offsets(size);
-
     uint32_t index = 0;
     for (int j = 0; j < total_nq_; j++) {
         for (auto offset : final_search_records_[seg_res_idx][j]) {
-            primary_keys[index] = search_result->primary_keys_[offset];
-            distances[index] = search_result->distances_[offset];
-            seg_offsets[index] = search_result->seg_offsets_[offset];
+            search_result->primary_keys_[index] = search_result->primary_keys_[offset];
+            search_result->distances_[index] = search_result->distances_[offset];
+            search_result->seg_offsets_[index] = search_result->seg_offsets_[offset];
             index++;
             real_topks[j]++;
         }
     }
-    search_result->primary_keys_.swap(primary_keys);
-    search_result->distances_.swap(distances);
-    search_result->seg_offsets_.swap(seg_offsets);
+    search_result->primary_keys_.resize(index);
+    search_result->distances_.resize(index);
+    search_result->seg_offsets_.resize(index);
 }
 
 void