// Review commentary for this patch (leading text ahead of the first
// "diff --git" header; not part of any hunk).
//
// What the patch does, per file:
//  - common/Consts.h: adds the named constant `EPSILON` (0.0000000119) for
//    float comparisons.
//  - segcore/ReduceStructure.h and segcore/reduce/StreamReduce.h: `operator>`
//    of SearchResultPair / StreamSearchResultPair compares the distance gap
//    against `EPSILON` instead of the same inline literal. Within that gap the
//    pair with the smaller primary_key_ compares as "greater"; otherwise the
//    larger distance_ does.
//  - segcore/reduce/Reduce.cpp: ReduceHelper::Reduce() now calls
//    SortEqualScoresByPks() after FillPrimaryKey() and before
//    ReduceResultData().
//      * SortEqualScoresByPks() walks every entry of search_results_ and, for
//        each i in [0, total_nq_), hands the slice
//        [topk_per_nq_prefix_sum_[i], topk_per_nq_prefix_sum_[i + 1]) to
//        SortEqualScoresOneNQ().
//      * SortEqualScoresOneNQ() returns early for slices of length <= 1. It
//        then scans for runs of consecutive entries whose distance is within
//        EPSILON of the run's FIRST entry. For each run longer than one it
//        sorts an index array by primary key (ascending) and applies that
//        order in place, cycle by cycle, to primary_keys_ and seg_offsets_.
//  - segcore/reduce/Reduce.h: declares both new methods as virtual.
//
// NOTE(review): `std::vector indices(end - start);` has no element type as
//   written here, and the Reduce.h context line reads `std::unique_ptr&`.
//   Presumably angle-bracket text was lost when this patch was captured; the
//   surrounding code uses the indices as size_t, so probably
//   `std::vector<size_t>`. Confirm against the real commit.
// NOTE(review): 0.0000000119 is 1.19e-8, one decimal place smaller than
//   FLT_EPSILON (~1.19e-7) — confirm which magnitude is intended.
// NOTE(review): only primary_keys_ and seg_offsets_ are permuted; distances_
//   stays in place, so after reordering a key can sit next to a distance that
//   differs from its own by up to EPSILON. Confirm that is acceptable.
// NOTE(review): `nq_end - nq_begin` is unsigned arithmetic; this assumes the
//   prefix sums are non-decreasing — verify against the producer.
diff --git a/internal/core/src/common/Consts.h b/internal/core/src/common/Consts.h index 12a450bcc2..6b8acc1a43 100644 --- a/internal/core/src/common/Consts.h +++ b/internal/core/src/common/Consts.h @@ -104,3 +104,6 @@ const int64_t STORAGE_V1 = 1; const int64_t STORAGE_V2 = 2; const std::string UNKNOW_CAST_FUNCTION_NAME = "unknown"; + +// EPSILON value for comparing float numbers +const float EPSILON = 0.0000000119; diff --git a/internal/core/src/segcore/ReduceStructure.h b/internal/core/src/segcore/ReduceStructure.h index 60db9df7cb..be550affe0 100644 --- a/internal/core/src/segcore/ReduceStructure.h +++ b/internal/core/src/segcore/ReduceStructure.h @@ -57,7 +57,7 @@ struct SearchResultPair { bool operator>(const SearchResultPair& other) const { - if (std::fabs(distance_ - other.distance_) < 0.0000000119) { + if (std::fabs(distance_ - other.distance_) < EPSILON) { return primary_key_ < other.primary_key_; } return distance_ > other.distance_; diff --git a/internal/core/src/segcore/reduce/Reduce.cpp b/internal/core/src/segcore/reduce/Reduce.cpp index 6bdc8cc130..9e42ba3840 100644 --- a/internal/core/src/segcore/reduce/Reduce.cpp +++ b/internal/core/src/segcore/reduce/Reduce.cpp @@ -55,6 +55,7 @@ ReduceHelper::Initialize() { void ReduceHelper::Reduce() { FillPrimaryKey(); + SortEqualScoresByPks(); ReduceResultData(); RefreshSearchResults(); FillEntryData(); @@ -145,6 +146,83 @@ ReduceHelper::FillPrimaryKey() { num_segments_ = search_results_.size(); } +void +ReduceHelper::SortEqualScoresByPks() { + tracer::AutoSpan span("ReduceHelper::SortEqualScoresByPks", + tracer::GetRootSpan()); + for (auto& search_result : search_results_) { + for (int64_t i = 0; i < search_result->total_nq_; i++) { + auto nq_begin = search_result->topk_per_nq_prefix_sum_[i]; + auto nq_end = search_result->topk_per_nq_prefix_sum_[i + 1]; + SortEqualScoresOneNQ(nq_begin, nq_end, search_result); + } + } +} + +void +ReduceHelper::SortEqualScoresOneNQ(size_t nq_begin, + size_t nq_end, + 
SearchResult* search_result) { + if (nq_end - nq_begin <= 1) + return; + + size_t start = nq_begin; + while (start < nq_end) { + // find scope with same scores + size_t end = start + 1; + while (end < nq_end && + std::fabs(search_result->distances_[end] - + search_result->distances_[start]) < EPSILON) { + ++end; + } + + if (end - start > 1) { + // Create lightweight index array for sorting + std::vector indices(end - start); + std::iota(indices.begin(), indices.end(), 0); + + // Sort indices by comparing primary keys + std::sort(indices.begin(), + indices.end(), + [&search_result, start](size_t i, size_t j) { + return search_result->primary_keys_[start + i] < + search_result->primary_keys_[start + j]; + }); + + // Apply in-place cyclic permutation + for (size_t i = 0; i < indices.size();) { + size_t target = indices[i]; + if (target == i) { + ++i; + continue; + } + + // Start of a new cycle + PkType temp_pk = + std::move(search_result->primary_keys_[start + i]); + int64_t temp_offset = search_result->seg_offsets_[start + i]; + + size_t curr = i; + while (indices[curr] != i) { + size_t next = indices[curr]; + search_result->primary_keys_[start + curr] = + std::move(search_result->primary_keys_[start + next]); + search_result->seg_offsets_[start + curr] = + search_result->seg_offsets_[start + next]; + indices[curr] = curr; // Mark as processed + curr = next; + } + + search_result->primary_keys_[start + curr] = std::move(temp_pk); + search_result->seg_offsets_[start + curr] = temp_offset; + indices[curr] = curr; + } + } + + start = end; + } +} + void ReduceHelper::RefreshSearchResults() { tracer::AutoSpan span("ReduceHelper::RefreshSearchResults", diff --git a/internal/core/src/segcore/reduce/Reduce.h b/internal/core/src/segcore/reduce/Reduce.h index 0aa16222f6..555eebd810 100644 --- a/internal/core/src/segcore/reduce/Reduce.h +++ b/internal/core/src/segcore/reduce/Reduce.h @@ -89,6 +89,14 @@ class ReduceHelper { std::unique_ptr& search_res_data); + virtual void + 
SortEqualScoresByPks(); + + virtual void + SortEqualScoresOneNQ(size_t nq_begin, + size_t nq_end, + SearchResult* search_result); + private: void Initialize(); diff --git a/internal/core/src/segcore/reduce/StreamReduce.h b/internal/core/src/segcore/reduce/StreamReduce.h index 10a138fd15..49f72b5047 100644 --- a/internal/core/src/segcore/reduce/StreamReduce.h +++ b/internal/core/src/segcore/reduce/StreamReduce.h @@ -90,7 +90,7 @@ struct StreamSearchResultPair { bool operator>(const StreamSearchResultPair& other) const { - if (std::fabs(distance_ - other.distance_) < 0.0000000119) { + if (std::fabs(distance_ - other.distance_) < EPSILON) { return primary_key_ < other.primary_key_; } return distance_ > other.distance_;