fix: [2.5] ensure deterministic search result ordering when scores are equal (#44870) (#44885)

Cherry-pick from master
pr: #44870

Related to #44819
This fix addresses an issue (#44819) where the offset parameter did not
work correctly during searches when multiple results had identical
scores. The problem occurred because results with equal scores were not
consistently ordered, leading to unpredictable pagination behavior.

The solution adds a new sorting step (SortEqualScoresByPks) in the
reduce phase that sorts results with identical scores by their primary
keys in ascending order. This ensures deterministic ordering and enables
proper offset functionality.

Changes:
- Add SortEqualScoresByPks() to sort results with equal scores by PK
- Add SortEqualScoresOneNQ() to handle per-query sorting logic
- Invoke sorting step after FillPrimaryKey() in Reduce() workflow

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
congqixia 2025-10-16 19:34:08 +08:00 committed by GitHub
parent 82081eba1b
commit 93411a388c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 91 additions and 2 deletions

View File

@ -104,3 +104,6 @@ const int64_t STORAGE_V1 = 1;
const int64_t STORAGE_V2 = 2; const int64_t STORAGE_V2 = 2;
const std::string UNKNOW_CAST_FUNCTION_NAME = "unknown"; const std::string UNKNOW_CAST_FUNCTION_NAME = "unknown";
// EPSILON value for comparing float numbers: two scores whose absolute
// difference is below this threshold are treated as equal (ties are then
// ordered by primary key).
// NOTE(review): the literal is ~1.19e-8, one order of magnitude smaller than
// FLT_EPSILON (~1.19e-7) — confirm this is the intended tolerance.
const float EPSILON = 0.0000000119;

View File

@ -57,7 +57,7 @@ struct SearchResultPair {
bool bool
operator>(const SearchResultPair& other) const { operator>(const SearchResultPair& other) const {
if (std::fabs(distance_ - other.distance_) < 0.0000000119) { if (std::fabs(distance_ - other.distance_) < EPSILON) {
return primary_key_ < other.primary_key_; return primary_key_ < other.primary_key_;
} }
return distance_ > other.distance_; return distance_ > other.distance_;

View File

@ -55,6 +55,7 @@ ReduceHelper::Initialize() {
void void
ReduceHelper::Reduce() { ReduceHelper::Reduce() {
FillPrimaryKey(); FillPrimaryKey();
SortEqualScoresByPks();
ReduceResultData(); ReduceResultData();
RefreshSearchResults(); RefreshSearchResults();
FillEntryData(); FillEntryData();
@ -145,6 +146,83 @@ ReduceHelper::FillPrimaryKey() {
num_segments_ = search_results_.size(); num_segments_ = search_results_.size();
} }
void
ReduceHelper::SortEqualScoresByPks() {
tracer::AutoSpan span("ReduceHelper::SortEqualScoresByPks",
tracer::GetRootSpan());
for (auto& search_result : search_results_) {
for (int64_t i = 0; i < search_result->total_nq_; i++) {
auto nq_begin = search_result->topk_per_nq_prefix_sum_[i];
auto nq_end = search_result->topk_per_nq_prefix_sum_[i + 1];
SortEqualScoresOneNQ(nq_begin, nq_end, search_result);
}
}
}
void
ReduceHelper::SortEqualScoresOneNQ(size_t nq_begin,
size_t nq_end,
SearchResult* search_result) {
if (nq_end - nq_begin <= 1)
return;
size_t start = nq_begin;
while (start < nq_end) {
// find scope with same scores
size_t end = start + 1;
while (end < nq_end &&
std::fabs(search_result->distances_[end] -
search_result->distances_[start]) < EPSILON) {
++end;
}
if (end - start > 1) {
// Create lightweight index array for sorting
std::vector<size_t> indices(end - start);
std::iota(indices.begin(), indices.end(), 0);
// Sort indices by comparing primary keys
std::sort(indices.begin(),
indices.end(),
[&search_result, start](size_t i, size_t j) {
return search_result->primary_keys_[start + i] <
search_result->primary_keys_[start + j];
});
// Apply in-place cyclic permutation
for (size_t i = 0; i < indices.size();) {
size_t target = indices[i];
if (target == i) {
++i;
continue;
}
// Start of a new cycle
PkType temp_pk =
std::move(search_result->primary_keys_[start + i]);
int64_t temp_offset = search_result->seg_offsets_[start + i];
size_t curr = i;
while (indices[curr] != i) {
size_t next = indices[curr];
search_result->primary_keys_[start + curr] =
std::move(search_result->primary_keys_[start + next]);
search_result->seg_offsets_[start + curr] =
search_result->seg_offsets_[start + next];
indices[curr] = curr; // Mark as processed
curr = next;
}
search_result->primary_keys_[start + curr] = std::move(temp_pk);
search_result->seg_offsets_[start + curr] = temp_offset;
indices[curr] = curr;
}
}
start = end;
}
}
void void
ReduceHelper::RefreshSearchResults() { ReduceHelper::RefreshSearchResults() {
tracer::AutoSpan span("ReduceHelper::RefreshSearchResults", tracer::AutoSpan span("ReduceHelper::RefreshSearchResults",

View File

@ -89,6 +89,14 @@ class ReduceHelper {
std::unique_ptr<milvus::proto::schema::SearchResultData>& std::unique_ptr<milvus::proto::schema::SearchResultData>&
search_res_data); search_res_data);
// For every search result and every query in it, reorders entries whose
// scores are equal (within EPSILON) so they are ascending by primary key.
// Called from Reduce() right after FillPrimaryKey().
virtual void
SortEqualScoresByPks();
// Handles a single query's slice of `search_result`: within the half-open
// index range [nq_begin, nq_end), each run of consecutive entries with equal
// scores (within EPSILON) is sorted ascending by primary key, moving
// primary_keys_ and seg_offsets_ together.
virtual void
SortEqualScoresOneNQ(size_t nq_begin,
size_t nq_end,
SearchResult* search_result);
private: private:
void void
Initialize(); Initialize();

View File

@ -90,7 +90,7 @@ struct StreamSearchResultPair {
bool bool
operator>(const StreamSearchResultPair& other) const { operator>(const StreamSearchResultPair& other) const {
if (std::fabs(distance_ - other.distance_) < 0.0000000119) { if (std::fabs(distance_ - other.distance_) < EPSILON) {
return primary_key_ < other.primary_key_; return primary_key_ < other.primary_key_;
} }
return distance_ > other.distance_; return distance_ > other.distance_;