mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
Cherry-pick from master PR #44870. Related to #44819.
This fix addresses an issue (#44819) where the offset parameter did not work correctly during searches when multiple results had identical scores. The problem occurred because results with equal scores were not consistently ordered, leading to unpredictable pagination behavior. The solution adds a new sorting step (SortEqualScoresByPks) in the reduce phase that sorts results with identical scores by their primary keys in ascending order. This ensures deterministic ordering and enables proper offset functionality.
Changes:
- Add SortEqualScoresByPks() to sort results with equal scores by PK
- Add SortEqualScoresOneNQ() to handle per-query sorting logic
- Invoke the sorting step after FillPrimaryKey() in the Reduce() workflow
---------
Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
parent
82081eba1b
commit
93411a388c
@ -104,3 +104,6 @@ const int64_t STORAGE_V1 = 1;
|
||||
const int64_t STORAGE_V2 = 2;
|
||||
|
||||
const std::string UNKNOW_CAST_FUNCTION_NAME = "unknown";
|
||||
|
||||
// EPSILON: absolute tolerance for comparing float numbers. Two values a and b
// are treated as equal when std::fabs(a - b) < EPSILON.
// NOTE(review): the literal is 1.19e-8, roughly one tenth of FLT_EPSILON
// (~1.19e-7) -- confirm this magnitude is intended.
|
||||
const float EPSILON = 0.0000000119;
|
||||
|
||||
@ -57,7 +57,7 @@ struct SearchResultPair {
|
||||
|
||||
bool
|
||||
operator>(const SearchResultPair& other) const {
|
||||
if (std::fabs(distance_ - other.distance_) < 0.0000000119) {
|
||||
if (std::fabs(distance_ - other.distance_) < EPSILON) {
|
||||
return primary_key_ < other.primary_key_;
|
||||
}
|
||||
return distance_ > other.distance_;
|
||||
|
||||
@ -55,6 +55,7 @@ ReduceHelper::Initialize() {
|
||||
void
|
||||
ReduceHelper::Reduce() {
|
||||
FillPrimaryKey();
|
||||
SortEqualScoresByPks();
|
||||
ReduceResultData();
|
||||
RefreshSearchResults();
|
||||
FillEntryData();
|
||||
@ -145,6 +146,83 @@ ReduceHelper::FillPrimaryKey() {
|
||||
num_segments_ = search_results_.size();
|
||||
}
|
||||
|
||||
void
|
||||
ReduceHelper::SortEqualScoresByPks() {
|
||||
tracer::AutoSpan span("ReduceHelper::SortEqualScoresByPks",
|
||||
tracer::GetRootSpan());
|
||||
for (auto& search_result : search_results_) {
|
||||
for (int64_t i = 0; i < search_result->total_nq_; i++) {
|
||||
auto nq_begin = search_result->topk_per_nq_prefix_sum_[i];
|
||||
auto nq_end = search_result->topk_per_nq_prefix_sum_[i + 1];
|
||||
SortEqualScoresOneNQ(nq_begin, nq_end, search_result);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ReduceHelper::SortEqualScoresOneNQ(size_t nq_begin,
|
||||
size_t nq_end,
|
||||
SearchResult* search_result) {
|
||||
if (nq_end - nq_begin <= 1)
|
||||
return;
|
||||
|
||||
size_t start = nq_begin;
|
||||
while (start < nq_end) {
|
||||
// find scope with same scores
|
||||
size_t end = start + 1;
|
||||
while (end < nq_end &&
|
||||
std::fabs(search_result->distances_[end] -
|
||||
search_result->distances_[start]) < EPSILON) {
|
||||
++end;
|
||||
}
|
||||
|
||||
if (end - start > 1) {
|
||||
// Create lightweight index array for sorting
|
||||
std::vector<size_t> indices(end - start);
|
||||
std::iota(indices.begin(), indices.end(), 0);
|
||||
|
||||
// Sort indices by comparing primary keys
|
||||
std::sort(indices.begin(),
|
||||
indices.end(),
|
||||
[&search_result, start](size_t i, size_t j) {
|
||||
return search_result->primary_keys_[start + i] <
|
||||
search_result->primary_keys_[start + j];
|
||||
});
|
||||
|
||||
// Apply in-place cyclic permutation
|
||||
for (size_t i = 0; i < indices.size();) {
|
||||
size_t target = indices[i];
|
||||
if (target == i) {
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Start of a new cycle
|
||||
PkType temp_pk =
|
||||
std::move(search_result->primary_keys_[start + i]);
|
||||
int64_t temp_offset = search_result->seg_offsets_[start + i];
|
||||
|
||||
size_t curr = i;
|
||||
while (indices[curr] != i) {
|
||||
size_t next = indices[curr];
|
||||
search_result->primary_keys_[start + curr] =
|
||||
std::move(search_result->primary_keys_[start + next]);
|
||||
search_result->seg_offsets_[start + curr] =
|
||||
search_result->seg_offsets_[start + next];
|
||||
indices[curr] = curr; // Mark as processed
|
||||
curr = next;
|
||||
}
|
||||
|
||||
search_result->primary_keys_[start + curr] = std::move(temp_pk);
|
||||
search_result->seg_offsets_[start + curr] = temp_offset;
|
||||
indices[curr] = curr;
|
||||
}
|
||||
}
|
||||
|
||||
start = end;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ReduceHelper::RefreshSearchResults() {
|
||||
tracer::AutoSpan span("ReduceHelper::RefreshSearchResults",
|
||||
|
||||
@ -89,6 +89,14 @@ class ReduceHelper {
|
||||
std::unique_ptr<milvus::proto::schema::SearchResultData>&
|
||||
search_res_data);
|
||||
|
||||
// Reorders, for every search result and every query, hits whose scores are
// equal (within EPSILON) so they appear in ascending primary-key order.
virtual void
|
||||
SortEqualScoresByPks();
|
||||
|
||||
// Sorts equal-score runs inside one query's slice [nq_begin, nq_end) of
// search_result by ascending primary key, permuting primary_keys_ and
// seg_offsets_ in place.
virtual void
|
||||
SortEqualScoresOneNQ(size_t nq_begin,
|
||||
size_t nq_end,
|
||||
SearchResult* search_result);
|
||||
|
||||
private:
|
||||
void
|
||||
Initialize();
|
||||
|
||||
@ -90,7 +90,7 @@ struct StreamSearchResultPair {
|
||||
|
||||
bool
|
||||
operator>(const StreamSearchResultPair& other) const {
|
||||
if (std::fabs(distance_ - other.distance_) < 0.0000000119) {
|
||||
if (std::fabs(distance_ - other.distance_) < EPSILON) {
|
||||
return primary_key_ < other.primary_key_;
|
||||
}
|
||||
return distance_ > other.distance_;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user