From 59752f216d6656e055547c8cf7ba0cf499b29ce5 Mon Sep 17 00:00:00 2001 From: Xinyi7 Date: Thu, 4 Dec 2025 04:35:17 -0500 Subject: [PATCH] fix: add check in batch_score function to prevent query node seg fault (#46025) previously we saw that when doing reranker with phrase matching, the query node throws a segmentation fault error. github issue link: https://github.com/milvus-io/milvus/issues/45990 --------- Signed-off-by: Xinyi Jiang Co-authored-by: Xinyi Jiang --- internal/core/src/rescores/Scorer.cpp | 18 ++++-- internal/core/unittest/CMakeLists.txt | 1 + internal/core/unittest/test_scorer.cpp | 84 ++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 5 deletions(-) create mode 100644 internal/core/unittest/test_scorer.cpp diff --git a/internal/core/src/rescores/Scorer.cpp b/internal/core/src/rescores/Scorer.cpp index 7d9fcb1a0b..a781ded419 100644 --- a/internal/core/src/rescores/Scorer.cpp +++ b/internal/core/src/rescores/Scorer.cpp @@ -48,10 +48,18 @@ WeightScorer::batch_score(milvus::OpContext* op_ctx, const FixedVector& offsets, const TargetBitmap& bitmap, std::vector>& boost_scores) { + auto bitmap_size = bitmap.size(); for (auto i = 0; i < offsets.size(); i++) { - if (bitmap[offsets[i]] > 0) { - set_score(boost_scores[i], mode); + auto offset = offsets[i]; + // Bounds check: offset must be within bitmap size. + // Race condition: text index may lag behind vector index, + // causing offsets to reference rows not yet in text index. + if (offset >= 0 && static_cast(offset) < bitmap_size) { + if (bitmap[offset] > 0) { + set_score(boost_scores[i], mode); + } } + // If offset is out of bounds, treat as "no match" (don't apply boost) } }; @@ -163,9 +171,9 @@ RandomScorer::random_score(milvus::OpContext* op_ctx, "now only support int64 field as seed"); // TODO: Support varchar and int32 field as random field. - auto datas = array->scalars().long_data(); - for (int i = 0; i < datas.data_size(); i++) { - auto a = datas.data()[i]; + auto data = array->scalars().long_data(); + for (int i = 0; i < data.data_size(); i++) { + auto a = data.data()[i]; auto random_score = hash_to_double(MurmurHash3_x64_64_Special(a, seed_)); if (idx == nullptr) { diff --git a/internal/core/unittest/CMakeLists.txt b/internal/core/unittest/CMakeLists.txt index b7d5293c1b..9da374daf3 100644 --- a/internal/core/unittest/CMakeLists.txt +++ b/internal/core/unittest/CMakeLists.txt @@ -43,6 +43,7 @@ set(MILVUS_TEST_FILES test_index_wrapper.cpp test_integer_overflow.cpp test_query.cpp + test_scorer.cpp test_sealed.cpp test_storage.cpp test_string_expr.cpp diff --git a/internal/core/unittest/test_scorer.cpp b/internal/core/unittest/test_scorer.cpp new file mode 100644 index 0000000000..7bd4522760 --- /dev/null +++ b/internal/core/unittest/test_scorer.cpp @@ -0,0 +1,84 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + +#include +#include +#include + +#include "common/Types.h" +#include "rescores/Scorer.h" +#include "pb/plan.pb.h" + +using namespace milvus; +using namespace milvus::rescores; + +class WeightScorerTest : public ::testing::Test { + protected: + void + SetUp() override { + // Create a WeightScorer with no filter and weight of 2.0 + scorer_ = std::make_unique(nullptr, 2.0f); + } + + std::unique_ptr scorer_; +}; + +// Test: TargetBitmap batch_score with valid offsets (all within bitmap bounds) +TEST_F(WeightScorerTest, BatchScoreTargetBitmapValidOffsets) { + TargetBitmap bitmap(100); + bitmap.set(10); + bitmap.set(50); + bitmap.set(90); + + // Offsets that are all within bitmap bounds + FixedVector offsets = {10, 20, 50, 90}; + std::vector> boost_scores(offsets.size(), + std::nullopt); + + proto::plan::FunctionMode mode = proto::plan::FunctionMode::FunctionModeSum; + + scorer_->batch_score(nullptr, nullptr, mode, offsets, bitmap, boost_scores); + + // Positions 10, 50, 90 should have scores (they are set in bitmap) + EXPECT_TRUE(boost_scores[0].has_value()); + EXPECT_FALSE(boost_scores[1].has_value()); + EXPECT_TRUE(boost_scores[2].has_value()); + EXPECT_TRUE(boost_scores[3].has_value()); +} + +// Test: TargetBitmap batch_score with out-of-bounds offsets (should NOT crash) +TEST_F(WeightScorerTest, BatchScoreTargetBitmapOutOfBoundsOffsets) { + // Create a small bitmap of size 50 + TargetBitmap bitmap(50); + bitmap.set(10); // Set bit at position 10 + bitmap.set(40); // Set bit at position 40 + + // Offsets where some are OUT OF BOUNDS (>= 50) + // This simulates the race condition where text index lags behind vector index + FixedVector offsets = {10, 40, 60, 100, 200}; + std::vector> boost_scores(offsets.size(), + std::nullopt); + + proto::plan::FunctionMode mode = proto::plan::FunctionMode::FunctionModeSum; + + // Should NOT crash! Out-of-bounds offsets should be safely skipped + ASSERT_NO_THROW(scorer_->batch_score( + nullptr, nullptr, mode, offsets, bitmap, boost_scores)); + + // In-bounds offsets should be scored correctly + EXPECT_TRUE(boost_scores[0].has_value()); + EXPECT_TRUE(boost_scores[1].has_value()); + + // Out-of-bounds offsets should NOT have scores (safely skipped) + EXPECT_FALSE(boost_scores[2].has_value()); + EXPECT_FALSE(boost_scores[3].has_value()); + EXPECT_FALSE(boost_scores[4].has_value()); +}