diff --git a/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.cpp b/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.cpp index 4a5ffef6f8..0047e20636 100644 --- a/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.cpp +++ b/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.cpp @@ -231,6 +231,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson( ValueType val, ValueType right_operand, const std::string& pointer) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // Nothing to do here since the caller has already handled valid_res. + if (data == nullptr) { + return; + } switch (op_type) { case proto::plan::OpType::Equal: { switch (arith_type) { @@ -598,6 +603,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray( ValueType val, ValueType right_operand, int index) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // Nothing to do here since the caller has already handled valid_res. + if (data == nullptr) { + return; + } switch (op_type) { case proto::plan::OpType::Equal: { switch (arith_type) { @@ -1457,6 +1467,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData( TargetBitmapView valid_res, HighPrecisionType value, HighPrecisionType right_operand) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // Nothing to do here since the caller has already handled valid_res. 
+ if (data == nullptr) { + return; + } switch (op_type) { case proto::plan::OpType::Equal: { switch (arith_type) { diff --git a/internal/core/src/exec/expression/BinaryRangeExpr.cpp b/internal/core/src/exec/expression/BinaryRangeExpr.cpp index 057fed18cc..cf2253714d 100644 --- a/internal/core/src/exec/expression/BinaryRangeExpr.cpp +++ b/internal/core/src/exec/expression/BinaryRangeExpr.cpp @@ -331,6 +331,12 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(EvalCtx& context) { TargetBitmapView valid_res, HighPrecisionType val1, HighPrecisionType val2) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // We only need to update processed_cursor for bitmap_input indexing. + if (data == nullptr) { + processed_cursor += size; + return; + } if (lower_inclusive && upper_inclusive) { BinaryRangeElementFunc func; func(val1, @@ -479,6 +485,12 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(EvalCtx& context) { TargetBitmapView valid_res, ValueType val1, ValueType val2) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // We only need to update processed_cursor for bitmap_input indexing. + if (data == nullptr) { + processed_cursor += size; + return; + } if (lower_inclusive && upper_inclusive) { BinaryRangeElementFuncForJson func; @@ -782,6 +794,12 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(EvalCtx& context) { ValueType val1, ValueType val2, int index) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // We only need to update processed_cursor for bitmap_input indexing. 
+ if (data == nullptr) { + processed_cursor += size; + return; + } if (lower_inclusive && upper_inclusive) { BinaryRangeElementFuncForArray func; diff --git a/internal/core/src/exec/expression/ExistsExpr.cpp b/internal/core/src/exec/expression/ExistsExpr.cpp index d475bd4659..377a59d020 100644 --- a/internal/core/src/exec/expression/ExistsExpr.cpp +++ b/internal/core/src/exec/expression/ExistsExpr.cpp @@ -148,6 +148,12 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment(EvalCtx& context) { TargetBitmapView res, TargetBitmapView valid_res, const std::string& pointer) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // We only need to update processed_cursor for bitmap_input indexing. + if (data == nullptr) { + processed_cursor += size; + return; + } bool has_bitmap_input = !bitmap_input.empty(); for (int i = 0; i < size; ++i) { auto offset = i; diff --git a/internal/core/src/exec/expression/Expr.h b/internal/core/src/exec/expression/Expr.h index 46c73ee016..5cb8a9644b 100644 --- a/internal/core/src/exec/expression/Expr.h +++ b/internal/core/src/exec/expression/Expr.h @@ -753,10 +753,39 @@ class SegmentExpr : public Expr { values...); } } else { + // Chunk is skipped by SkipIndex. + // We still need to: + // 1. Apply valid_data to handle nullable fields + // 2. 
Call func with nullptr to update internal cursors + // (e.g., processed_cursor for bitmap_input indexing) ApplyValidData(valid_data, res + processed_size, valid_res + processed_size, size); + // Call func with nullptr to update internal cursors + if constexpr (NeedSegmentOffsets) { + std::vector segment_offsets_array(size); + for (int64_t j = 0; j < size; ++j) { + segment_offsets_array[j] = static_cast( + size_per_chunk_ * i + data_pos + j); + } + func(nullptr, + nullptr, + nullptr, + segment_offsets_array.data(), + size, + res + processed_size, + valid_res + processed_size, + values...); + } else { + func(nullptr, + nullptr, + nullptr, + size, + res + processed_size, + valid_res + processed_size, + values...); + } } processed_size += size; @@ -887,6 +916,11 @@ class SegmentExpr : public Expr { } } } else { + // Chunk is skipped by SkipIndex. + // We still need to: + // 1. Apply valid_data to handle nullable fields + // 2. Call func with nullptr to update internal cursors + // (e.g., processed_cursor for bitmap_input indexing) const bool* valid_data; if constexpr (std::is_same_v || std::is_same_v || @@ -910,6 +944,25 @@ class SegmentExpr : public Expr { valid_res + processed_size, size); } + // Call func with nullptr to update internal cursors + if constexpr (NeedSegmentOffsets) { + func(nullptr, + nullptr, + nullptr, + segment_offsets_array.data(), + size, + res + processed_size, + valid_res + processed_size, + values...); + } else { + func(nullptr, + nullptr, + nullptr, + size, + res + processed_size, + valid_res + processed_size, + values...); + } } processed_size += size; diff --git a/internal/core/src/exec/expression/JsonContainsExpr.cpp b/internal/core/src/exec/expression/JsonContainsExpr.cpp index 94b6de2755..5b99f13616 100644 --- a/internal/core/src/exec/expression/JsonContainsExpr.cpp +++ b/internal/core/src/exec/expression/JsonContainsExpr.cpp @@ -242,6 +242,12 @@ PhyJsonContainsFilterExpr::ExecArrayContains(EvalCtx& context) { TargetBitmapView res, 
TargetBitmapView valid_res, const std::shared_ptr& elements) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // We only need to update processed_cursor for bitmap_input indexing. + if (data == nullptr) { + processed_cursor += size; + return; + } auto executor = [&](size_t i) { const auto& array = data[i]; for (int j = 0; j < array.length(); ++j) { @@ -336,6 +342,12 @@ PhyJsonContainsFilterExpr::ExecJsonContains(EvalCtx& context) { TargetBitmapView valid_res, const std::string& pointer, const std::shared_ptr& elements) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // We only need to update processed_cursor for bitmap_input indexing. + if (data == nullptr) { + processed_cursor += size; + return; + } auto executor = [&](size_t i) { auto doc = data[i].doc(); auto array = doc.at_pointer(pointer).get_array(); @@ -566,6 +578,12 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray(EvalCtx& context) { TargetBitmapView valid_res, const std::string& pointer, const std::vector& elements) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // We only need to update processed_cursor for bitmap_input indexing. + if (data == nullptr) { + processed_cursor += size; + return; + } auto executor = [&](size_t i) -> bool { auto doc = data[i].doc(); auto array = doc.at_pointer(pointer).get_array(); @@ -764,6 +782,12 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(EvalCtx& context) { TargetBitmapView res, TargetBitmapView valid_res, const std::set& elements) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // We only need to update processed_cursor for bitmap_input indexing. 
+ if (data == nullptr) { + processed_cursor += size; + return; + } auto executor = [&](size_t i) { std::set tmp_elements(elements); // Note: array can only be iterated once @@ -864,6 +888,12 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(EvalCtx& context) { TargetBitmapView valid_res, const std::string& pointer, const std::set& elements) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // We only need to update processed_cursor for bitmap_input indexing. + if (data == nullptr) { + processed_cursor += size; + return; + } auto executor = [&](const size_t i) -> bool { auto doc = data[i].doc(); auto array = doc.at_pointer(pointer).get_array(); @@ -1097,6 +1127,12 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(EvalCtx& context) { const std::string& pointer, const std::vector& elements, const std::unordered_set elements_index) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // We only need to update processed_cursor for bitmap_input indexing. + if (data == nullptr) { + processed_cursor += size; + return; + } auto executor = [&](size_t i) -> bool { const auto& json = data[i]; auto doc = json.dom_doc(); @@ -1423,6 +1459,12 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(EvalCtx& context) { TargetBitmapView valid_res, const std::string& pointer, const std::vector& elements) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // We only need to update processed_cursor for bitmap_input indexing. + if (data == nullptr) { + processed_cursor += size; + return; + } auto executor = [&](const size_t i) { auto doc = data[i].doc(); auto array = doc.at_pointer(pointer).get_array(); @@ -1631,6 +1673,12 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(EvalCtx& context) { TargetBitmapView valid_res, const std::string& pointer, const std::vector& elements) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // We only need to update processed_cursor for bitmap_input indexing. 
+ if (data == nullptr) { + processed_cursor += size; + return; + } auto executor = [&](const size_t i) { auto& json = data[i]; auto doc = json.dom_doc(); diff --git a/internal/core/src/exec/expression/TermExpr.cpp b/internal/core/src/exec/expression/TermExpr.cpp index 5544cd47db..5f8497319f 100644 --- a/internal/core/src/exec/expression/TermExpr.cpp +++ b/internal/core/src/exec/expression/TermExpr.cpp @@ -287,6 +287,12 @@ PhyTermFilterExpr::ExecTermArrayVariableInField(EvalCtx& context) { TargetBitmapView res, TargetBitmapView valid_res, const ValueType& target_val) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // We only need to update processed_cursor for bitmap_input indexing. + if (data == nullptr) { + processed_cursor += size; + return; + } auto executor = [&](size_t offset) { for (int i = 0; i < data[offset].length(); i++) { auto val = data[offset].template get_data(i); @@ -383,6 +389,12 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(EvalCtx& context) { TargetBitmapView valid_res, int index, const std::shared_ptr& term_set) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // We only need to update processed_cursor for bitmap_input indexing. + if (data == nullptr) { + processed_cursor += size; + return; + } bool has_bitmap_input = !bitmap_input.empty(); for (int i = 0; i < size; ++i) { auto offset = i; @@ -474,6 +486,12 @@ PhyTermFilterExpr::ExecTermJsonVariableInField(EvalCtx& context) { TargetBitmapView valid_res, const std::string pointer, const ValueType& target_val) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // We only need to update processed_cursor for bitmap_input indexing. 
+ if (data == nullptr) { + processed_cursor += size; + return; + } auto executor = [&](size_t i) { auto doc = data[i].doc(); auto array = doc.at_pointer(pointer).get_array(); @@ -749,6 +767,12 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(EvalCtx& context) { TargetBitmapView valid_res, const std::string pointer, const std::shared_ptr& terms) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // We only need to update processed_cursor for bitmap_input indexing. + if (data == nullptr) { + processed_cursor += size; + return; + } auto executor = [&](size_t i) { auto x = data[i].template at(pointer); if (x.error()) { @@ -944,6 +968,12 @@ PhyTermFilterExpr::ExecVisitorImplForData(EvalCtx& context) { TargetBitmapView res, TargetBitmapView valid_res, const std::shared_ptr& vals) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // We only need to update processed_cursor for bitmap_input indexing. + if (data == nullptr) { + processed_cursor += size; + return; + } bool has_bitmap_input = !bitmap_input.empty(); for (size_t i = 0; i < size; ++i) { auto offset = i; diff --git a/internal/core/src/exec/expression/UnaryExpr.cpp b/internal/core/src/exec/expression/UnaryExpr.cpp index 3cbfd24d5e..6206f6938a 100644 --- a/internal/core/src/exec/expression/UnaryExpr.cpp +++ b/internal/core/src/exec/expression/UnaryExpr.cpp @@ -1558,6 +1558,12 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(EvalCtx& context) { TargetBitmapView res, TargetBitmapView valid_res, IndexInnerType val) { + // If data is nullptr, this chunk was skipped by SkipIndex. + // We only need to update processed_cursor for bitmap_input indexing. 
+ if (data == nullptr) { + processed_cursor += size; + return; + } switch (expr_type) { case proto::plan::GreaterThan: { UnaryElementFunc func; diff --git a/internal/core/unittest/test_exec.cpp b/internal/core/unittest/test_exec.cpp index d2506d3638..a4cf6ce583 100644 --- a/internal/core/unittest/test_exec.cpp +++ b/internal/core/unittest/test_exec.cpp @@ -1221,3 +1221,291 @@ TEST_P(TaskTest, Test_MultiInConvert) { EXPECT_EQ(inputs.size(), 3); } } + +// This test verifies the fix for https://github.com/milvus-io/milvus/issues/46053. +// +// Bug scenario: +// - Expression: string_field == "target" AND int64_field == X AND float_field > Y +// - Data is stored in multiple chunks +// - SkipIndex skips some chunks for the float range condition +// - Expression reordering: numeric expressions execute before string expressions +// - When a chunk is skipped, processed_cursor in execute_sub_batch wasn't updated +// - This caused bitmap_input indices to be misaligned for subsequent expressions +// +// The fix ensures that when a chunk is skipped by SkipIndex, we still call +// func(nullptr, ...) so that execute_sub_batch can update its internal cursors. 
+TEST(TaskTest, SkipIndexWithBitmapInputAlignment) { + using namespace milvus; + using namespace milvus::query; + using namespace milvus::segcore; + using namespace milvus::exec; + + auto schema = std::make_shared(); + auto dim = 4; + auto metrics_type = "L2"; + auto fake_vec_fid = schema->AddDebugField( + "fakeVec", DataType::VECTOR_FLOAT, dim, metrics_type); + auto pk_fid = schema->AddDebugField("pk", DataType::INT64); + schema->set_primary_field_id(pk_fid); + auto string_fid = schema->AddDebugField("string_field", DataType::VARCHAR); + auto int64_fid = schema->AddDebugField("int64_field", DataType::INT64); + auto float_fid = schema->AddDebugField("float_field", DataType::FLOAT); + + auto segment = CreateSealedSegment(schema); + auto cm = milvus::storage::RemoteChunkManagerSingleton::GetInstance() + .GetRemoteChunkManager(); + + // Create two chunks with different data distributions: + // Chunk 0: float values [10, 20, 30, 40, 50] - will be SKIPPED by float > 60 + // Chunk 1: float values [65, 70, 75, 80, 85] - will NOT be skipped + // + // We place the target row (string="target_value", int64=999) in chunk 1 at index 2 + // (global row 7) with float=75, which satisfies float > 60 + + const size_t chunk_size = 5; + + // Chunk 0: floats that will cause this chunk to be skipped (max=50 < 60) + std::vector floats_chunk0 = {10.0f, 20.0f, 30.0f, 40.0f, 50.0f}; + auto float_field_data_0 = storage::CreateFieldData( + DataType::FLOAT, DataType::NONE, false, 1, chunk_size); + float_field_data_0->FillFieldData(floats_chunk0.data(), chunk_size); + + // Chunk 1: floats that will NOT be skipped (min=65 > 60) + std::vector floats_chunk1 = {65.0f, 70.0f, 75.0f, 80.0f, 85.0f}; + auto float_field_data_1 = storage::CreateFieldData( + DataType::FLOAT, DataType::NONE, false, 1, chunk_size); + float_field_data_1->FillFieldData(floats_chunk1.data(), chunk_size); + + auto float_load_info = + PrepareSingleFieldInsertBinlog(kCollectionID, + kPartitionID, + kSegmentID, + float_fid.get(), +
{float_field_data_0, float_field_data_1}, + cm); + segment->LoadFieldData(float_load_info); + + // Int64 field - target value 999 at chunk 1 index 2 + std::vector int64s_chunk0 = {1, 2, 3, 4, 5}; + auto int64_field_data_0 = storage::CreateFieldData( + DataType::INT64, DataType::NONE, false, 1, chunk_size); + int64_field_data_0->FillFieldData(int64s_chunk0.data(), chunk_size); + + std::vector int64s_chunk1 = {6, 7, 999, 9, 10}; // 999 at index 2 + auto int64_field_data_1 = storage::CreateFieldData( + DataType::INT64, DataType::NONE, false, 1, chunk_size); + int64_field_data_1->FillFieldData(int64s_chunk1.data(), chunk_size); + + auto int64_load_info = + PrepareSingleFieldInsertBinlog(kCollectionID, + kPartitionID, + kSegmentID, + int64_fid.get(), + {int64_field_data_0, int64_field_data_1}, + cm); + segment->LoadFieldData(int64_load_info); + + // String field - target value "target_value" at chunk 1 index 2 + std::vector strings_chunk0 = {"a", "b", "c", "d", "e"}; + auto string_field_data_0 = storage::CreateFieldData( + DataType::VARCHAR, DataType::NONE, false, 1, chunk_size); + string_field_data_0->FillFieldData(strings_chunk0.data(), chunk_size); + + std::vector strings_chunk1 = { + "f", "g", "target_value", "i", "j"}; + auto string_field_data_1 = storage::CreateFieldData( + DataType::VARCHAR, DataType::NONE, false, 1, chunk_size); + string_field_data_1->FillFieldData(strings_chunk1.data(), chunk_size); + + auto string_load_info = PrepareSingleFieldInsertBinlog( + kCollectionID, + kPartitionID, + kSegmentID, + string_fid.get(), + {string_field_data_0, string_field_data_1}, + cm); + segment->LoadFieldData(string_load_info); + + // PK field + std::vector pks_chunk0 = {100, 101, 102, 103, 104}; + auto pk_field_data_0 = storage::CreateFieldData( + DataType::INT64, DataType::NONE, false, 1, chunk_size); + pk_field_data_0->FillFieldData(pks_chunk0.data(), chunk_size); + + std::vector pks_chunk1 = {105, 106, 107, 108, 109}; + auto pk_field_data_1 =
storage::CreateFieldData( + DataType::INT64, DataType::NONE, false, 1, chunk_size); + pk_field_data_1->FillFieldData(pks_chunk1.data(), chunk_size); + + auto pk_load_info = + PrepareSingleFieldInsertBinlog(kCollectionID, + kPartitionID, + kSegmentID, + pk_fid.get(), + {pk_field_data_0, pk_field_data_1}, + cm); + segment->LoadFieldData(pk_load_info); + + // Vector field (required but not used in filter) + std::vector vec_chunk0(chunk_size * dim, 1.0f); + auto vec_field_data_0 = storage::CreateFieldData( + DataType::VECTOR_FLOAT, DataType::NONE, false, dim, chunk_size); + vec_field_data_0->FillFieldData(vec_chunk0.data(), chunk_size); + + std::vector vec_chunk1(chunk_size * dim, 2.0f); + auto vec_field_data_1 = storage::CreateFieldData( + DataType::VECTOR_FLOAT, DataType::NONE, false, dim, chunk_size); + vec_field_data_1->FillFieldData(vec_chunk1.data(), chunk_size); + + auto vec_load_info = + PrepareSingleFieldInsertBinlog(kCollectionID, + kPartitionID, + kSegmentID, + fake_vec_fid.get(), + {vec_field_data_0, vec_field_data_1}, + cm); + segment->LoadFieldData(vec_load_info); + + // Row IDs + std::vector row_ids_chunk0 = {0, 1, 2, 3, 4}; + auto row_ids_data_0 = storage::CreateFieldData( + DataType::INT64, DataType::NONE, false, 1, chunk_size); + row_ids_data_0->FillFieldData(row_ids_chunk0.data(), chunk_size); + + std::vector row_ids_chunk1 = {5, 6, 7, 8, 9}; + auto row_ids_data_1 = storage::CreateFieldData( + DataType::INT64, DataType::NONE, false, 1, chunk_size); + row_ids_data_1->FillFieldData(row_ids_chunk1.data(), chunk_size); + + auto row_id_load_info = + PrepareSingleFieldInsertBinlog(kCollectionID, + kPartitionID, + kSegmentID, + RowFieldID.get(), + {row_ids_data_0, row_ids_data_1}, + cm); + segment->LoadFieldData(row_id_load_info); + + // Timestamps + std::vector timestamps_chunk0 = {1, 1, 1, 1, 1}; + auto ts_data_0 = storage::CreateFieldData( + DataType::INT64, DataType::NONE, false, 1, chunk_size); + ts_data_0->FillFieldData(timestamps_chunk0.data(),
chunk_size); + + std::vector timestamps_chunk1 = {1, 1, 1, 1, 1}; + auto ts_data_1 = storage::CreateFieldData( + DataType::INT64, DataType::NONE, false, 1, chunk_size); + ts_data_1->FillFieldData(timestamps_chunk1.data(), chunk_size); + + auto ts_load_info = PrepareSingleFieldInsertBinlog(kCollectionID, + kPartitionID, + kSegmentID, + TimestampFieldID.get(), + {ts_data_0, ts_data_1}, + cm); + segment->LoadFieldData(ts_load_info); + + // Build the expression: + // string_field == "target_value" AND int64_field == 999 AND float_field > 60 + // + // Due to expression reordering, this is expected to execute as (the order itself is not asserted here): + // 1. float_field > 60 (numeric, runs first) - SkipIndex skips chunk 0 + // 2. int64_field == 999 (numeric, runs second) + // 3. string_field == "target_value" (string, runs last) + + // string_field == "target_value" + proto::plan::GenericValue string_val; + string_val.set_string_val("target_value"); + auto string_expr = std::make_shared( + expr::ColumnInfo(string_fid, DataType::VARCHAR), + proto::plan::OpType::Equal, + string_val, + std::vector{}); + + // int64_field == 999 + proto::plan::GenericValue int64_val; + int64_val.set_int64_val(999); + auto int64_expr = std::make_shared( + expr::ColumnInfo(int64_fid, DataType::INT64), + proto::plan::OpType::Equal, + int64_val, + std::vector{}); + + // float_field > 60 + proto::plan::GenericValue float_val; + float_val.set_float_val(60.0f); + auto float_expr = std::make_shared( + expr::ColumnInfo(float_fid, DataType::FLOAT), + proto::plan::OpType::GreaterThan, + float_val, + std::vector{}); + + // Build AND expression, nested as (string_expr AND int64_expr) AND float_expr + auto and_expr1 = std::make_shared( + expr::LogicalBinaryExpr::OpType::And, string_expr, int64_expr); + auto and_expr2 = std::make_shared( + expr::LogicalBinaryExpr::OpType::And, and_expr1, float_expr); + + // Verify SkipIndex is working before running the expression: + // Check whether chunk 0 and chunk 1 can be skipped for float > 60 + auto& skip_index = segment->GetSkipIndex(); +
bool chunk0_can_skip = skip_index.CanSkipUnaryRange( + float_fid, 0, proto::plan::OpType::GreaterThan, 60.0f); + bool chunk1_can_skip = skip_index.CanSkipUnaryRange( + float_fid, 1, proto::plan::OpType::GreaterThan, 60.0f); + + // Chunk 0 should be skippable (max=50 < 60), chunk 1 should not (min=65 > 60) + EXPECT_TRUE(chunk0_can_skip) + << "Chunk 0 should be skippable for float > 60 (max=50)"; + EXPECT_FALSE(chunk1_can_skip) + << "Chunk 1 should NOT be skippable for float > 60 (min=65)"; + + std::vector sources; + auto filter_node = std::make_shared( + "plannode id 1", and_expr2, sources); + auto plan = plan::PlanFragment(filter_node); + + auto query_context = std::make_shared( + "test_skip_index_bitmap_alignment", + segment.get(), + chunk_size * 2, // total rows (both chunks) + MAX_TIMESTAMP, + 0, + 0, + query::PlanOptions{false}, + std::make_shared( + std::unordered_map{})); + + auto task = Task::Create("task_skip_index_bitmap", plan, 0, query_context); + + int64_t total_rows = 0; + int64_t filtered_rows = 0; + for (;;) { + auto result = task->Next(); + if (!result) { + break; + } + auto col_vec = + std::dynamic_pointer_cast(result->child(0)); + if (col_vec && col_vec->IsBitmap()) { + TargetBitmapView view(col_vec->GetRawData(), col_vec->size()); + total_rows += col_vec->size(); + filtered_rows += + view.count(); // set bits are rows filtered OUT (don't match) + } + } + + int64_t num_matched = total_rows - filtered_rows; + + // Expected result: exactly 1 row should match + // - Row at chunk 1, index 2 (global index 7) has: + // - string_field = "target_value" ✓ + // - int64_field = 999 ✓ + // - float_field = 75 > 60 ✓ + // + // With the bug (before fix): 0 rows would match because bitmap_input + // indices were misaligned after chunk 0 was skipped. + // + // With the fix: exactly 1 row matches (total_rows - filtered_rows == 1). + EXPECT_EQ(num_matched, 1); +}