mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 09:08:43 +08:00
fix: move cursor after skip index skipped a chunk (#46054)
issue: #46053
Signed-off-by: Buqian Zheng <zhengbuqian@gmail.com>
This commit is contained in:
parent
3d17ddb71b
commit
1372e84d7f
@ -231,6 +231,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson(
|
||||
ValueType val,
|
||||
ValueType right_operand,
|
||||
const std::string& pointer) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// Nothing to do here since the caller has already handled valid_res.
|
||||
if (data == nullptr) {
|
||||
return;
|
||||
}
|
||||
switch (op_type) {
|
||||
case proto::plan::OpType::Equal: {
|
||||
switch (arith_type) {
|
||||
@ -598,6 +603,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray(
|
||||
ValueType val,
|
||||
ValueType right_operand,
|
||||
int index) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// Nothing to do here since the caller has already handled valid_res.
|
||||
if (data == nullptr) {
|
||||
return;
|
||||
}
|
||||
switch (op_type) {
|
||||
case proto::plan::OpType::Equal: {
|
||||
switch (arith_type) {
|
||||
@ -1457,6 +1467,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData(
|
||||
TargetBitmapView valid_res,
|
||||
HighPrecisionType value,
|
||||
HighPrecisionType right_operand) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// Nothing to do here since the caller has already handled valid_res.
|
||||
if (data == nullptr) {
|
||||
return;
|
||||
}
|
||||
switch (op_type) {
|
||||
case proto::plan::OpType::Equal: {
|
||||
switch (arith_type) {
|
||||
|
||||
@ -331,6 +331,12 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(EvalCtx& context) {
|
||||
TargetBitmapView valid_res,
|
||||
HighPrecisionType val1,
|
||||
HighPrecisionType val2) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// We only need to update processed_cursor for bitmap_input indexing.
|
||||
if (data == nullptr) {
|
||||
processed_cursor += size;
|
||||
return;
|
||||
}
|
||||
if (lower_inclusive && upper_inclusive) {
|
||||
BinaryRangeElementFunc<T, true, true, filter_type> func;
|
||||
func(val1,
|
||||
@ -479,6 +485,12 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(EvalCtx& context) {
|
||||
TargetBitmapView valid_res,
|
||||
ValueType val1,
|
||||
ValueType val2) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// We only need to update processed_cursor for bitmap_input indexing.
|
||||
if (data == nullptr) {
|
||||
processed_cursor += size;
|
||||
return;
|
||||
}
|
||||
if (lower_inclusive && upper_inclusive) {
|
||||
BinaryRangeElementFuncForJson<ValueType, true, true, filter_type>
|
||||
func;
|
||||
@ -782,6 +794,12 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(EvalCtx& context) {
|
||||
ValueType val1,
|
||||
ValueType val2,
|
||||
int index) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// We only need to update processed_cursor for bitmap_input indexing.
|
||||
if (data == nullptr) {
|
||||
processed_cursor += size;
|
||||
return;
|
||||
}
|
||||
if (lower_inclusive && upper_inclusive) {
|
||||
BinaryRangeElementFuncForArray<ValueType, true, true, filter_type>
|
||||
func;
|
||||
|
||||
@ -148,6 +148,12 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment(EvalCtx& context) {
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
const std::string& pointer) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// We only need to update processed_cursor for bitmap_input indexing.
|
||||
if (data == nullptr) {
|
||||
processed_cursor += size;
|
||||
return;
|
||||
}
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
|
||||
@ -753,10 +753,39 @@ class SegmentExpr : public Expr {
|
||||
values...);
|
||||
}
|
||||
} else {
|
||||
// Chunk is skipped by SkipIndex.
|
||||
// We still need to:
|
||||
// 1. Apply valid_data to handle nullable fields
|
||||
// 2. Call func with nullptr to update internal cursors
|
||||
// (e.g., processed_cursor for bitmap_input indexing)
|
||||
ApplyValidData(valid_data,
|
||||
res + processed_size,
|
||||
valid_res + processed_size,
|
||||
size);
|
||||
// Call func with nullptr to update internal cursors
|
||||
if constexpr (NeedSegmentOffsets) {
|
||||
std::vector<int32_t> segment_offsets_array(size);
|
||||
for (int64_t j = 0; j < size; ++j) {
|
||||
segment_offsets_array[j] = static_cast<int32_t>(
|
||||
size_per_chunk_ * i + data_pos + j);
|
||||
}
|
||||
func(nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
segment_offsets_array.data(),
|
||||
size,
|
||||
res + processed_size,
|
||||
valid_res + processed_size,
|
||||
values...);
|
||||
} else {
|
||||
func(nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
size,
|
||||
res + processed_size,
|
||||
valid_res + processed_size,
|
||||
values...);
|
||||
}
|
||||
}
|
||||
|
||||
processed_size += size;
|
||||
@ -887,6 +916,11 @@ class SegmentExpr : public Expr {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Chunk is skipped by SkipIndex.
|
||||
// We still need to:
|
||||
// 1. Apply valid_data to handle nullable fields
|
||||
// 2. Call func with nullptr to update internal cursors
|
||||
// (e.g., processed_cursor for bitmap_input indexing)
|
||||
const bool* valid_data;
|
||||
if constexpr (std::is_same_v<T, std::string_view> ||
|
||||
std::is_same_v<T, Json> ||
|
||||
@ -910,6 +944,25 @@ class SegmentExpr : public Expr {
|
||||
valid_res + processed_size,
|
||||
size);
|
||||
}
|
||||
// Call func with nullptr to update internal cursors
|
||||
if constexpr (NeedSegmentOffsets) {
|
||||
func(nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
segment_offsets_array.data(),
|
||||
size,
|
||||
res + processed_size,
|
||||
valid_res + processed_size,
|
||||
values...);
|
||||
} else {
|
||||
func(nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
size,
|
||||
res + processed_size,
|
||||
valid_res + processed_size,
|
||||
values...);
|
||||
}
|
||||
}
|
||||
|
||||
processed_size += size;
|
||||
|
||||
@ -242,6 +242,12 @@ PhyJsonContainsFilterExpr::ExecArrayContains(EvalCtx& context) {
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
const std::shared_ptr<MultiElement>& elements) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// We only need to update processed_cursor for bitmap_input indexing.
|
||||
if (data == nullptr) {
|
||||
processed_cursor += size;
|
||||
return;
|
||||
}
|
||||
auto executor = [&](size_t i) {
|
||||
const auto& array = data[i];
|
||||
for (int j = 0; j < array.length(); ++j) {
|
||||
@ -336,6 +342,12 @@ PhyJsonContainsFilterExpr::ExecJsonContains(EvalCtx& context) {
|
||||
TargetBitmapView valid_res,
|
||||
const std::string& pointer,
|
||||
const std::shared_ptr<MultiElement>& elements) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// We only need to update processed_cursor for bitmap_input indexing.
|
||||
if (data == nullptr) {
|
||||
processed_cursor += size;
|
||||
return;
|
||||
}
|
||||
auto executor = [&](size_t i) {
|
||||
auto doc = data[i].doc();
|
||||
auto array = doc.at_pointer(pointer).get_array();
|
||||
@ -566,6 +578,12 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray(EvalCtx& context) {
|
||||
TargetBitmapView valid_res,
|
||||
const std::string& pointer,
|
||||
const std::vector<proto::plan::Array>& elements) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// We only need to update processed_cursor for bitmap_input indexing.
|
||||
if (data == nullptr) {
|
||||
processed_cursor += size;
|
||||
return;
|
||||
}
|
||||
auto executor = [&](size_t i) -> bool {
|
||||
auto doc = data[i].doc();
|
||||
auto array = doc.at_pointer(pointer).get_array();
|
||||
@ -764,6 +782,12 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(EvalCtx& context) {
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
const std::set<GetType>& elements) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// We only need to update processed_cursor for bitmap_input indexing.
|
||||
if (data == nullptr) {
|
||||
processed_cursor += size;
|
||||
return;
|
||||
}
|
||||
auto executor = [&](size_t i) {
|
||||
std::set<GetType> tmp_elements(elements);
|
||||
// Note: array can only be iterated once
|
||||
@ -864,6 +888,12 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(EvalCtx& context) {
|
||||
TargetBitmapView valid_res,
|
||||
const std::string& pointer,
|
||||
const std::set<GetType>& elements) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// We only need to update processed_cursor for bitmap_input indexing.
|
||||
if (data == nullptr) {
|
||||
processed_cursor += size;
|
||||
return;
|
||||
}
|
||||
auto executor = [&](const size_t i) -> bool {
|
||||
auto doc = data[i].doc();
|
||||
auto array = doc.at_pointer(pointer).get_array();
|
||||
@ -1097,6 +1127,12 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(EvalCtx& context) {
|
||||
const std::string& pointer,
|
||||
const std::vector<proto::plan::GenericValue>& elements,
|
||||
const std::unordered_set<int> elements_index) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// We only need to update processed_cursor for bitmap_input indexing.
|
||||
if (data == nullptr) {
|
||||
processed_cursor += size;
|
||||
return;
|
||||
}
|
||||
auto executor = [&](size_t i) -> bool {
|
||||
const auto& json = data[i];
|
||||
auto doc = json.dom_doc();
|
||||
@ -1423,6 +1459,12 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(EvalCtx& context) {
|
||||
TargetBitmapView valid_res,
|
||||
const std::string& pointer,
|
||||
const std::vector<proto::plan::Array>& elements) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// We only need to update processed_cursor for bitmap_input indexing.
|
||||
if (data == nullptr) {
|
||||
processed_cursor += size;
|
||||
return;
|
||||
}
|
||||
auto executor = [&](const size_t i) {
|
||||
auto doc = data[i].doc();
|
||||
auto array = doc.at_pointer(pointer).get_array();
|
||||
@ -1631,6 +1673,12 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(EvalCtx& context) {
|
||||
TargetBitmapView valid_res,
|
||||
const std::string& pointer,
|
||||
const std::vector<proto::plan::GenericValue>& elements) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// We only need to update processed_cursor for bitmap_input indexing.
|
||||
if (data == nullptr) {
|
||||
processed_cursor += size;
|
||||
return;
|
||||
}
|
||||
auto executor = [&](const size_t i) {
|
||||
auto& json = data[i];
|
||||
auto doc = json.dom_doc();
|
||||
|
||||
@ -287,6 +287,12 @@ PhyTermFilterExpr::ExecTermArrayVariableInField(EvalCtx& context) {
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
const ValueType& target_val) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// We only need to update processed_cursor for bitmap_input indexing.
|
||||
if (data == nullptr) {
|
||||
processed_cursor += size;
|
||||
return;
|
||||
}
|
||||
auto executor = [&](size_t offset) {
|
||||
for (int i = 0; i < data[offset].length(); i++) {
|
||||
auto val = data[offset].template get_data<GetType>(i);
|
||||
@ -383,6 +389,12 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(EvalCtx& context) {
|
||||
TargetBitmapView valid_res,
|
||||
int index,
|
||||
const std::shared_ptr<MultiElement>& term_set) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// We only need to update processed_cursor for bitmap_input indexing.
|
||||
if (data == nullptr) {
|
||||
processed_cursor += size;
|
||||
return;
|
||||
}
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
@ -474,6 +486,12 @@ PhyTermFilterExpr::ExecTermJsonVariableInField(EvalCtx& context) {
|
||||
TargetBitmapView valid_res,
|
||||
const std::string pointer,
|
||||
const ValueType& target_val) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// We only need to update processed_cursor for bitmap_input indexing.
|
||||
if (data == nullptr) {
|
||||
processed_cursor += size;
|
||||
return;
|
||||
}
|
||||
auto executor = [&](size_t i) {
|
||||
auto doc = data[i].doc();
|
||||
auto array = doc.at_pointer(pointer).get_array();
|
||||
@ -749,6 +767,12 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(EvalCtx& context) {
|
||||
TargetBitmapView valid_res,
|
||||
const std::string pointer,
|
||||
const std::shared_ptr<MultiElement>& terms) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// We only need to update processed_cursor for bitmap_input indexing.
|
||||
if (data == nullptr) {
|
||||
processed_cursor += size;
|
||||
return;
|
||||
}
|
||||
auto executor = [&](size_t i) {
|
||||
auto x = data[i].template at<GetType>(pointer);
|
||||
if (x.error()) {
|
||||
@ -944,6 +968,12 @@ PhyTermFilterExpr::ExecVisitorImplForData(EvalCtx& context) {
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
const std::shared_ptr<MultiElement>& vals) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// We only need to update processed_cursor for bitmap_input indexing.
|
||||
if (data == nullptr) {
|
||||
processed_cursor += size;
|
||||
return;
|
||||
}
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
|
||||
@ -1558,6 +1558,12 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(EvalCtx& context) {
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
IndexInnerType val) {
|
||||
// If data is nullptr, this chunk was skipped by SkipIndex.
|
||||
// We only need to update processed_cursor for bitmap_input indexing.
|
||||
if (data == nullptr) {
|
||||
processed_cursor += size;
|
||||
return;
|
||||
}
|
||||
switch (expr_type) {
|
||||
case proto::plan::GreaterThan: {
|
||||
UnaryElementFunc<T, proto::plan::GreaterThan, filter_type> func;
|
||||
|
||||
@ -1221,3 +1221,291 @@ TEST_P(TaskTest, Test_MultiInConvert) {
|
||||
EXPECT_EQ(inputs.size(), 3);
|
||||
}
|
||||
}
|
||||
|
||||
// This test verifies the fix for https://github.com/milvus-io/milvus/issues/46053.
|
||||
//
|
||||
// Bug scenario:
|
||||
// - Expression: string_field == "target" AND int64_field == X AND float_field > Y
|
||||
// - Data is stored in multiple chunks
|
||||
// - SkipIndex skips some chunks for the float range condition
|
||||
// - Expression reordering: numeric expressions execute before string expressions
|
||||
// - When a chunk is skipped, processed_cursor in execute_sub_batch wasn't updated
|
||||
// - This caused bitmap_input indices to be misaligned for subsequent expressions
|
||||
//
|
||||
// The fix ensures that when a chunk is skipped by SkipIndex, we still call
|
||||
// func(nullptr, ...) so that execute_sub_batch can update its internal cursors.
|
||||
TEST(TaskTest, SkipIndexWithBitmapInputAlignment) {
|
||||
using namespace milvus;
|
||||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
using namespace milvus::exec;
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto dim = 4;
|
||||
auto metrics_type = "L2";
|
||||
auto fake_vec_fid = schema->AddDebugField(
|
||||
"fakeVec", DataType::VECTOR_FLOAT, dim, metrics_type);
|
||||
auto pk_fid = schema->AddDebugField("pk", DataType::INT64);
|
||||
schema->set_primary_field_id(pk_fid);
|
||||
auto string_fid = schema->AddDebugField("string_field", DataType::VARCHAR);
|
||||
auto int64_fid = schema->AddDebugField("int64_field", DataType::INT64);
|
||||
auto float_fid = schema->AddDebugField("float_field", DataType::FLOAT);
|
||||
|
||||
auto segment = CreateSealedSegment(schema);
|
||||
auto cm = milvus::storage::RemoteChunkManagerSingleton::GetInstance()
|
||||
.GetRemoteChunkManager();
|
||||
|
||||
// Create two chunks with different data distributions:
|
||||
// Chunk 0: float values [10, 20, 30, 40, 50] - will be SKIPPED by float > 60
|
||||
// Chunk 1: float values [65, 70, 75, 80, 85] - will NOT be skipped
|
||||
//
|
||||
// We place the target row (string="target_value", int64=999) in chunk 1 at index 2
|
||||
// with float=75 which satisfies float > 60
|
||||
|
||||
const size_t chunk_size = 5;
|
||||
|
||||
// Chunk 0: floats that will cause this chunk to be skipped (max=50 < 60)
|
||||
std::vector<float> floats_chunk0 = {10.0f, 20.0f, 30.0f, 40.0f, 50.0f};
|
||||
auto float_field_data_0 = storage::CreateFieldData(
|
||||
DataType::FLOAT, DataType::NONE, false, 1, chunk_size);
|
||||
float_field_data_0->FillFieldData(floats_chunk0.data(), chunk_size);
|
||||
|
||||
// Chunk 1: floats that will NOT be skipped (min=65 > 60)
|
||||
std::vector<float> floats_chunk1 = {65.0f, 70.0f, 75.0f, 80.0f, 85.0f};
|
||||
auto float_field_data_1 = storage::CreateFieldData(
|
||||
DataType::FLOAT, DataType::NONE, false, 1, chunk_size);
|
||||
float_field_data_1->FillFieldData(floats_chunk1.data(), chunk_size);
|
||||
|
||||
auto float_load_info =
|
||||
PrepareSingleFieldInsertBinlog(kCollectionID,
|
||||
kPartitionID,
|
||||
kSegmentID,
|
||||
float_fid.get(),
|
||||
{float_field_data_0, float_field_data_1},
|
||||
cm);
|
||||
segment->LoadFieldData(float_load_info);
|
||||
|
||||
// Int64 field - target value 999 at chunk 1 index 2
|
||||
std::vector<int64_t> int64s_chunk0 = {1, 2, 3, 4, 5};
|
||||
auto int64_field_data_0 = storage::CreateFieldData(
|
||||
DataType::INT64, DataType::NONE, false, 1, chunk_size);
|
||||
int64_field_data_0->FillFieldData(int64s_chunk0.data(), chunk_size);
|
||||
|
||||
std::vector<int64_t> int64s_chunk1 = {6, 7, 999, 9, 10}; // 999 at index 2
|
||||
auto int64_field_data_1 = storage::CreateFieldData(
|
||||
DataType::INT64, DataType::NONE, false, 1, chunk_size);
|
||||
int64_field_data_1->FillFieldData(int64s_chunk1.data(), chunk_size);
|
||||
|
||||
auto int64_load_info =
|
||||
PrepareSingleFieldInsertBinlog(kCollectionID,
|
||||
kPartitionID,
|
||||
kSegmentID,
|
||||
int64_fid.get(),
|
||||
{int64_field_data_0, int64_field_data_1},
|
||||
cm);
|
||||
segment->LoadFieldData(int64_load_info);
|
||||
|
||||
// String field - target value "target_value" at chunk 1 index 2
|
||||
std::vector<std::string> strings_chunk0 = {"a", "b", "c", "d", "e"};
|
||||
auto string_field_data_0 = storage::CreateFieldData(
|
||||
DataType::VARCHAR, DataType::NONE, false, 1, chunk_size);
|
||||
string_field_data_0->FillFieldData(strings_chunk0.data(), chunk_size);
|
||||
|
||||
std::vector<std::string> strings_chunk1 = {
|
||||
"f", "g", "target_value", "i", "j"};
|
||||
auto string_field_data_1 = storage::CreateFieldData(
|
||||
DataType::VARCHAR, DataType::NONE, false, 1, chunk_size);
|
||||
string_field_data_1->FillFieldData(strings_chunk1.data(), chunk_size);
|
||||
|
||||
auto string_load_info = PrepareSingleFieldInsertBinlog(
|
||||
kCollectionID,
|
||||
kPartitionID,
|
||||
kSegmentID,
|
||||
string_fid.get(),
|
||||
{string_field_data_0, string_field_data_1},
|
||||
cm);
|
||||
segment->LoadFieldData(string_load_info);
|
||||
|
||||
// PK field
|
||||
std::vector<int64_t> pks_chunk0 = {100, 101, 102, 103, 104};
|
||||
auto pk_field_data_0 = storage::CreateFieldData(
|
||||
DataType::INT64, DataType::NONE, false, 1, chunk_size);
|
||||
pk_field_data_0->FillFieldData(pks_chunk0.data(), chunk_size);
|
||||
|
||||
std::vector<int64_t> pks_chunk1 = {105, 106, 107, 108, 109};
|
||||
auto pk_field_data_1 = storage::CreateFieldData(
|
||||
DataType::INT64, DataType::NONE, false, 1, chunk_size);
|
||||
pk_field_data_1->FillFieldData(pks_chunk1.data(), chunk_size);
|
||||
|
||||
auto pk_load_info =
|
||||
PrepareSingleFieldInsertBinlog(kCollectionID,
|
||||
kPartitionID,
|
||||
kSegmentID,
|
||||
pk_fid.get(),
|
||||
{pk_field_data_0, pk_field_data_1},
|
||||
cm);
|
||||
segment->LoadFieldData(pk_load_info);
|
||||
|
||||
// Vector field (required but not used in filter)
|
||||
std::vector<float> vec_chunk0(chunk_size * dim, 1.0f);
|
||||
auto vec_field_data_0 = storage::CreateFieldData(
|
||||
DataType::VECTOR_FLOAT, DataType::NONE, false, dim, chunk_size);
|
||||
vec_field_data_0->FillFieldData(vec_chunk0.data(), chunk_size);
|
||||
|
||||
std::vector<float> vec_chunk1(chunk_size * dim, 2.0f);
|
||||
auto vec_field_data_1 = storage::CreateFieldData(
|
||||
DataType::VECTOR_FLOAT, DataType::NONE, false, dim, chunk_size);
|
||||
vec_field_data_1->FillFieldData(vec_chunk1.data(), chunk_size);
|
||||
|
||||
auto vec_load_info =
|
||||
PrepareSingleFieldInsertBinlog(kCollectionID,
|
||||
kPartitionID,
|
||||
kSegmentID,
|
||||
fake_vec_fid.get(),
|
||||
{vec_field_data_0, vec_field_data_1},
|
||||
cm);
|
||||
segment->LoadFieldData(vec_load_info);
|
||||
|
||||
// Row IDs
|
||||
std::vector<int64_t> row_ids_chunk0 = {0, 1, 2, 3, 4};
|
||||
auto row_ids_data_0 = storage::CreateFieldData(
|
||||
DataType::INT64, DataType::NONE, false, 1, chunk_size);
|
||||
row_ids_data_0->FillFieldData(row_ids_chunk0.data(), chunk_size);
|
||||
|
||||
std::vector<int64_t> row_ids_chunk1 = {5, 6, 7, 8, 9};
|
||||
auto row_ids_data_1 = storage::CreateFieldData(
|
||||
DataType::INT64, DataType::NONE, false, 1, chunk_size);
|
||||
row_ids_data_1->FillFieldData(row_ids_chunk1.data(), chunk_size);
|
||||
|
||||
auto row_id_load_info =
|
||||
PrepareSingleFieldInsertBinlog(kCollectionID,
|
||||
kPartitionID,
|
||||
kSegmentID,
|
||||
RowFieldID.get(),
|
||||
{row_ids_data_0, row_ids_data_1},
|
||||
cm);
|
||||
segment->LoadFieldData(row_id_load_info);
|
||||
|
||||
// Timestamps
|
||||
std::vector<int64_t> timestamps_chunk0 = {1, 1, 1, 1, 1};
|
||||
auto ts_data_0 = storage::CreateFieldData(
|
||||
DataType::INT64, DataType::NONE, false, 1, chunk_size);
|
||||
ts_data_0->FillFieldData(timestamps_chunk0.data(), chunk_size);
|
||||
|
||||
std::vector<int64_t> timestamps_chunk1 = {1, 1, 1, 1, 1};
|
||||
auto ts_data_1 = storage::CreateFieldData(
|
||||
DataType::INT64, DataType::NONE, false, 1, chunk_size);
|
||||
ts_data_1->FillFieldData(timestamps_chunk1.data(), chunk_size);
|
||||
|
||||
auto ts_load_info = PrepareSingleFieldInsertBinlog(kCollectionID,
|
||||
kPartitionID,
|
||||
kSegmentID,
|
||||
TimestampFieldID.get(),
|
||||
{ts_data_0, ts_data_1},
|
||||
cm);
|
||||
segment->LoadFieldData(ts_load_info);
|
||||
|
||||
// Build the expression:
|
||||
// string_field == "target_value" AND int64_field == 999 AND float_field > 60
|
||||
//
|
||||
// Due to expression reordering, this will execute as:
|
||||
// 1. float_field > 60 (numeric, runs first) - SkipIndex skips chunk 0
|
||||
// 2. int64_field == 999 (numeric, runs second)
|
||||
// 3. string_field == "target_value" (string, runs last)
|
||||
|
||||
// string_field == "target_value"
|
||||
proto::plan::GenericValue string_val;
|
||||
string_val.set_string_val("target_value");
|
||||
auto string_expr = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(string_fid, DataType::VARCHAR),
|
||||
proto::plan::OpType::Equal,
|
||||
string_val,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
|
||||
// int64_field == 999
|
||||
proto::plan::GenericValue int64_val;
|
||||
int64_val.set_int64_val(999);
|
||||
auto int64_expr = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(int64_fid, DataType::INT64),
|
||||
proto::plan::OpType::Equal,
|
||||
int64_val,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
|
||||
// float_field > 60
|
||||
proto::plan::GenericValue float_val;
|
||||
float_val.set_float_val(60.0f);
|
||||
auto float_expr = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(float_fid, DataType::FLOAT),
|
||||
proto::plan::OpType::GreaterThan,
|
||||
float_val,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
|
||||
// Build AND expression: string_expr AND int64_expr AND float_expr
|
||||
auto and_expr1 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||
expr::LogicalBinaryExpr::OpType::And, string_expr, int64_expr);
|
||||
auto and_expr2 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||
expr::LogicalBinaryExpr::OpType::And, and_expr1, float_expr);
|
||||
|
||||
// Verify SkipIndex is working before running the expression:
|
||||
// Check if chunk 0 can be skipped for float > 60
|
||||
auto& skip_index = segment->GetSkipIndex();
|
||||
bool chunk0_can_skip = skip_index.CanSkipUnaryRange<float>(
|
||||
float_fid, 0, proto::plan::OpType::GreaterThan, 60.0f);
|
||||
bool chunk1_can_skip = skip_index.CanSkipUnaryRange<float>(
|
||||
float_fid, 1, proto::plan::OpType::GreaterThan, 60.0f);
|
||||
|
||||
// Chunk 0 should be skippable (max=50 < 60), chunk 1 should not (min=65 > 60)
|
||||
EXPECT_TRUE(chunk0_can_skip)
|
||||
<< "Chunk 0 should be skippable for float > 60 (max=50)";
|
||||
EXPECT_FALSE(chunk1_can_skip)
|
||||
<< "Chunk 1 should NOT be skippable for float > 60 (min=65)";
|
||||
|
||||
std::vector<milvus::plan::PlanNodePtr> sources;
|
||||
auto filter_node = std::make_shared<milvus::plan::FilterBitsNode>(
|
||||
"plannode id 1", and_expr2, sources);
|
||||
auto plan = plan::PlanFragment(filter_node);
|
||||
|
||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||
"test_skip_index_bitmap_alignment",
|
||||
segment.get(),
|
||||
chunk_size * 2, // total rows
|
||||
MAX_TIMESTAMP,
|
||||
0,
|
||||
0,
|
||||
query::PlanOptions{false},
|
||||
std::make_shared<milvus::exec::QueryConfig>(
|
||||
std::unordered_map<std::string, std::string>{}));
|
||||
|
||||
auto task = Task::Create("task_skip_index_bitmap", plan, 0, query_context);
|
||||
|
||||
int64_t total_rows = 0;
|
||||
int64_t filtered_rows = 0;
|
||||
for (;;) {
|
||||
auto result = task->Next();
|
||||
if (!result) {
|
||||
break;
|
||||
}
|
||||
auto col_vec =
|
||||
std::dynamic_pointer_cast<ColumnVector>(result->child(0));
|
||||
if (col_vec && col_vec->IsBitmap()) {
|
||||
TargetBitmapView view(col_vec->GetRawData(), col_vec->size());
|
||||
total_rows += col_vec->size();
|
||||
filtered_rows +=
|
||||
view.count(); // These are filtered OUT (don't match)
|
||||
}
|
||||
}
|
||||
|
||||
int64_t num_matched = total_rows - filtered_rows;
|
||||
|
||||
// Expected result: exactly 1 row should match
|
||||
// - Row at chunk 1, index 2 (global index 7) has:
|
||||
// - string_field = "target_value" ✓
|
||||
// - int64_field = 999 ✓
|
||||
// - float_field = 75 > 60 ✓
|
||||
//
|
||||
// With the bug (before fix): 0 rows would match because bitmap_input
|
||||
// indices were misaligned after chunk 0 was skipped.
|
||||
//
|
||||
// With the fix: 1 row should match correctly.
|
||||
EXPECT_EQ(num_matched, 1);
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user