fix: fix pk in [..] skip next batch when using multi-chunk segment (#43619)

pr: #43618

Signed-off-by: luzhang <luzhang@zilliz.com>
Co-authored-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
zhagnlu 2025-07-31 16:59:37 +08:00 committed by GitHub
parent 75463725b3
commit ea7307747a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 48 additions and 33 deletions

View File

@ -27,26 +27,23 @@ PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
SetHasOffsetInput((input != nullptr));
if (expr_->vals_.empty()) {
auto next_batch_size = GetNextBatchSize();
auto real_batch_size = has_offset_input_
? context.get_offset_input()->size()
: next_batch_size;
: GetNextBatchSize();
if (real_batch_size == 0) {
result = nullptr;
return;
}
auto res_vec =
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
res.set();
valid_res.set();
result = res_vec;
current_data_chunk_pos_ += real_batch_size;
if (expr_->op_ == proto::plan::JSONContainsExpr_JSONOp_ContainsAll) {
result = std::make_shared<ColumnVector>(
TargetBitmap(real_batch_size, true),
TargetBitmap(real_batch_size, true));
} else {
result = std::make_shared<ColumnVector>(
TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
}
MoveCursor();
return;
}

View File

@ -207,27 +207,16 @@ PhyTermFilterExpr::ExecPkTermImpl() {
InitPkCacheOffset();
}
auto real_batch_size =
current_data_chunk_pos_ + batch_size_ >= active_count_
? active_count_ - current_data_chunk_pos_
: batch_size_;
auto real_batch_size = GetNextBatchSize();
if (real_batch_size == 0) {
return nullptr;
}
auto res_vec =
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
auto current_chunk_view =
cached_bits_.view(current_data_chunk_pos_, real_batch_size);
res |= current_chunk_view;
current_data_chunk_pos_ += real_batch_size;
return res_vec;
TargetBitmap result;
result.append(cached_bits_, current_data_global_pos_, real_batch_size);
MoveCursor();
return std::make_shared<ColumnVector>(std::move(result),
TargetBitmap(real_batch_size, true));
}
template <typename ValueType>

View File

@ -1492,7 +1492,7 @@ TEST(Expr, TestArrayContainsEmptyValues) {
<< std::endl;
EXPECT_EQ(final.size(), N * num_iters);
for (int i = 0; i < N * num_iters; ++i) {
ASSERT_EQ(final[i], true);
ASSERT_EQ(final[i], false);
}
}
}

View File

@ -165,7 +165,7 @@ TYPED_TEST_P(ArrayInvertedIndexTest, ArrayContainsAny) {
std::unordered_set<TypeParam> row(this->vec_of_array_[offset].begin(),
this->vec_of_array_[offset].end());
if (elems.empty()) {
return true;
return false;
}
for (const auto& elem : elems) {

View File

@ -279,6 +279,35 @@ class TestChunkSegment : public testing::Test {
std::unordered_map<std::string, FieldId> fields;
};
// Evaluates, over the fixture's chunked segment, the conjunction
//     int64 >= 10000  AND  pk in (10001, 10002, 10003, 10004, 10005)
// and expects exactly five set bits, located at result positions
// 10000..10004.
// NOTE(review): the pk literals are one greater than the asserted positions;
// presumably the fixture's pk column is offset by one from the row index --
// confirm against the fixture's SetUp().
TEST_F(TestChunkSegment, TestSkipNextTermExpr) {
    // Left operand: range predicate on the "int64" field.
    proto::plan::GenericValue threshold;
    threshold.set_int64_val(10000);
    auto range_filter = std::make_shared<expr::UnaryRangeFilterExpr>(
        expr::ColumnInfo(fields.at("int64"), DataType::INT64),
        proto::plan::OpType::GreaterEqual,
        threshold);

    // Right operand: term predicate on the "pk" field with five candidates.
    std::vector<proto::plan::GenericValue> pk_candidates;
    for (int64_t pk = 10001; pk <= 10005; ++pk) {
        proto::plan::GenericValue candidate;
        candidate.set_int64_val(pk);
        pk_candidates.push_back(candidate);
    }
    auto pk_filter = std::make_shared<expr::TermFilterExpr>(
        expr::ColumnInfo(fields.at("pk"), DataType::INT64), pk_candidates);

    // Combine both predicates with AND and wrap them in a filter plan node.
    auto conjunction = std::make_shared<expr::LogicalBinaryExpr>(
        expr::LogicalBinaryExpr::OpType::And, range_filter, pk_filter);
    auto filter_plan = std::make_shared<plan::FilterBitsNode>(
        DEFAULT_PLANNODE_ID, conjunction);

    // Run the plan over all rows of the segment (chunk_num chunks of
    // test_data_count rows each).
    auto bits = query::ExecuteQueryExpr(filter_plan,
                                        segment.get(),
                                        chunk_num * test_data_count,
                                        MAX_TIMESTAMP);

    // Exactly five hits, and they sit at positions 10000..10004.
    ASSERT_EQ(5, bits.count());
    for (int pos = 10000; pos < 10005; ++pos) {
        ASSERT_EQ(true, bits[pos]) << "i: " << pos;
    }
}
TEST_F(TestChunkSegment, TestTermExpr) {
// query int64 expr
std::vector<proto::plan::GenericValue> filter_data;