mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
fix: prevent crash when contains_all/any is used with empty array (#41831)
issue: https://github.com/milvus-io/milvus/issues/41348
related and optimized by: https://github.com/milvus-io/milvus/pull/41347
master pr: https://github.com/milvus-io/milvus/pull/41739
2.5 pr: #41756
Signed-off-by: Cai Zhang <cai.zhang@zilliz.com>
Co-authored-by: Sangho Park <hoyaspark@gmail.com>
This commit is contained in:
parent
2c8eb28828
commit
e57cf1c8b3
@ -25,6 +25,31 @@ void
|
||||
PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
auto input = context.get_offset_input();
|
||||
SetHasOffsetInput((input != nullptr));
|
||||
|
||||
if (expr_->vals_.empty()) {
|
||||
auto next_batch_size = GetNextBatchSize();
|
||||
auto real_batch_size = has_offset_input_
|
||||
? context.get_offset_input()->size()
|
||||
: next_batch_size;
|
||||
if (real_batch_size == 0) {
|
||||
result = nullptr;
|
||||
return;
|
||||
}
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
|
||||
res.set();
|
||||
valid_res.set();
|
||||
|
||||
result = res_vec;
|
||||
current_data_chunk_pos_ += real_batch_size;
|
||||
return;
|
||||
}
|
||||
|
||||
switch (expr_->column_.data_type_) {
|
||||
case DataType::ARRAY: {
|
||||
if (is_index_mode_ && !has_offset_input_) {
|
||||
|
||||
@ -1430,6 +1430,73 @@ TEST(Expr, TestArrayContains) {
|
||||
}
|
||||
}
|
||||
|
||||
// Regression test: a contains-any filter built with an EMPTY value list is
// evaluated against array fields of several element types on a growing
// segment. The test requires that evaluation completes and that the
// resulting bitset has one bit per inserted row, with every bit set.
// NOTE(review): only the ContainsAny operator is exercised here; consider
// adding the same check for the contains-all operator.
TEST(Expr, TestArrayContainsEmptyValues) {
    auto schema = std::make_shared<Schema>();
    auto int_array_fid =
        schema->AddDebugField("int_array", DataType::ARRAY, DataType::INT8);
    auto long_array_fid =
        schema->AddDebugField("long_array", DataType::ARRAY, DataType::INT64);
    auto bool_array_fid =
        schema->AddDebugField("bool_array", DataType::ARRAY, DataType::BOOL);
    auto float_array_fid =
        schema->AddDebugField("float_array", DataType::ARRAY, DataType::FLOAT);
    auto double_array_fid = schema->AddDebugField(
        "double_array", DataType::ARRAY, DataType::DOUBLE);
    auto string_array_fid = schema->AddDebugField(
        "string_array", DataType::ARRAY, DataType::VARCHAR);
    schema->set_primary_field_id(schema->AddDebugField("id", DataType::INT64));

    // Every array field the empty-value filter is run against.
    const std::vector<FieldId> fields = {
        int_array_fid,
        long_array_fid,
        bool_array_fid,
        float_array_fid,
        double_array_fid,
        string_array_fid,
    };

    auto dummy_seg = CreateGrowingSegment(schema, empty_index_meta);

    // Insert num_iters batches of N generated rows each.
    const int N = 1000;
    const int num_iters = 100;
    const int total_rows = N * num_iters;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);
        dummy_seg->PreInsert(N);
        dummy_seg->Insert(iter * N,
                          N,
                          raw_data.row_ids_.data(),
                          raw_data.timestamps_.data(),
                          raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(dummy_seg.get());
    // Fail with a clear message instead of handing a null segment to the
    // executor if the created segment is not a SegmentGrowingImpl.
    ASSERT_NE(seg_promote, nullptr);

    // Deliberately empty: this is the input under test.
    std::vector<proto::plan::GenericValue> empty_values;

    for (auto field_id : fields) {
        const auto start = std::chrono::steady_clock::now();
        auto expr = std::make_shared<milvus::expr::JsonContainsExpr>(
            expr::ColumnInfo(field_id, DataType::ARRAY),
            proto::plan::JSONContainsExpr_JSONOp_ContainsAny,
            true,
            empty_values);

        auto plan =
            std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, expr);
        BitsetType final =
            ExecuteQueryExpr(plan, seg_promote, total_rows, MAX_TIMESTAMP);

        // Elapsed time in microseconds for building and running the filter.
        std::cout << "cost"
                  << std::chrono::duration_cast<std::chrono::microseconds>(
                         std::chrono::steady_clock::now() - start)
                         .count()
                  << std::endl;

        // One result bit per inserted row, and every row must match.
        EXPECT_EQ(final.size(), total_rows);
        for (int i = 0; i < total_rows; ++i) {
            ASSERT_EQ(final[i], true);
        }
    }
}
|
||||
|
||||
TEST(Expr, TestArrayBinaryArith) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto i64_fid = schema->AddDebugField("id", DataType::INT64);
|
||||
|
||||
@ -164,6 +164,10 @@ TYPED_TEST_P(ArrayInvertedIndexTest, ArrayContainsAny) {
|
||||
auto ref = [this, &elems](size_t offset) -> bool {
|
||||
std::unordered_set<TypeParam> row(this->vec_of_array_[offset].begin(),
|
||||
this->vec_of_array_[offset].end());
|
||||
if (elems.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for (const auto& elem : elems) {
|
||||
if (row.find(elem) != row.end()) {
|
||||
return true;
|
||||
@ -212,6 +216,10 @@ TYPED_TEST_P(ArrayInvertedIndexTest, ArrayContainsAll) {
|
||||
auto ref = [this, &elems](size_t offset) -> bool {
|
||||
std::unordered_set<TypeParam> row(this->vec_of_array_[offset].begin(),
|
||||
this->vec_of_array_[offset].end());
|
||||
if (elems.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for (const auto& elem : elems) {
|
||||
if (row.find(elem) == row.end()) {
|
||||
return false;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user