diff --git a/internal/core/src/exec/expression/JsonContainsExpr.cpp b/internal/core/src/exec/expression/JsonContainsExpr.cpp
index df03aee0b3..974ea7c659 100644
--- a/internal/core/src/exec/expression/JsonContainsExpr.cpp
+++ b/internal/core/src/exec/expression/JsonContainsExpr.cpp
@@ -25,6 +25,31 @@ void
 PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
     auto input = context.get_offset_input();
     SetHasOffsetInput((input != nullptr));
+
+    if (expr_->vals_.empty()) {  // no candidate values: answer the whole batch here and skip the per-type dispatch below
+        auto next_batch_size = GetNextBatchSize();
+        auto real_batch_size = has_offset_input_  // batch length: the offset input's size when present, else GetNextBatchSize()
+                                   ? context.get_offset_input()->size()
+                                   : next_batch_size;
+        if (real_batch_size == 0) {  // empty batch: hand back a null result
+            result = nullptr;
+            return;
+        }
+        auto res_vec =  // built from two bitmaps of real_batch_size bits: the first all false, the second all true
+            std::make_shared(TargetBitmap(real_batch_size, false),
+                             TargetBitmap(real_batch_size, true));
+
+        TargetBitmapView res(res_vec->GetRawData(), real_batch_size);  // presumably the per-row match bits
+        TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);  // presumably the per-row validity bits
+
+        res.set();  // presumably sets every bit, i.e. all rows match; NOTE(review): this branch never checks expr_->op_, so ContainsAny is treated the same as ContainsAll -- confirm intended
+        valid_res.set();  // NOTE(review): no column data is read in this branch, so null rows (if any) would be reported valid -- confirm
+
+        result = res_vec;
+        current_data_chunk_pos_ += real_batch_size;  // move the position forward by the batch just answered; NOTE(review): also executed when has_offset_input_ is true -- confirm
+        return;
+    }
+
     switch (expr_->column_.data_type_) {
         case DataType::ARRAY: {
             if (is_index_mode_ && !has_offset_input_) {
diff --git a/internal/core/unittest/test_array_expr.cpp b/internal/core/unittest/test_array_expr.cpp
index 1be5522082..f9a6b6fca1 100644
--- a/internal/core/unittest/test_array_expr.cpp
+++ b/internal/core/unittest/test_array_expr.cpp
@@ -1430,6 +1430,73 @@ TEST(Expr, TestArrayContains) {
     }
 }
 
+TEST(Expr, TestArrayContainsEmptyValues) {  // ContainsAny over ARRAY fields with an empty value list is expected to select every row
+    auto schema = std::make_shared();
+    auto int_array_fid =
+        schema->AddDebugField("int_array", DataType::ARRAY, DataType::INT8);
+    auto long_array_fid =
+        schema->AddDebugField("long_array", DataType::ARRAY, DataType::INT64);
+    auto bool_array_fid =
+        schema->AddDebugField("bool_array", DataType::ARRAY, DataType::BOOL);
+    auto float_array_fid =
+        schema->AddDebugField("float_array", DataType::ARRAY, DataType::FLOAT);
+    auto double_array_fid = schema->AddDebugField(
+        "double_array", DataType::ARRAY, DataType::DOUBLE);
+    auto string_array_fid = schema->AddDebugField(
+        "string_array", DataType::ARRAY, DataType::VARCHAR);
+    schema->set_primary_field_id(schema->AddDebugField("id", DataType::INT64));
+    std::vector fields = {  // every array field id registered above; each one is queried in turn below
+        int_array_fid,
+        long_array_fid,
+        bool_array_fid,
+        float_array_fid,
+        double_array_fid,
+        string_array_fid,
+    };
+
+    auto dummy_seg = CreateGrowingSegment(schema, empty_index_meta);
+
+    int N = 1000;
+    std::vector age_col;  // NOTE(review): never used in this test
+    int num_iters = 100;
+    for (int iter = 0; iter < num_iters; ++iter) {  // insert num_iters batches of N generated rows
+        auto raw_data = DataGen(schema, N, iter);  // iter is DataGen's third argument (presumably a seed -- confirm)
+        dummy_seg->PreInsert(N);
+        dummy_seg->Insert(iter * N,
+                          N,
+                          raw_data.row_ids_.data(),
+                          raw_data.timestamps_.data(),
+                          raw_data.raw_);
+    }
+
+    auto seg_promote = dynamic_cast(dummy_seg.get());
+    std::vector empty_values;  // the empty candidate list under test
+
+    for (auto field_id : fields) {
+        auto start = std::chrono::steady_clock::now();  // elapsed time is only printed, never asserted
+        auto expr = std::make_shared(
+            expr::ColumnInfo(field_id, DataType::ARRAY),
+            proto::plan::JSONContainsExpr_JSONOp_ContainsAny,
+            true,
+            empty_values);
+
+        BitsetType final;
+        auto plan =
+            std::make_shared(DEFAULT_PLANNODE_ID, expr);
+        final =
+            ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP);
+        std::cout << "cost"
+                  << std::chrono::duration_cast(
+                         std::chrono::steady_clock::now() - start)
+                         .count()
+                  << std::endl;
+        EXPECT_EQ(final.size(), N * num_iters);  // one result bit per inserted row
+        for (int i = 0; i < N * num_iters; ++i) {
+            ASSERT_EQ(final[i], true);  // every row must be selected
+        }
+    }
+}
+
 TEST(Expr, TestArrayBinaryArith) {
     auto schema = std::make_shared();
     auto i64_fid = schema->AddDebugField("id", DataType::INT64);
diff --git a/internal/core/unittest/test_array_inverted_index.cpp b/internal/core/unittest/test_array_inverted_index.cpp
index 5e2a74d1cb..695e9befd2 100644
--- a/internal/core/unittest/test_array_inverted_index.cpp
+++ b/internal/core/unittest/test_array_inverted_index.cpp
@@ -164,6 +164,10 @@ TYPED_TEST_P(ArrayInvertedIndexTest, ArrayContainsAny) {
     auto ref = [this, &elems](size_t offset) -> bool {
         std::unordered_set row(this->vec_of_array_[offset].begin(),
                                this->vec_of_array_[offset].end());
+        if (elems.empty()) {  // reference answer for an empty element set: every row matches
+            return true;
+        }
+
         for (const auto& elem : elems) {
             if (row.find(elem) != row.end()) {
                 return true;
@@ -212,6 +216,10 @@ TYPED_TEST_P(ArrayInvertedIndexTest, ArrayContainsAll) {
     auto ref = [this, &elems](size_t offset) -> bool {
         std::unordered_set row(this->vec_of_array_[offset].begin(),
                                this->vec_of_array_[offset].end());
+        if (elems.empty()) {  // reference answer for an empty element set: every row matches
+            return true;
+        }
+
         for (const auto& elem : elems) {
             if (row.find(elem) == row.end()) {
                 return false;