enhance: reorder sub expr for conjunct expr (#39872)

Two points:
 (1) Reorder the sub-expressions of a conjunct expr so that heavy operations
are postponed.
Sequence: int(column) -> index(column) -> string(column) -> light conjunct
...... -> json(column) -> heavy conjunct -> two_column_compare
 (2) Support pre-filtering during expr execution: skip scanning the raw data
     of rows that have already been filtered out by the result of a
     preceding expr.

#39869

Signed-off-by: luzhang <luzhang@zilliz.com>
Co-authored-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
zhagnlu 2025-03-19 14:50:14 +08:00 committed by GitHub
parent 8db708f67d
commit 6c55db44f1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
40 changed files with 1765 additions and 388 deletions

View File

@ -889,6 +889,7 @@ common:
localRPCEnabled: false # enable local rpc for internal communication when mix or standalone mode. localRPCEnabled: false # enable local rpc for internal communication when mix or standalone mode.
sync: sync:
taskPoolReleaseTimeoutSeconds: 60 # The maximum time to wait for the task to finish and release resources in the pool taskPoolReleaseTimeoutSeconds: 60 # The maximum time to wait for the task to finish and release resources in the pool
enabledOptimizeExpr: true # Indicates whether to enable optimize expr
# QuotaConfig, configurations of Milvus quota and limits. # QuotaConfig, configurations of Milvus quota and limits.
# By default, we enable: # By default, we enable:

View File

@ -28,6 +28,7 @@ int64_t LOW_PRIORITY_THREAD_CORE_COEFFICIENT =
DEFAULT_LOW_PRIORITY_THREAD_CORE_COEFFICIENT; DEFAULT_LOW_PRIORITY_THREAD_CORE_COEFFICIENT;
int CPU_NUM = DEFAULT_CPU_NUM; int CPU_NUM = DEFAULT_CPU_NUM;
int64_t EXEC_EVAL_EXPR_BATCH_SIZE = DEFAULT_EXEC_EVAL_EXPR_BATCH_SIZE; int64_t EXEC_EVAL_EXPR_BATCH_SIZE = DEFAULT_EXEC_EVAL_EXPR_BATCH_SIZE;
bool OPTIMIZE_EXPR_ENABLED = DEFAULT_OPTIMIZE_EXPR_ENABLED;
void void
SetIndexSliceSize(const int64_t size) { SetIndexSliceSize(const int64_t size) {
@ -67,4 +68,10 @@ SetCpuNum(const int num) {
CPU_NUM = num; CPU_NUM = num;
} }
// Overwrites the global OPTIMIZE_EXPR_ENABLED flag with `val` and logs the
// value that is now stored in the flag.
void
SetDefaultOptimizeExprEnable(bool val) {
OPTIMIZE_EXPR_ENABLED = val;
// Log the global (not the argument) so the message reflects what was stored.
LOG_INFO("set default optimize expr enabled: {}", OPTIMIZE_EXPR_ENABLED);
}
} // namespace milvus } // namespace milvus

View File

@ -29,6 +29,7 @@ extern int64_t MIDDLE_PRIORITY_THREAD_CORE_COEFFICIENT;
extern int64_t LOW_PRIORITY_THREAD_CORE_COEFFICIENT; extern int64_t LOW_PRIORITY_THREAD_CORE_COEFFICIENT;
extern int CPU_NUM; extern int CPU_NUM;
extern int64_t EXEC_EVAL_EXPR_BATCH_SIZE; extern int64_t EXEC_EVAL_EXPR_BATCH_SIZE;
extern bool OPTIMIZE_EXPR_ENABLED;
void void
SetIndexSliceSize(const int64_t size); SetIndexSliceSize(const int64_t size);
@ -48,6 +49,9 @@ SetCpuNum(const int core);
void void
SetDefaultExecEvalExprBatchSize(int64_t val); SetDefaultExecEvalExprBatchSize(int64_t val);
// Sets the global OPTIMIZE_EXPR_ENABLED flag to `val`.
void
SetDefaultOptimizeExprEnable(bool val);
struct BufferView { struct BufferView {
struct Element { struct Element {
const char* data_; const char* data_;

View File

@ -79,4 +79,5 @@ const int64_t DEFAULT_HYBRID_INDEX_BITMAP_CARDINALITY_LIMIT = 100;
const size_t MARISA_NULL_KEY_ID = -1; const size_t MARISA_NULL_KEY_ID = -1;
const std::string JSON_CAST_TYPE = "json_cast_type"; const std::string JSON_CAST_TYPE = "json_cast_type";
const std::string JSON_PATH = "json_path"; const std::string JSON_PATH = "json_path";
// Default value for the expression-optimization switch (enabled).
const bool DEFAULT_OPTIMIZE_EXPR_ENABLED = true;

View File

@ -254,6 +254,11 @@ IsFloatDataType(DataType data_type) {
} }
} }
// Reports whether `data_type` is numeric, i.e. whether it is accepted by
// IsIntegerDataType or, failing that, by IsFloatDataType.
inline bool
IsNumericDataType(DataType data_type) {
    if (IsIntegerDataType(data_type)) {
        return true;
    }
    return IsFloatDataType(data_type);
}
inline bool inline bool
IsStringDataType(DataType data_type) { IsStringDataType(DataType data_type) {
switch (data_type) { switch (data_type) {

View File

@ -25,7 +25,7 @@
#include "common/Tracer.h" #include "common/Tracer.h"
#include "log/Log.h" #include "log/Log.h"
std::once_flag flag1, flag2, flag3, flag4, flag5, flag6; std::once_flag flag1, flag2, flag3, flag4, flag5, flag6, flag7;
std::once_flag traceFlag; std::once_flag traceFlag;
void void
@ -78,6 +78,14 @@ InitDefaultExprEvalBatchSize(int64_t val) {
val); val);
} }
// Forwards `val` to milvus::SetDefaultOptimizeExprEnable under
// std::call_once on `flag7`, so the setter is executed through this entry
// point only once.
void
InitDefaultOptimizeExprEnable(bool val) {
    std::call_once(flag7,
                   [val]() { milvus::SetDefaultOptimizeExprEnable(val); });
}
void void
InitTrace(CTraceConfig* config) { InitTrace(CTraceConfig* config) {
auto traceConfig = milvus::tracer::TraceConfig{config->exporter, auto traceConfig = milvus::tracer::TraceConfig{config->exporter,

View File

@ -48,6 +48,9 @@ InitTrace(CTraceConfig* config);
void void
SetTrace(CTraceConfig* config); SetTrace(CTraceConfig* config);
// Initializes the expression-optimization switch with `val`.
void
InitDefaultOptimizeExprEnable(bool val);
#ifdef __cplusplus #ifdef __cplusplus
}; };
#endif #endif

View File

@ -35,8 +35,9 @@ PhyAlwaysTrueExpr::Eval(EvalCtx& context, VectorPtr& result) {
return; return;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);

View File

@ -57,6 +57,21 @@ class PhyAlwaysTrueExpr : public Expr {
} }
} }
// Returns the fixed textual tag identifying this expression.
std::string
ToString() const override {
return "[AlwaysTrue]";
}
// Always reports true for this expression type.
// NOTE(review): presumably consulted by the conjunct sub-expression
// reordering added in this change -- confirm against the caller.
bool
IsSource() const override {
return true;
}
// This expression exposes no column info, so the result is always empty.
std::optional<milvus::expr::ColumnInfo>
GetColumnInfo() const override {
return std::nullopt;
}
private: private:
std::shared_ptr<const milvus::expr::AlwaysTrueExpr> expr_; std::shared_ptr<const milvus::expr::AlwaysTrueExpr> expr_;
int64_t active_count_; int64_t active_count_;

View File

@ -117,11 +117,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson(
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
if (!arg_inited_) { if (!arg_inited_) {
value_arg_.SetValue<ValueType>(expr_->value_); value_arg_.SetValue<ValueType>(expr_->value_);
@ -535,11 +535,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray(
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
int index = -1; int index = -1;
if (expr_->column_.nested_path_.size() > 0) { if (expr_->column_.nested_path_.size() > 0) {
@ -1435,11 +1435,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData(
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
if (!arg_inited_) { if (!arg_inited_) {
value_arg_.SetValue<HighPrecisionType>(expr_->value_); value_arg_.SetValue<HighPrecisionType>(expr_->value_);

View File

@ -464,6 +464,21 @@ class PhyBinaryArithOpEvalRangeExpr : public SegmentExpr {
void void
Eval(EvalCtx& context, VectorPtr& result) override; Eval(EvalCtx& context, VectorPtr& result) override;
// Returns the string form of the underlying expression (expr_->ToString()).
std::string
ToString() const override {
return fmt::format("{}", expr_->ToString());
}
// Always reports true for this expression type.
// NOTE(review): presumably consulted by the conjunct sub-expression
// reordering added in this change -- confirm against the caller.
bool
IsSource() const override {
return true;
}
// Returns expr_->column_, the column info held by the underlying expression.
std::optional<milvus::expr::ColumnInfo>
GetColumnInfo() const override {
return expr_->column_;
}
private: private:
template <typename T> template <typename T>
VectorPtr VectorPtr

View File

@ -28,31 +28,31 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
SetHasOffsetInput((input != nullptr)); SetHasOffsetInput((input != nullptr));
switch (expr_->column_.data_type_) { switch (expr_->column_.data_type_) {
case DataType::BOOL: { case DataType::BOOL: {
result = ExecRangeVisitorImpl<bool>(input); result = ExecRangeVisitorImpl<bool>(context);
break; break;
} }
case DataType::INT8: { case DataType::INT8: {
result = ExecRangeVisitorImpl<int8_t>(input); result = ExecRangeVisitorImpl<int8_t>(context);
break; break;
} }
case DataType::INT16: { case DataType::INT16: {
result = ExecRangeVisitorImpl<int16_t>(input); result = ExecRangeVisitorImpl<int16_t>(context);
break; break;
} }
case DataType::INT32: { case DataType::INT32: {
result = ExecRangeVisitorImpl<int32_t>(input); result = ExecRangeVisitorImpl<int32_t>(context);
break; break;
} }
case DataType::INT64: { case DataType::INT64: {
result = ExecRangeVisitorImpl<int64_t>(input); result = ExecRangeVisitorImpl<int64_t>(context);
break; break;
} }
case DataType::FLOAT: { case DataType::FLOAT: {
result = ExecRangeVisitorImpl<float>(input); result = ExecRangeVisitorImpl<float>(context);
break; break;
} }
case DataType::DOUBLE: { case DataType::DOUBLE: {
result = ExecRangeVisitorImpl<double>(input); result = ExecRangeVisitorImpl<double>(context);
break; break;
} }
case DataType::VARCHAR: { case DataType::VARCHAR: {
@ -60,9 +60,9 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
!storage::MmapManager::GetInstance() !storage::MmapManager::GetInstance()
.GetMmapConfig() .GetMmapConfig()
.growing_enable_mmap) { .growing_enable_mmap) {
result = ExecRangeVisitorImpl<std::string>(input); result = ExecRangeVisitorImpl<std::string>(context);
} else { } else {
result = ExecRangeVisitorImpl<std::string_view>(input); result = ExecRangeVisitorImpl<std::string_view>(context);
} }
break; break;
} }
@ -70,15 +70,15 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
auto value_type = expr_->lower_val_.val_case(); auto value_type = expr_->lower_val_.val_case();
switch (value_type) { switch (value_type) {
case proto::plan::GenericValue::ValCase::kInt64Val: { case proto::plan::GenericValue::ValCase::kInt64Val: {
result = ExecRangeVisitorImplForJson<int64_t>(input); result = ExecRangeVisitorImplForJson<int64_t>(context);
break; break;
} }
case proto::plan::GenericValue::ValCase::kFloatVal: { case proto::plan::GenericValue::ValCase::kFloatVal: {
result = ExecRangeVisitorImplForJson<double>(input); result = ExecRangeVisitorImplForJson<double>(context);
break; break;
} }
case proto::plan::GenericValue::ValCase::kStringVal: { case proto::plan::GenericValue::ValCase::kStringVal: {
result = ExecRangeVisitorImplForJson<std::string>(input); result = ExecRangeVisitorImplForJson<std::string>(context);
break; break;
} }
default: { default: {
@ -95,17 +95,17 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
switch (value_type) { switch (value_type) {
case proto::plan::GenericValue::ValCase::kInt64Val: { case proto::plan::GenericValue::ValCase::kInt64Val: {
SetNotUseIndex(); SetNotUseIndex();
result = ExecRangeVisitorImplForArray<int64_t>(input); result = ExecRangeVisitorImplForArray<int64_t>(context);
break; break;
} }
case proto::plan::GenericValue::ValCase::kFloatVal: { case proto::plan::GenericValue::ValCase::kFloatVal: {
SetNotUseIndex(); SetNotUseIndex();
result = ExecRangeVisitorImplForArray<double>(input); result = ExecRangeVisitorImplForArray<double>(context);
break; break;
} }
case proto::plan::GenericValue::ValCase::kStringVal: { case proto::plan::GenericValue::ValCase::kStringVal: {
SetNotUseIndex(); SetNotUseIndex();
result = ExecRangeVisitorImplForArray<std::string>(input); result = ExecRangeVisitorImplForArray<std::string>(context);
break; break;
} }
default: { default: {
@ -126,11 +126,11 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
template <typename T> template <typename T>
VectorPtr VectorPtr
PhyBinaryRangeFilterExpr::ExecRangeVisitorImpl(OffsetVector* input) { PhyBinaryRangeFilterExpr::ExecRangeVisitorImpl(EvalCtx& context) {
if (is_index_mode_ && !has_offset_input_) { if (is_index_mode_ && !has_offset_input_) {
return ExecRangeVisitorImplForIndex<T>(); return ExecRangeVisitorImplForIndex<T>();
} else { } else {
return ExecRangeVisitorImplForData<T>(input); return ExecRangeVisitorImplForData<T>(context);
} }
} }
@ -235,7 +235,7 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForIndex() {
template <typename T> template <typename T>
VectorPtr VectorPtr
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) { PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(EvalCtx& context) {
typedef std:: typedef std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T> conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
IndexInnerType; IndexInnerType;
@ -246,6 +246,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
IndexInnerType> IndexInnerType>
HighPrecisionType; HighPrecisionType;
const auto& bitmap_input = context.get_bitmap_input();
auto* input = context.get_offset_input();
HighPrecisionType val1; HighPrecisionType val1;
HighPrecisionType val2; HighPrecisionType val2;
bool lower_inclusive = false; bool lower_inclusive = false;
@ -260,15 +262,16 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
size_t processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[ lower_inclusive, [ lower_inclusive, upper_inclusive, &processed_cursor, &
upper_inclusive ]<FilterType filter_type = FilterType::sequential>( bitmap_input ]<FilterType filter_type = FilterType::sequential>(
const T* data, const T* data,
const bool* valid_data, const bool* valid_data,
const int32_t* offsets, const int32_t* offsets,
@ -279,16 +282,44 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
HighPrecisionType val2) { HighPrecisionType val2) {
if (lower_inclusive && upper_inclusive) { if (lower_inclusive && upper_inclusive) {
BinaryRangeElementFunc<T, true, true, filter_type> func; BinaryRangeElementFunc<T, true, true, filter_type> func;
func(val1, val2, data, size, res, offsets); func(val1,
val2,
data,
size,
res,
bitmap_input,
processed_cursor,
offsets);
} else if (lower_inclusive && !upper_inclusive) { } else if (lower_inclusive && !upper_inclusive) {
BinaryRangeElementFunc<T, true, false, filter_type> func; BinaryRangeElementFunc<T, true, false, filter_type> func;
func(val1, val2, data, size, res, offsets); func(val1,
val2,
data,
size,
res,
bitmap_input,
processed_cursor,
offsets);
} else if (!lower_inclusive && upper_inclusive) { } else if (!lower_inclusive && upper_inclusive) {
BinaryRangeElementFunc<T, false, true, filter_type> func; BinaryRangeElementFunc<T, false, true, filter_type> func;
func(val1, val2, data, size, res, offsets); func(val1,
val2,
data,
size,
res,
bitmap_input,
processed_cursor,
offsets);
} else { } else {
BinaryRangeElementFunc<T, false, false, filter_type> func; BinaryRangeElementFunc<T, false, false, filter_type> func;
func(val1, val2, data, size, res, offsets); func(val1,
val2,
data,
size,
res,
bitmap_input,
processed_cursor,
offsets);
} }
// there is a batch operation in BinaryRangeElementFunc, // there is a batch operation in BinaryRangeElementFunc,
// so not divide data again for the reason that it may reduce performance if the null distribution is scattered // so not divide data again for the reason that it may reduce performance if the null distribution is scattered
@ -304,6 +335,7 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
} }
} }
} }
processed_cursor += size;
}; };
auto skip_index_func = auto skip_index_func =
@ -346,20 +378,23 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
template <typename ValueType> template <typename ValueType>
VectorPtr VectorPtr
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) { PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(EvalCtx& context) {
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>, using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
std::string_view, std::string_view,
ValueType>; ValueType>;
const auto& bitmap_input = context.get_bitmap_input();
auto* input = context.get_offset_input();
FieldId field_id = expr_->column_.field_id_;
auto real_batch_size = auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize(); has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
bool lower_inclusive = expr_->lower_inclusive_; bool lower_inclusive = expr_->lower_inclusive_;
bool upper_inclusive = expr_->upper_inclusive_; bool upper_inclusive = expr_->upper_inclusive_;
@ -372,9 +407,15 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
ValueType val2 = upper_arg_.GetValue<ValueType>(); ValueType val2 = upper_arg_.GetValue<ValueType>();
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
size_t processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[ lower_inclusive, upper_inclusive, [
pointer ]<FilterType filter_type = FilterType::sequential>( lower_inclusive,
upper_inclusive,
pointer,
&bitmap_input,
&processed_cursor
]<FilterType filter_type = FilterType::sequential>(
const milvus::Json* data, const milvus::Json* data,
const bool* valid_data, const bool* valid_data,
const int32_t* offsets, const int32_t* offsets,
@ -394,6 +435,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
size, size,
res, res,
valid_res, valid_res,
bitmap_input,
processed_cursor,
offsets); offsets);
} else if (lower_inclusive && !upper_inclusive) { } else if (lower_inclusive && !upper_inclusive) {
BinaryRangeElementFuncForJson<ValueType, true, false, filter_type> BinaryRangeElementFuncForJson<ValueType, true, false, filter_type>
@ -406,6 +449,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
size, size,
res, res,
valid_res, valid_res,
bitmap_input,
processed_cursor,
offsets); offsets);
} else if (!lower_inclusive && upper_inclusive) { } else if (!lower_inclusive && upper_inclusive) {
@ -419,6 +464,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
size, size,
res, res,
valid_res, valid_res,
bitmap_input,
processed_cursor,
offsets); offsets);
} else { } else {
BinaryRangeElementFuncForJson<ValueType, false, false, filter_type> BinaryRangeElementFuncForJson<ValueType, false, false, filter_type>
@ -431,8 +478,11 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
size, size,
res, res,
valid_res, valid_res,
bitmap_input,
processed_cursor,
offsets); offsets);
} }
processed_cursor += size;
}; };
int64_t processed_size; int64_t processed_size;
if (has_offset_input_) { if (has_offset_input_) {
@ -457,20 +507,22 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
template <typename ValueType> template <typename ValueType>
VectorPtr VectorPtr
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) { PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(EvalCtx& context) {
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>, using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
std::string_view, std::string_view,
ValueType>; ValueType>;
const auto& bitmap_input = context.get_bitmap_input();
auto* input = context.get_offset_input();
auto real_batch_size = auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize(); has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
bool lower_inclusive = expr_->lower_inclusive_; bool lower_inclusive = expr_->lower_inclusive_;
bool upper_inclusive = expr_->upper_inclusive_; bool upper_inclusive = expr_->upper_inclusive_;
@ -488,9 +540,10 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
index = std::stoi(expr_->column_.nested_path_[0]); index = std::stoi(expr_->column_.nested_path_[0]);
} }
size_t processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[ lower_inclusive, [ lower_inclusive, upper_inclusive, &processed_cursor, &
upper_inclusive ]<FilterType filter_type = FilterType::sequential>( bitmap_input ]<FilterType filter_type = FilterType::sequential>(
const milvus::ArrayView* data, const milvus::ArrayView* data,
const bool* valid_data, const bool* valid_data,
const int32_t* offsets, const int32_t* offsets,
@ -511,6 +564,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
size, size,
res, res,
valid_res, valid_res,
bitmap_input,
processed_cursor,
offsets); offsets);
} else if (lower_inclusive && !upper_inclusive) { } else if (lower_inclusive && !upper_inclusive) {
BinaryRangeElementFuncForArray<ValueType, true, false, filter_type> BinaryRangeElementFuncForArray<ValueType, true, false, filter_type>
@ -523,6 +578,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
size, size,
res, res,
valid_res, valid_res,
bitmap_input,
processed_cursor,
offsets); offsets);
} else if (!lower_inclusive && upper_inclusive) { } else if (!lower_inclusive && upper_inclusive) {
@ -536,6 +593,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
size, size,
res, res,
valid_res, valid_res,
bitmap_input,
processed_cursor,
offsets); offsets);
} else { } else {
@ -549,9 +608,13 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
size, size,
res, res,
valid_res, valid_res,
bitmap_input,
processed_cursor,
offsets); offsets);
} }
processed_cursor += size;
}; };
int64_t processed_size; int64_t processed_size;
if (has_offset_input_) { if (has_offset_input_) {
processed_size = processed_size =

View File

@ -44,9 +44,17 @@ struct BinaryRangeElementFunc {
const T* src, const T* src,
size_t n, size_t n,
TargetBitmapView res, TargetBitmapView res,
const TargetBitmap& bitmap_input,
size_t start_cursor,
const int32_t* offsets = nullptr) { const int32_t* offsets = nullptr) {
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random ||
std::is_same_v<T, std::string> ||
std::is_same_v<T, std::string_view>) {
bool has_bitmap_input = !bitmap_input.empty();
for (size_t i = 0; i < n; ++i) { for (size_t i = 0; i < n; ++i) {
if (has_bitmap_input && !bitmap_input[i + start_cursor]) {
continue;
}
auto offset = (offsets) ? offsets[i] : i; auto offset = (offsets) ? offsets[i] : i;
if constexpr (lower_inclusive && upper_inclusive) { if constexpr (lower_inclusive && upper_inclusive) {
res[i] = val1 <= src[offset] && src[offset] <= val2; res[i] = val1 <= src[offset] && src[offset] <= val2;
@ -83,6 +91,9 @@ struct BinaryRangeElementFunc {
res[i] = valid_res[i] = false; \ res[i] = valid_res[i] = false; \
break; \ break; \
} \ } \
if (has_bitmap_input && !bitmap_input[i + start_cursor]) { \
break; \
} \
auto x = src[offset].template at<GetType>(pointer); \ auto x = src[offset].template at<GetType>(pointer); \
if (x.error()) { \ if (x.error()) { \
if constexpr (std::is_same_v<GetType, int64_t>) { \ if constexpr (std::is_same_v<GetType, int64_t>) { \
@ -117,7 +128,10 @@ struct BinaryRangeElementFuncForJson {
size_t n, size_t n,
TargetBitmapView res, TargetBitmapView res,
TargetBitmapView valid_res, TargetBitmapView valid_res,
const TargetBitmap& bitmap_input,
size_t start_cursor,
const int32_t* offsets = nullptr) { const int32_t* offsets = nullptr) {
bool has_bitmap_input = !bitmap_input.empty();
for (size_t i = 0; i < n; ++i) { for (size_t i = 0; i < n; ++i) {
auto offset = i; auto offset = i;
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random) {
@ -153,8 +167,14 @@ struct BinaryRangeElementFuncForArray {
size_t n, size_t n,
TargetBitmapView res, TargetBitmapView res,
TargetBitmapView valid_res, TargetBitmapView valid_res,
const TargetBitmap& bitmap_input,
size_t start_cursor,
const int32_t* offsets = nullptr) { const int32_t* offsets = nullptr) {
bool has_bitmap_input = !bitmap_input.empty();
for (size_t i = 0; i < n; ++i) { for (size_t i = 0; i < n; ++i) {
if (has_bitmap_input && !bitmap_input[i + start_cursor]) {
continue;
}
size_t offset = i; size_t offset = i;
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i; offset = (offsets) ? offsets[i] : i;
@ -240,6 +260,21 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
void void
Eval(EvalCtx& context, VectorPtr& result) override; Eval(EvalCtx& context, VectorPtr& result) override;
std::string
ToString() const {
return fmt::format("{}", expr_->ToString());
}
bool
IsSource() const override {
return true;
}
std::optional<milvus::expr::ColumnInfo>
GetColumnInfo() const override {
return expr_->column_;
}
private: private:
// Check overflow and cache result for performance // Check overflow and cache result for performance
template < template <
@ -259,7 +294,7 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
template <typename T> template <typename T>
VectorPtr VectorPtr
ExecRangeVisitorImpl(OffsetVector* input = nullptr); ExecRangeVisitorImpl(EvalCtx& context);
template <typename T> template <typename T>
VectorPtr VectorPtr
@ -267,15 +302,15 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
template <typename T> template <typename T>
VectorPtr VectorPtr
ExecRangeVisitorImplForData(OffsetVector* input = nullptr); ExecRangeVisitorImplForData(EvalCtx& context);
template <typename ValueType> template <typename ValueType>
VectorPtr VectorPtr
ExecRangeVisitorImplForJson(OffsetVector* input = nullptr); ExecRangeVisitorImplForJson(EvalCtx& context);
template <typename ValueType> template <typename ValueType>
VectorPtr VectorPtr
ExecRangeVisitorImplForArray(OffsetVector* input = nullptr); ExecRangeVisitorImplForArray(EvalCtx& context);
private: private:
std::shared_ptr<const milvus::expr::BinaryRangeFilterExpr> expr_; std::shared_ptr<const milvus::expr::BinaryRangeFilterExpr> expr_;

View File

@ -68,6 +68,21 @@ class PhyCallExpr : public Expr {
} }
} }
std::string
ToString() const {
return fmt::format("{}", expr_->ToString());
}
bool
IsSource() const override {
return false;
}
std::optional<milvus::expr::ColumnInfo>
GetColumnInfo() const override {
return std::nullopt;
}
private: private:
std::shared_ptr<const milvus::expr::CallExpr> expr_; std::shared_ptr<const milvus::expr::CallExpr> expr_;

View File

@ -114,6 +114,21 @@ class PhyColumnExpr : public Expr {
VectorPtr VectorPtr
DoEval(OffsetVector* input = nullptr); DoEval(OffsetVector* input = nullptr);
std::string
ToString() const {
return fmt::format("{}", expr_->ToString());
}
bool
IsSource() const override {
return true;
}
std::optional<milvus::expr::ColumnInfo>
GetColumnInfo() const override {
return expr_->GetColumn();
}
private: private:
bool is_indexed_; bool is_indexed_;

View File

@ -38,20 +38,20 @@ PhyCompareFilterExpr::GetNextBatchSize() {
template <typename OpType> template <typename OpType>
VectorPtr VectorPtr
PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op, PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op, EvalCtx& context) {
OffsetVector* input) {
// take offsets as input // take offsets as input
auto input = context.get_offset_input();
if (has_offset_input_) { if (has_offset_input_) {
auto real_batch_size = input->size(); auto real_batch_size = input->size();
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
auto left_data_barrier = segment_chunk_reader_.segment_->num_chunk_data( auto left_data_barrier = segment_chunk_reader_.segment_->num_chunk_data(
expr_->left_field_id_); expr_->left_field_id_);
@ -215,37 +215,37 @@ PhyCompareFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
// For segment both fields has no index, can use SIMD to speed up. // For segment both fields has no index, can use SIMD to speed up.
// Avoiding too much call stack that blocks SIMD. // Avoiding too much call stack that blocks SIMD.
if (!is_left_indexed_ && !is_right_indexed_ && !IsStringExpr()) { if (!is_left_indexed_ && !is_right_indexed_ && !IsStringExpr()) {
result = ExecCompareExprDispatcherForBothDataSegment(input); result = ExecCompareExprDispatcherForBothDataSegment(context);
return; return;
} }
result = ExecCompareExprDispatcherForHybridSegment(input); result = ExecCompareExprDispatcherForHybridSegment(context);
} }
VectorPtr VectorPtr
PhyCompareFilterExpr::ExecCompareExprDispatcherForHybridSegment( PhyCompareFilterExpr::ExecCompareExprDispatcherForHybridSegment(
OffsetVector* input) { EvalCtx& context) {
switch (expr_->op_type_) { switch (expr_->op_type_) {
case OpType::Equal: { case OpType::Equal: {
return ExecCompareExprDispatcher(std::equal_to<>{}, input); return ExecCompareExprDispatcher(std::equal_to<>{}, context);
} }
case OpType::NotEqual: { case OpType::NotEqual: {
return ExecCompareExprDispatcher(std::not_equal_to<>{}, input); return ExecCompareExprDispatcher(std::not_equal_to<>{}, context);
} }
case OpType::GreaterEqual: { case OpType::GreaterEqual: {
return ExecCompareExprDispatcher(std::greater_equal<>{}, input); return ExecCompareExprDispatcher(std::greater_equal<>{}, context);
} }
case OpType::GreaterThan: { case OpType::GreaterThan: {
return ExecCompareExprDispatcher(std::greater<>{}, input); return ExecCompareExprDispatcher(std::greater<>{}, context);
} }
case OpType::LessEqual: { case OpType::LessEqual: {
return ExecCompareExprDispatcher(std::less_equal<>{}, input); return ExecCompareExprDispatcher(std::less_equal<>{}, context);
} }
case OpType::LessThan: { case OpType::LessThan: {
return ExecCompareExprDispatcher(std::less<>{}, input); return ExecCompareExprDispatcher(std::less<>{}, context);
} }
case OpType::PrefixMatch: { case OpType::PrefixMatch: {
return ExecCompareExprDispatcher( return ExecCompareExprDispatcher(
milvus::query::MatchOp<OpType::PrefixMatch>{}, input); milvus::query::MatchOp<OpType::PrefixMatch>{}, context);
} }
// case OpType::PostfixMatch: { // case OpType::PostfixMatch: {
// } // }
@ -257,22 +257,22 @@ PhyCompareFilterExpr::ExecCompareExprDispatcherForHybridSegment(
VectorPtr VectorPtr
PhyCompareFilterExpr::ExecCompareExprDispatcherForBothDataSegment( PhyCompareFilterExpr::ExecCompareExprDispatcherForBothDataSegment(
OffsetVector* input) { EvalCtx& context) {
switch (expr_->left_data_type_) { switch (expr_->left_data_type_) {
case DataType::BOOL: case DataType::BOOL:
return ExecCompareLeftType<bool>(input); return ExecCompareLeftType<bool>(context);
case DataType::INT8: case DataType::INT8:
return ExecCompareLeftType<int8_t>(input); return ExecCompareLeftType<int8_t>(context);
case DataType::INT16: case DataType::INT16:
return ExecCompareLeftType<int16_t>(input); return ExecCompareLeftType<int16_t>(context);
case DataType::INT32: case DataType::INT32:
return ExecCompareLeftType<int32_t>(input); return ExecCompareLeftType<int32_t>(context);
case DataType::INT64: case DataType::INT64:
return ExecCompareLeftType<int64_t>(input); return ExecCompareLeftType<int64_t>(context);
case DataType::FLOAT: case DataType::FLOAT:
return ExecCompareLeftType<float>(input); return ExecCompareLeftType<float>(context);
case DataType::DOUBLE: case DataType::DOUBLE:
return ExecCompareLeftType<double>(input); return ExecCompareLeftType<double>(context);
default: default:
PanicInfo( PanicInfo(
DataTypeInvalid, DataTypeInvalid,
@ -283,22 +283,22 @@ PhyCompareFilterExpr::ExecCompareExprDispatcherForBothDataSegment(
template <typename T> template <typename T>
VectorPtr VectorPtr
PhyCompareFilterExpr::ExecCompareLeftType(OffsetVector* input) { PhyCompareFilterExpr::ExecCompareLeftType(EvalCtx& context) {
switch (expr_->right_data_type_) { switch (expr_->right_data_type_) {
case DataType::BOOL: case DataType::BOOL:
return ExecCompareRightType<T, bool>(input); return ExecCompareRightType<T, bool>(context);
case DataType::INT8: case DataType::INT8:
return ExecCompareRightType<T, int8_t>(input); return ExecCompareRightType<T, int8_t>(context);
case DataType::INT16: case DataType::INT16:
return ExecCompareRightType<T, int16_t>(input); return ExecCompareRightType<T, int16_t>(context);
case DataType::INT32: case DataType::INT32:
return ExecCompareRightType<T, int32_t>(input); return ExecCompareRightType<T, int32_t>(context);
case DataType::INT64: case DataType::INT64:
return ExecCompareRightType<T, int64_t>(input); return ExecCompareRightType<T, int64_t>(context);
case DataType::FLOAT: case DataType::FLOAT:
return ExecCompareRightType<T, float>(input); return ExecCompareRightType<T, float>(context);
case DataType::DOUBLE: case DataType::DOUBLE:
return ExecCompareRightType<T, double>(input); return ExecCompareRightType<T, double>(context);
default: default:
PanicInfo( PanicInfo(
DataTypeInvalid, DataTypeInvalid,
@ -309,61 +309,101 @@ PhyCompareFilterExpr::ExecCompareLeftType(OffsetVector* input) {
template <typename T, typename U> template <typename T, typename U>
VectorPtr VectorPtr
PhyCompareFilterExpr::ExecCompareRightType(OffsetVector* input) { PhyCompareFilterExpr::ExecCompareRightType(EvalCtx& context) {
auto input = context.get_offset_input();
auto real_batch_size = auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize(); has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( const auto& bitmap_input = context.get_bitmap_input();
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); auto res_vec =
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
auto expr_type = expr_->op_type_; auto expr_type = expr_->op_type_;
auto execute_sub_batch = [expr_type]<FilterType filter_type = size_t processed_cursor = 0;
FilterType::sequential>( auto execute_sub_batch =
const T* left, [ expr_type, &bitmap_input, &
const U* right, processed_cursor ]<FilterType filter_type = FilterType::sequential>(
const int32_t* offsets, const T* left,
const int size, const U* right,
TargetBitmapView res) { const int32_t* offsets,
const int size,
TargetBitmapView res) {
switch (expr_type) { switch (expr_type) {
case proto::plan::GreaterThan: { case proto::plan::GreaterThan: {
CompareElementFunc<T, U, proto::plan::GreaterThan, filter_type> CompareElementFunc<T, U, proto::plan::GreaterThan, filter_type>
func; func;
func(left, right, size, res, offsets); func(left,
right,
size,
res,
bitmap_input,
processed_cursor,
offsets);
break; break;
} }
case proto::plan::GreaterEqual: { case proto::plan::GreaterEqual: {
CompareElementFunc<T, U, proto::plan::GreaterEqual, filter_type> CompareElementFunc<T, U, proto::plan::GreaterEqual, filter_type>
func; func;
func(left, right, size, res, offsets); func(left,
right,
size,
res,
bitmap_input,
processed_cursor,
offsets);
break; break;
} }
case proto::plan::LessThan: { case proto::plan::LessThan: {
CompareElementFunc<T, U, proto::plan::LessThan, filter_type> CompareElementFunc<T, U, proto::plan::LessThan, filter_type>
func; func;
func(left, right, size, res, offsets); func(left,
right,
size,
res,
bitmap_input,
processed_cursor,
offsets);
break; break;
} }
case proto::plan::LessEqual: { case proto::plan::LessEqual: {
CompareElementFunc<T, U, proto::plan::LessEqual, filter_type> CompareElementFunc<T, U, proto::plan::LessEqual, filter_type>
func; func;
func(left, right, size, res, offsets); func(left,
right,
size,
res,
bitmap_input,
processed_cursor,
offsets);
break; break;
} }
case proto::plan::Equal: { case proto::plan::Equal: {
CompareElementFunc<T, U, proto::plan::Equal, filter_type> func; CompareElementFunc<T, U, proto::plan::Equal, filter_type> func;
func(left, right, size, res, offsets); func(left,
right,
size,
res,
bitmap_input,
processed_cursor,
offsets);
break; break;
} }
case proto::plan::NotEqual: { case proto::plan::NotEqual: {
CompareElementFunc<T, U, proto::plan::NotEqual, filter_type> CompareElementFunc<T, U, proto::plan::NotEqual, filter_type>
func; func;
func(left, right, size, res, offsets); func(left,
right,
size,
res,
bitmap_input,
processed_cursor,
offsets);
break; break;
} }
default: default:
@ -372,6 +412,7 @@ PhyCompareFilterExpr::ExecCompareRightType(OffsetVector* input) {
"compare column expr: {}", "compare column expr: {}",
expr_type)); expr_type));
} }
processed_cursor += size;
}; };
int64_t processed_size; int64_t processed_size;
if (has_offset_input_) { if (has_offset_input_) {

View File

@ -40,6 +40,8 @@ struct CompareElementFunc {
const U* right, const U* right,
size_t size, size_t size,
TargetBitmapView res, TargetBitmapView res,
const TargetBitmap& bitmap_input,
size_t start_cursor,
const int32_t* offsets = nullptr) { const int32_t* offsets = nullptr) {
// This is the original code, kept here for the documentation purposes // This is the original code, kept here for the documentation purposes
// also, used for iterative filter // also, used for iterative filter
@ -69,6 +71,34 @@ struct CompareElementFunc {
return; return;
} }
if (!bitmap_input.empty()) {
for (int i = 0; i < size; ++i) {
if (!bitmap_input[start_cursor + i]) {
continue;
}
if constexpr (op == proto::plan::OpType::Equal) {
res[i] = left[i] == right[i];
} else if constexpr (op == proto::plan::OpType::NotEqual) {
res[i] = left[i] != right[i];
} else if constexpr (op == proto::plan::OpType::GreaterThan) {
res[i] = left[i] > right[i];
} else if constexpr (op == proto::plan::OpType::LessThan) {
res[i] = left[i] < right[i];
} else if constexpr (op == proto::plan::OpType::GreaterEqual) {
res[i] = left[i] >= right[i];
} else if constexpr (op == proto::plan::OpType::LessEqual) {
res[i] = left[i] <= right[i];
} else {
PanicInfo(
OpTypeInvalid,
fmt::format(
"unsupported op_type:{} for CompareElementFunc",
op));
}
}
return;
}
if constexpr (op == proto::plan::OpType::Equal) { if constexpr (op == proto::plan::OpType::Equal) {
res.inplace_compare_column<T, U, milvus::bitset::CompareOpType::EQ>( res.inplace_compare_column<T, U, milvus::bitset::CompareOpType::EQ>(
left, right, size); left, right, size);
@ -170,6 +200,21 @@ class PhyCompareFilterExpr : public Expr {
} }
} }
std::string
ToString() const {
    // Printable form comes straight from the logical compare expression.
    const auto rendered = expr_->ToString();
    return fmt::format("{}", rendered);
}
// Reports this expression as a source.
bool
IsSource() const override {
return true;
}
// Always returns std::nullopt. With no column info, ReorderConjunctExpr falls
// through its column-type checks and classifies this expression by its name
// ("PhyCompareFilterExpr") instead.
std::optional<milvus::expr::ColumnInfo>
GetColumnInfo() const override {
return std::nullopt;
}
private: private:
int64_t int64_t
GetCurrentRows() { GetCurrentRows() {
@ -451,21 +496,21 @@ class PhyCompareFilterExpr : public Expr {
template <typename OpType> template <typename OpType>
VectorPtr VectorPtr
ExecCompareExprDispatcher(OpType op, OffsetVector* input = nullptr); ExecCompareExprDispatcher(OpType op, EvalCtx& context);
VectorPtr VectorPtr
ExecCompareExprDispatcherForHybridSegment(OffsetVector* input = nullptr); ExecCompareExprDispatcherForHybridSegment(EvalCtx& context);
VectorPtr VectorPtr
ExecCompareExprDispatcherForBothDataSegment(OffsetVector* input = nullptr); ExecCompareExprDispatcherForBothDataSegment(EvalCtx& context);
template <typename T> template <typename T>
VectorPtr VectorPtr
ExecCompareLeftType(OffsetVector* input = nullptr); ExecCompareLeftType(EvalCtx& context);
template <typename T, typename U> template <typename T, typename U>
VectorPtr VectorPtr
ExecCompareRightType(OffsetVector* input = nullptr); ExecCompareRightType(EvalCtx& context);
private: private:
const FieldId left_field_; const FieldId left_field_;

View File

@ -83,16 +83,22 @@ PhyConjunctFilterExpr::CanSkipFollowingExprs(ColumnVectorPtr& vec) {
void void
PhyConjunctFilterExpr::SkipFollowingExprs(int start) { PhyConjunctFilterExpr::SkipFollowingExprs(int start) {
for (int i = start; i < inputs_.size(); ++i) { for (int i = start; i < input_order_.size(); ++i) {
inputs_[i]->MoveCursor(); inputs_[input_order_[i]]->MoveCursor();
} }
} }
void void
PhyConjunctFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { PhyConjunctFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
for (int i = 0; i < inputs_.size(); ++i) { if (input_order_.empty()) {
input_order_.resize(inputs_.size());
for (size_t i = 0; i < inputs_.size(); i++) {
input_order_[i] = i;
}
}
for (int i = 0; i < input_order_.size(); ++i) {
VectorPtr input_result; VectorPtr input_result;
inputs_[i]->Eval(context, input_result); inputs_[input_order_[i]]->Eval(context, input_result);
if (i == 0) { if (i == 0) {
result = input_result; result = input_result;
auto all_flat_result = GetColumnVector(result); auto all_flat_result = GetColumnVector(result);
@ -100,6 +106,7 @@ PhyConjunctFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
SkipFollowingExprs(i + 1); SkipFollowingExprs(i + 1);
return; return;
} }
SetNextExprBitmapInput(all_flat_result, context);
continue; continue;
} }
auto input_flat_result = GetColumnVector(input_result); auto input_flat_result = GetColumnVector(input_result);
@ -110,7 +117,9 @@ PhyConjunctFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
SkipFollowingExprs(i + 1); SkipFollowingExprs(i + 1);
return; return;
} }
SetNextExprBitmapInput(all_flat_result, context);
} }
ClearBitmapInput(context);
} }
} //namespace exec } //namespace exec

View File

@ -66,7 +66,7 @@ struct ConjunctElementFunc {
class PhyConjunctFilterExpr : public Expr { class PhyConjunctFilterExpr : public Expr {
public: public:
PhyConjunctFilterExpr(std::vector<ExprPtr>&& inputs, bool is_and) PhyConjunctFilterExpr(std::vector<ExprPtr>&& inputs, bool is_and)
: Expr(DataType::BOOL, std::move(inputs), is_and ? "and" : "or"), : Expr(DataType::BOOL, std::move(inputs), "PhyConjunctFilterExpr"),
is_and_(is_and) { is_and_(is_and) {
std::vector<DataType> input_types; std::vector<DataType> input_types;
input_types.reserve(inputs_.size()); input_types.reserve(inputs_.size());
@ -101,6 +101,63 @@ class PhyConjunctFilterExpr : public Expr {
return true; return true;
} }
std::string
ToString() const {
if (!input_order_.empty()) {
std::vector<std::string> inputs;
for (auto& i : input_order_) {
inputs.push_back(inputs_[i]->ToString());
}
std::string input_str =
is_and_ ? Join(inputs, " && ") : Join(inputs, " || ");
return fmt::format("[ConjuctExpr:{}]", input_str);
}
std::vector<std::string> inputs;
for (auto& in : inputs_) {
inputs.push_back(in->ToString());
}
std::string input_str =
is_and_ ? Join(inputs, " && ") : Join(inputs, "||");
return fmt::format("[ConjuctExpr:{}]", input_str);
}
// A conjunction is not a source. ReorderConjunctExpr therefore never calls
// GetColumnInfo() on it and recognises it by its name instead.
bool
IsSource() const override {
return false;
}
// A conjunction is not bound to a single column; always returns std::nullopt.
std::optional<milvus::expr::ColumnInfo>
GetColumnInfo() const override {
return std::nullopt;
}
// Installs the evaluation order. Each entry is an index into inputs_; Eval()
// visits inputs_[input_order_[i]] for i = 0, 1, ... The vector is copied as
// given; no validation of the indices happens here.
void
Reorder(const std::vector<size_t>& exprs_order) {
input_order_ = exprs_order;
}
// Returns a copy of the current evaluation order. The result is empty until
// an order has been installed by Reorder() or filled in by Eval().
std::vector<size_t>
GetReorder() {
return input_order_;
}
void
SetNextExprBitmapInput(const ColumnVectorPtr& vec, EvalCtx& context) {
TargetBitmapView last_res_bitmap(vec->GetRawData(), vec->size());
TargetBitmap next_input_bitmap(last_res_bitmap);
if (is_and_) {
context.set_bitmap_input(std::move(next_input_bitmap));
} else {
next_input_bitmap.flip();
context.set_bitmap_input(std::move(next_input_bitmap));
}
}
// Drops the pre-filter bitmap held by the eval context by forwarding to
// EvalCtx::clear_bitmap_input().
void
ClearBitmapInput(EvalCtx& context) {
context.clear_bitmap_input();
}
private: private:
int64_t int64_t
UpdateResult(ColumnVectorPtr& input_result, UpdateResult(ColumnVectorPtr& input_result,
@ -117,7 +174,7 @@ class PhyConjunctFilterExpr : public Expr {
SkipFollowingExprs(int start); SkipFollowingExprs(int start);
// true if conjunction (and), false if disjunction (or). // true if conjunction (and), false if disjunction (or).
bool is_and_; bool is_and_;
std::vector<int32_t> input_order_; std::vector<size_t> input_order_;
}; };
} //namespace exec } //namespace exec
} // namespace milvus } // namespace milvus

View File

@ -69,12 +69,30 @@ class EvalCtx {
offset_input_ = offset_input; offset_input_ = offset_input;
} }
// Takes ownership of `bitmap_input` (moved in) and stores it as the current
// pre-filter bitmap, replacing any previous one.
inline void
set_bitmap_input(TargetBitmap&& bitmap_input) {
bitmap_input_ = std::move(bitmap_input);
}
// Returns a reference to the stored pre-filter bitmap. Callers test empty()
// to find out whether a pre-filter is active.
inline const TargetBitmap&
get_bitmap_input() const {
return bitmap_input_;
}
// Clears the stored pre-filter bitmap by calling clear() on it.
void
clear_bitmap_input() {
bitmap_input_.clear();
}
private: private:
ExecContext* exec_ctx_ = nullptr; ExecContext* exec_ctx_ = nullptr;
ExprSet* expr_set_ = nullptr; ExprSet* expr_set_ = nullptr;
// we may accept offsets array as input and do expr filtering on these data // we may accept offsets array as input and do expr filtering on these data
OffsetVector* offset_input_ = nullptr; OffsetVector* offset_input_ = nullptr;
bool input_no_nulls_ = false; bool input_no_nulls_ = false;
// Pre-filter bitmap for expression evaluation: lets an expression skip rows that preceding expressions have already filtered out.
TargetBitmap bitmap_input_;
}; };
} // namespace exec } // namespace exec

View File

@ -30,7 +30,7 @@ PhyExistsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
PanicInfo(ExprInvalid, PanicInfo(ExprInvalid,
"exists expr for json index mode not supported"); "exists expr for json index mode not supported");
} }
result = EvalJsonExistsForDataSegment(input); result = EvalJsonExistsForDataSegment(context);
break; break;
} }
default: default:
@ -41,21 +41,26 @@ PhyExistsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
} }
VectorPtr VectorPtr
PhyExistsFilterExpr::EvalJsonExistsForDataSegment(OffsetVector* input) { PhyExistsFilterExpr::EvalJsonExistsForDataSegment(EvalCtx& context) {
auto* input = context.get_offset_input();
const auto& bitmap_input = context.get_bitmap_input();
FieldId field_id = expr_->column_.field_id_;
auto real_batch_size = auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize(); has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
int processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>( [&bitmap_input, &
processed_cursor ]<FilterType filter_type = FilterType::sequential>(
const milvus::Json* data, const milvus::Json* data,
const bool* valid_data, const bool* valid_data,
const int32_t* offsets, const int32_t* offsets,
@ -63,6 +68,7 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment(OffsetVector* input) {
TargetBitmapView res, TargetBitmapView res,
TargetBitmapView valid_res, TargetBitmapView valid_res,
const std::string& pointer) { const std::string& pointer) {
bool has_bitmap_input = !bitmap_input.empty();
for (int i = 0; i < size; ++i) { for (int i = 0; i < size; ++i) {
auto offset = i; auto offset = i;
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random) {
@ -72,8 +78,12 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment(OffsetVector* input) {
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
continue;
}
res[i] = data[offset].exist(pointer); res[i] = data[offset].exist(pointer);
} }
processed_cursor += size;
}; };
int64_t processed_size; int64_t processed_size;

View File

@ -57,9 +57,24 @@ class PhyExistsFilterExpr : public SegmentExpr {
void void
Eval(EvalCtx& context, VectorPtr& result) override; Eval(EvalCtx& context, VectorPtr& result) override;
std::string
ToString() const {
    // Printable form comes straight from the logical exists expression.
    const auto rendered = expr_->ToString();
    return fmt::format("{}", rendered);
}
// Reports this expression as a source.
bool
IsSource() const override {
return true;
}
// Returns the column the exists-check targets (expr_->column_).
std::optional<milvus::expr::ColumnInfo>
GetColumnInfo() const override {
return expr_->column_;
}
private: private:
VectorPtr VectorPtr
EvalJsonExistsForDataSegment(OffsetVector* input = nullptr); EvalJsonExistsForDataSegment(EvalCtx& context);
private: private:
std::shared_ptr<const milvus::expr::ExistsExpr> expr_; std::shared_ptr<const milvus::expr::ExistsExpr> expr_;

View File

@ -67,7 +67,9 @@ CompileExpressions(const std::vector<expr::TypedExprPtr>& sources,
enable_constant_folding)); enable_constant_folding));
} }
OptimizeCompiledExprs(context, exprs); if (OPTIMIZE_EXPR_ENABLED) {
OptimizeCompiledExprs(context, exprs);
}
return exprs; return exprs;
} }
@ -303,9 +305,174 @@ CompileExpression(const expr::TypedExprPtr& expr,
return result; return result;
} }
// Returns true when `input` is a PhyUnaryRangeFilterExpr whose logical
// operator is PrefixMatch, PostfixMatch or Match; false for everything else.
// The concrete type is identified by the expression name, so the downcast is
// only performed after the name check has passed.
bool
IsLikeExpr(std::shared_ptr<Expr> input) {
    if (input->name() != "PhyUnaryRangeFilterExpr") {
        return false;
    }
    const auto op = std::static_pointer_cast<PhyUnaryRangeFilterExpr>(input)
                        ->GetLogicalExpr()
                        ->op_type_;
    return op == proto::plan::PrefixMatch ||
           op == proto::plan::PostfixMatch || op == proto::plan::Match;
}
// Computes an evaluation order for the children of a conjunct expression and
// installs it with PhyConjunctFilterExpr::Reorder().
//
// Each child is placed in exactly one bucket; buckets are concatenated in the
// order of the Bucket enum below, and children keep their original relative
// order inside a bucket. Nested conjunct children are reordered recursively.
//
// Params:
//   expr                 conjunct expression to reorder; nothing is done when
//                        it is null or when the query context has no segment.
//   context              execution context used to reach the segment.
//   has_heavy_operation  in/out flag; set to true when any child (directly or
//                        through a nested conjunct) is classified as heavy.
//                        It is never reset to false here.
inline void
ReorderConjunctExpr(std::shared_ptr<milvus::exec::PhyConjunctFilterExpr>& expr,
                    ExecContext* context,
                    bool& has_heavy_operation) {
    auto* segment = context->get_query_context()->get_segment();
    if (!segment || !expr) {
        return;
    }

    // Buckets in final evaluation order (cheapest first):
    //  1. Numeric column expressions (fastest to evaluate)
    //  2. Indexed column expressions (can use index for efficient filtering)
    //  3. String column expressions
    //  4. Light conjunct expressions (conjunctions without heavy operations)
    //  5. Other expressions
    //  6. Array column expressions
    //  7. String like expressions
    //  8. Array like expressions
    //  9. JSON column expressions (expensive to evaluate)
    // 10. JSON like expressions (more expensive than common json compare)
    // 11. Heavy conjunct expressions (conjunctions with heavy operations)
    // 12. Compare filter expressions (most expensive, comparing two columns)
    enum Bucket : size_t {
        kNumeric = 0,
        kIndexed,
        kString,
        kLightConjunct,
        kOther,
        kArray,
        kStringLike,
        kArrayLike,
        kJson,
        kJsonLike,
        kHeavyConjunct,
        kCompare,
        kBucketCount,
    };
    std::vector<size_t> buckets[kBucketCount];

    // Decides the bucket of one child and updates has_heavy_operation.
    const auto classify = [&](const std::shared_ptr<Expr>& input) -> Bucket {
        if (input->IsSource()) {
            // Query the column once; only source expressions are asked.
            const auto column = input->GetColumnInfo();
            if (column.has_value()) {
                const auto data_type = column->data_type_;
                if (IsNumericDataType(data_type)) {
                    return kNumeric;
                }
                // The index check comes before the string/array/json checks,
                // so an indexed column of those types lands here.
                if (segment->HasIndex(column->field_id_)) {
                    return kIndexed;
                }
                if (IsStringDataType(data_type)) {
                    if (!IsLikeExpr(input)) {
                        return kString;
                    }
                    has_heavy_operation = true;
                    return kStringLike;
                }
                if (IsArrayDataType(data_type)) {
                    if (!IsLikeExpr(input)) {
                        return kArray;
                    }
                    has_heavy_operation = true;
                    return kArrayLike;
                }
                if (IsJsonDataType(data_type)) {
                    // Any JSON access counts as heavy, like or not.
                    has_heavy_operation = true;
                    return IsLikeExpr(input) ? kJsonLike : kJson;
                }
                // Other column types fall through to the name-based checks.
            }
        }

        const auto expr_name = input->name();
        if (expr_name == "PhyConjunctFilterExpr") {
            bool sub_expr_heavy = false;
            auto sub_conjunct =
                std::static_pointer_cast<PhyConjunctFilterExpr>(input);
            ReorderConjunctExpr(sub_conjunct, context, sub_expr_heavy);
            has_heavy_operation |= sub_expr_heavy;
            return sub_expr_heavy ? kHeavyConjunct : kLightConjunct;
        }
        if (expr_name == "PhyCompareFilterExpr") {
            has_heavy_operation = true;
            return kCompare;
        }
        return kOther;
    };

    const auto& inputs = expr->GetInputsRef();
    for (size_t i = 0; i < inputs.size(); ++i) {
        buckets[classify(inputs[i])].push_back(i);
    }

    std::vector<size_t> reorder;
    reorder.reserve(inputs.size());
    for (const auto& bucket : buckets) {
        reorder.insert(reorder.end(), bucket.begin(), bucket.end());
    }

    AssertInfo(reorder.size() == inputs.size(),
               "reorder size:{} but input size:{}",
               reorder.size(),
               inputs.size());

    expr->Reorder(reorder);
}
inline void inline void
OptimizeCompiledExprs(ExecContext* context, const std::vector<ExprPtr>& exprs) { OptimizeCompiledExprs(ExecContext* context, const std::vector<ExprPtr>& exprs) {
//TODO: add optimization pattern std::chrono::high_resolution_clock::time_point start =
std::chrono::high_resolution_clock::now();
for (const auto& expr : exprs) {
if (expr->name() == "PhyConjunctFilterExpr") {
LOG_DEBUG("before reoder filter expression: {}", expr->ToString());
auto conjunct_expr =
std::static_pointer_cast<PhyConjunctFilterExpr>(expr);
bool has_heavy_operation = false;
ReorderConjunctExpr(conjunct_expr, context, has_heavy_operation);
LOG_DEBUG("after reorder filter expression: {}", expr->ToString());
}
}
std::chrono::high_resolution_clock::time_point end =
std::chrono::high_resolution_clock::now();
double cost =
std::chrono::duration<double, std::micro>(end - start).count();
monitor::internal_core_optimize_expr_latency.Observe(cost / 1000);
} }
} // namespace exec } // namespace exec

View File

@ -64,7 +64,7 @@ class Expr {
} }
std::string std::string
get_name() { name() {
return name_; return name_;
} }
@ -88,9 +88,29 @@ class Expr {
return true; return true;
} }
// Default printable form: not provided. Calling this on an expression that
// does not override it raises a NotImplemented panic.
virtual std::string
ToString() const {
PanicInfo(ErrorCode::NotImplemented, "not implemented");
}
// Default: an expression is not a source unless a subclass says otherwise.
virtual bool
IsSource() const {
return false;
}
// Default: not provided. Calling this on an expression that does not override
// it raises a NotImplemented panic, so callers should check IsSource() first
// (as ReorderConjunctExpr does).
virtual std::optional<milvus::expr::ColumnInfo>
GetColumnInfo() const {
PanicInfo(ErrorCode::NotImplemented, "not implemented");
}
// Read-only access to the child expressions, returned by reference (no copy).
const std::vector<std::shared_ptr<Expr>>&
GetInputsRef() {
return inputs_;
}
protected: protected:
DataType type_; DataType type_;
const std::vector<std::shared_ptr<Expr>> inputs_; std::vector<std::shared_ptr<Expr>> inputs_;
std::string name_; std::string name_;
// NOTE: unused // NOTE: unused
std::shared_ptr<VectorFunction> vector_func_; std::shared_ptr<VectorFunction> vector_func_;
@ -167,6 +187,11 @@ class SegmentExpr : public Expr {
} }
} }
// Segment-backed expressions report themselves as sources.
virtual bool
IsSource() const override {
return true;
}
void void
MoveCursorForDataMultipleChunk() { MoveCursorForDataMultipleChunk() {
int64_t processed_size = 0; int64_t processed_size = 0;
@ -1142,6 +1167,9 @@ class SegmentExpr : public Expr {
std::shared_ptr<TargetBitmap> cached_match_res_{nullptr}; std::shared_ptr<TargetBitmap> cached_match_res_{nullptr};
}; };
// Returns true when `expr` is a unary range filter whose operator is
// PrefixMatch, PostfixMatch or Match.
bool
IsLikeExpr(std::shared_ptr<Expr> expr);
void void
OptimizeCompiledExprs(ExecContext* context, const std::vector<ExprPtr>& exprs); OptimizeCompiledExprs(ExecContext* context, const std::vector<ExprPtr>& exprs);

View File

@ -30,17 +30,17 @@ PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
if (is_index_mode_ && !has_offset_input_) { if (is_index_mode_ && !has_offset_input_) {
result = EvalArrayContainsForIndexSegment(); result = EvalArrayContainsForIndexSegment();
} else { } else {
result = EvalJsonContainsForDataSegment(input); result = EvalJsonContainsForDataSegment(context);
} }
break; break;
} }
case DataType::JSON: { case DataType::JSON: {
if (is_index_mode_ && !has_offset_input_) { if (is_index_mode_ && !context.get_offset_input()) {
PanicInfo( PanicInfo(ExprInvalid,
ExprInvalid, "exists expr for json or array index mode not "
"exists expr for json or array index mode not supported"); "supported");
} }
result = EvalJsonContainsForDataSegment(input); result = EvalJsonContainsForDataSegment(context);
break; break;
} }
default: default:
@ -51,7 +51,7 @@ PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
} }
VectorPtr VectorPtr
PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) { PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(EvalCtx& context) {
auto data_type = expr_->column_.data_type_; auto data_type = expr_->column_.data_type_;
switch (expr_->op_) { switch (expr_->op_) {
case proto::plan::JSONContainsExpr_JSONOp_Contains: case proto::plan::JSONContainsExpr_JSONOp_Contains:
@ -60,16 +60,16 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
auto val_type = expr_->vals_[0].val_case(); auto val_type = expr_->vals_[0].val_case();
switch (val_type) { switch (val_type) {
case proto::plan::GenericValue::kBoolVal: { case proto::plan::GenericValue::kBoolVal: {
return ExecArrayContains<bool>(input); return ExecArrayContains<bool>(context);
} }
case proto::plan::GenericValue::kInt64Val: { case proto::plan::GenericValue::kInt64Val: {
return ExecArrayContains<int64_t>(input); return ExecArrayContains<int64_t>(context);
} }
case proto::plan::GenericValue::kFloatVal: { case proto::plan::GenericValue::kFloatVal: {
return ExecArrayContains<double>(input); return ExecArrayContains<double>(context);
} }
case proto::plan::GenericValue::kStringVal: { case proto::plan::GenericValue::kStringVal: {
return ExecArrayContains<std::string>(input); return ExecArrayContains<std::string>(context);
} }
default: default:
PanicInfo( PanicInfo(
@ -81,19 +81,19 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
auto val_type = expr_->vals_[0].val_case(); auto val_type = expr_->vals_[0].val_case();
switch (val_type) { switch (val_type) {
case proto::plan::GenericValue::kBoolVal: { case proto::plan::GenericValue::kBoolVal: {
return ExecJsonContains<bool>(input); return ExecJsonContains<bool>(context);
} }
case proto::plan::GenericValue::kInt64Val: { case proto::plan::GenericValue::kInt64Val: {
return ExecJsonContains<int64_t>(input); return ExecJsonContains<int64_t>(context);
} }
case proto::plan::GenericValue::kFloatVal: { case proto::plan::GenericValue::kFloatVal: {
return ExecJsonContains<double>(input); return ExecJsonContains<double>(context);
} }
case proto::plan::GenericValue::kStringVal: { case proto::plan::GenericValue::kStringVal: {
return ExecJsonContains<std::string>(input); return ExecJsonContains<std::string>(context);
} }
case proto::plan::GenericValue::kArrayVal: { case proto::plan::GenericValue::kArrayVal: {
return ExecJsonContainsArray(input); return ExecJsonContainsArray(context);
} }
default: default:
PanicInfo(DataTypeInvalid, PanicInfo(DataTypeInvalid,
@ -101,7 +101,7 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
val_type); val_type);
} }
} else { } else {
return ExecJsonContainsWithDiffType(input); return ExecJsonContainsWithDiffType(context);
} }
} }
} }
@ -110,16 +110,16 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
auto val_type = expr_->vals_[0].val_case(); auto val_type = expr_->vals_[0].val_case();
switch (val_type) { switch (val_type) {
case proto::plan::GenericValue::kBoolVal: { case proto::plan::GenericValue::kBoolVal: {
return ExecArrayContainsAll<bool>(input); return ExecArrayContainsAll<bool>(context);
} }
case proto::plan::GenericValue::kInt64Val: { case proto::plan::GenericValue::kInt64Val: {
return ExecArrayContainsAll<int64_t>(input); return ExecArrayContainsAll<int64_t>(context);
} }
case proto::plan::GenericValue::kFloatVal: { case proto::plan::GenericValue::kFloatVal: {
return ExecArrayContainsAll<double>(input); return ExecArrayContainsAll<double>(context);
} }
case proto::plan::GenericValue::kStringVal: { case proto::plan::GenericValue::kStringVal: {
return ExecArrayContainsAll<std::string>(input); return ExecArrayContainsAll<std::string>(context);
} }
default: default:
PanicInfo( PanicInfo(
@ -131,19 +131,19 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
auto val_type = expr_->vals_[0].val_case(); auto val_type = expr_->vals_[0].val_case();
switch (val_type) { switch (val_type) {
case proto::plan::GenericValue::kBoolVal: { case proto::plan::GenericValue::kBoolVal: {
return ExecJsonContainsAll<bool>(input); return ExecJsonContainsAll<bool>(context);
} }
case proto::plan::GenericValue::kInt64Val: { case proto::plan::GenericValue::kInt64Val: {
return ExecJsonContainsAll<int64_t>(input); return ExecJsonContainsAll<int64_t>(context);
} }
case proto::plan::GenericValue::kFloatVal: { case proto::plan::GenericValue::kFloatVal: {
return ExecJsonContainsAll<double>(input); return ExecJsonContainsAll<double>(context);
} }
case proto::plan::GenericValue::kStringVal: { case proto::plan::GenericValue::kStringVal: {
return ExecJsonContainsAll<std::string>(input); return ExecJsonContainsAll<std::string>(context);
} }
case proto::plan::GenericValue::kArrayVal: { case proto::plan::GenericValue::kArrayVal: {
return ExecJsonContainsAllArray(input); return ExecJsonContainsAllArray(context);
} }
default: default:
PanicInfo(DataTypeInvalid, PanicInfo(DataTypeInvalid,
@ -151,7 +151,7 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
val_type); val_type);
} }
} else { } else {
return ExecJsonContainsAllWithDiffType(input); return ExecJsonContainsAllWithDiffType(context);
} }
} }
} }
@ -164,11 +164,13 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
template <typename ExprValueType> template <typename ExprValueType>
VectorPtr VectorPtr
PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) { PhyJsonContainsFilterExpr::ExecArrayContains(EvalCtx& context) {
using GetType = using GetType =
std::conditional_t<std::is_same_v<ExprValueType, std::string>, std::conditional_t<std::is_same_v<ExprValueType, std::string>,
std::string_view, std::string_view,
ExprValueType>; ExprValueType>;
auto* input = context.get_offset_input();
const auto& bitmap_input = context.get_bitmap_input();
auto real_batch_size = auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize(); has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) { if (real_batch_size == 0) {
@ -177,18 +179,21 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) {
AssertInfo(expr_->column_.nested_path_.size() == 0, AssertInfo(expr_->column_.nested_path_.size() == 0,
"[ExecArrayContains]nested path must be null"); "[ExecArrayContains]nested path must be null");
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
if (!arg_inited_) { if (!arg_inited_) {
arg_set_ = std::make_shared<SortVectorElement<GetType>>(expr_->vals_); arg_set_ = std::make_shared<SortVectorElement<GetType>>(expr_->vals_);
arg_inited_ = true; arg_inited_ = true;
} }
int processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>( [&processed_cursor, &
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
const milvus::ArrayView* data, const milvus::ArrayView* data,
const bool* valid_data, const bool* valid_data,
const int32_t* offsets, const int32_t* offsets,
@ -205,6 +210,7 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) {
} }
return false; return false;
}; };
bool has_bitmap_input = !bitmap_input.empty();
for (int i = 0; i < size; ++i) { for (int i = 0; i < size; ++i) {
auto offset = i; auto offset = i;
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random) {
@ -214,8 +220,12 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) {
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
continue;
}
res[i] = executor(offset); res[i] = executor(offset);
} }
processed_cursor += size;
}; };
int64_t processed_size; int64_t processed_size;
@ -241,30 +251,36 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) {
template <typename ExprValueType> template <typename ExprValueType>
VectorPtr VectorPtr
PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) { PhyJsonContainsFilterExpr::ExecJsonContains(EvalCtx& context) {
using GetType = using GetType =
std::conditional_t<std::is_same_v<ExprValueType, std::string>, std::conditional_t<std::is_same_v<ExprValueType, std::string>,
std::string_view, std::string_view,
ExprValueType>; ExprValueType>;
auto* input = context.get_offset_input();
const auto& bitmap_input = context.get_bitmap_input();
auto real_batch_size = auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize(); has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
if (!arg_inited_) { if (!arg_inited_) {
arg_set_ = std::make_shared<SortVectorElement<GetType>>(expr_->vals_); arg_set_ = std::make_shared<SortVectorElement<GetType>>(expr_->vals_);
arg_inited_ = true; arg_inited_ = true;
} }
size_t processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>( [&processed_cursor, &
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
const milvus::Json* data, const milvus::Json* data,
const bool* valid_data, const bool* valid_data,
const int32_t* offsets, const int32_t* offsets,
@ -290,6 +306,7 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) {
} }
return false; return false;
}; };
bool has_bitmap_input = !bitmap_input.empty();
for (size_t i = 0; i < size; ++i) { for (size_t i = 0; i < size; ++i) {
auto offset = i; auto offset = i;
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random) {
@ -299,8 +316,12 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) {
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
continue;
}
res[i] = executor(offset); res[i] = executor(offset);
} }
processed_cursor += size;
}; };
int64_t processed_size; int64_t processed_size;
@ -329,26 +350,31 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) {
} }
VectorPtr VectorPtr
PhyJsonContainsFilterExpr::ExecJsonContainsArray(OffsetVector* input) { PhyJsonContainsFilterExpr::ExecJsonContainsArray(EvalCtx& context) {
auto* input = context.get_offset_input();
const auto& bitmap_input = context.get_bitmap_input();
auto real_batch_size = auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize(); has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
std::vector<proto::plan::Array> elements; std::vector<proto::plan::Array> elements;
for (auto const& element : expr_->vals_) { for (auto const& element : expr_->vals_) {
elements.emplace_back(GetValueFromProto<proto::plan::Array>(element)); elements.emplace_back(GetValueFromProto<proto::plan::Array>(element));
} }
size_t processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>( [&processed_cursor, &
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
const milvus::Json* data, const milvus::Json* data,
const bool* valid_data, const bool* valid_data,
const int32_t* offsets, const int32_t* offsets,
@ -383,6 +409,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray(OffsetVector* input) {
} }
return false; return false;
}; };
bool has_bitmap_input = !bitmap_input.empty();
for (size_t i = 0; i < size; ++i) { for (size_t i = 0; i < size; ++i) {
auto offset = i; auto offset = i;
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random) {
@ -392,8 +419,12 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray(OffsetVector* input) {
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
continue;
}
res[i] = executor(offset); res[i] = executor(offset);
} }
processed_cursor += size;
}; };
int64_t processed_size; int64_t processed_size;
@ -423,11 +454,13 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray(OffsetVector* input) {
template <typename ExprValueType> template <typename ExprValueType>
VectorPtr VectorPtr
PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) { PhyJsonContainsFilterExpr::ExecArrayContainsAll(EvalCtx& context) {
using GetType = using GetType =
std::conditional_t<std::is_same_v<ExprValueType, std::string>, std::conditional_t<std::is_same_v<ExprValueType, std::string>,
std::string_view, std::string_view,
ExprValueType>; ExprValueType>;
auto* input = context.get_offset_input();
const auto& bitmap_input = context.get_bitmap_input();
AssertInfo(expr_->column_.nested_path_.size() == 0, AssertInfo(expr_->column_.nested_path_.size() == 0,
"[ExecArrayContainsAll]nested path must be null"); "[ExecArrayContainsAll]nested path must be null");
auto real_batch_size = auto real_batch_size =
@ -436,19 +469,21 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
std::set<GetType> elements; std::set<GetType> elements;
for (auto const& element : expr_->vals_) { for (auto const& element : expr_->vals_) {
elements.insert(GetValueFromProto<GetType>(element)); elements.insert(GetValueFromProto<GetType>(element));
} }
int processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>( [&processed_cursor, &
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
const milvus::ArrayView* data, const milvus::ArrayView* data,
const bool* valid_data, const bool* valid_data,
const int32_t* offsets, const int32_t* offsets,
@ -467,6 +502,7 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) {
} }
return tmp_elements.size() == 0; return tmp_elements.size() == 0;
}; };
bool has_bitmap_input = !bitmap_input.empty();
for (int i = 0; i < size; ++i) { for (int i = 0; i < size; ++i) {
auto offset = i; auto offset = i;
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random) {
@ -476,8 +512,12 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) {
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
continue;
}
res[i] = executor(offset); res[i] = executor(offset);
} }
processed_cursor += size;
}; };
int64_t processed_size; int64_t processed_size;
@ -503,22 +543,24 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) {
template <typename ExprValueType> template <typename ExprValueType>
VectorPtr VectorPtr
PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) { PhyJsonContainsFilterExpr::ExecJsonContainsAll(EvalCtx& context) {
using GetType = using GetType =
std::conditional_t<std::is_same_v<ExprValueType, std::string>, std::conditional_t<std::is_same_v<ExprValueType, std::string>,
std::string_view, std::string_view,
ExprValueType>; ExprValueType>;
auto* input = context.get_offset_input();
const auto& bitmap_input = context.get_bitmap_input();
auto real_batch_size = auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize(); has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
std::set<GetType> elements; std::set<GetType> elements;
@ -526,8 +568,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) {
elements.insert(GetValueFromProto<GetType>(element)); elements.insert(GetValueFromProto<GetType>(element));
} }
int processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>( [&processed_cursor, &
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
const milvus::Json* data, const milvus::Json* data,
const bool* valid_data, const bool* valid_data,
const int32_t* offsets, const int32_t* offsets,
@ -556,6 +600,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) {
} }
return tmp_elements.size() == 0; return tmp_elements.size() == 0;
}; };
bool has_bitmap_input = !bitmap_input.empty();
for (size_t i = 0; i < size; ++i) { for (size_t i = 0; i < size; ++i) {
auto offset = i; auto offset = i;
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random) {
@ -565,8 +610,12 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) {
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
continue;
}
res[i] = executor(offset); res[i] = executor(offset);
} }
processed_cursor += size;
}; };
int64_t processed_size; int64_t processed_size;
@ -595,18 +644,19 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) {
} }
VectorPtr VectorPtr
PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType( PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(EvalCtx& context) {
OffsetVector* input) { auto* input = context.get_offset_input();
const auto& bitmap_input = context.get_bitmap_input();
auto real_batch_size = auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize(); has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
@ -618,8 +668,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(
i++; i++;
} }
int processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>( [&processed_cursor, &
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
const milvus::Json* data, const milvus::Json* data,
const bool* valid_data, const bool* valid_data,
const int32_t* offsets, const int32_t* offsets,
@ -707,6 +759,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(
} }
return tmp_elements_index.size() == 0; return tmp_elements_index.size() == 0;
}; };
bool has_bitmap_input = !bitmap_input.empty();
for (size_t i = 0; i < size; ++i) { for (size_t i = 0; i < size; ++i) {
auto offset = i; auto offset = i;
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random) {
@ -716,8 +769,13 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
continue;
}
res[i] = executor(offset); res[i] = executor(offset);
} }
processed_cursor += size;
}; };
int64_t processed_size; int64_t processed_size;
@ -748,18 +806,20 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(
} }
VectorPtr VectorPtr
PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(OffsetVector* input) { PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(EvalCtx& context) {
auto* input = context.get_offset_input();
const auto& bitmap_input = context.get_bitmap_input();
auto real_batch_size = auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize(); has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
@ -767,8 +827,11 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(OffsetVector* input) {
for (auto const& element : expr_->vals_) { for (auto const& element : expr_->vals_) {
elements.emplace_back(GetValueFromProto<proto::plan::Array>(element)); elements.emplace_back(GetValueFromProto<proto::plan::Array>(element));
} }
size_t processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>( [&processed_cursor, &
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
const milvus::Json* data, const milvus::Json* data,
const bool* valid_data, const bool* valid_data,
const int32_t* offsets, const int32_t* offsets,
@ -807,6 +870,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(OffsetVector* input) {
} }
return exist_elements_index.size() == elements.size(); return exist_elements_index.size() == elements.size();
}; };
bool has_bitmap_input = !bitmap_input.empty();
for (size_t i = 0; i < size; ++i) { for (size_t i = 0; i < size; ++i) {
auto offset = i; auto offset = i;
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random) {
@ -816,8 +880,13 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(OffsetVector* input) {
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
continue;
}
res[i] = executor(offset); res[i] = executor(offset);
} }
processed_cursor += size;
}; };
int64_t processed_size; int64_t processed_size;
@ -846,18 +915,20 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(OffsetVector* input) {
} }
VectorPtr VectorPtr
PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(OffsetVector* input) { PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(EvalCtx& context) {
auto* input = context.get_offset_input();
const auto& bitmap_input = context.get_bitmap_input();
auto real_batch_size = auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize(); has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
@ -869,8 +940,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(OffsetVector* input) {
i++; i++;
} }
size_t processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>( [&processed_cursor, &
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
const milvus::Json* data, const milvus::Json* data,
const bool* valid_data, const bool* valid_data,
const int32_t* offsets, const int32_t* offsets,
@ -949,6 +1022,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(OffsetVector* input) {
} }
return false; return false;
}; };
bool has_bitmap_input = !bitmap_input.empty();
for (size_t i = 0; i < size; ++i) { for (size_t i = 0; i < size; ++i) {
auto offset = i; auto offset = i;
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random) {
@ -958,8 +1032,13 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(OffsetVector* input) {
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
continue;
}
res[i] = executor(offset); res[i] = executor(offset);
} }
processed_cursor += size;
}; };
int64_t processed_size; int64_t processed_size;

View File

@ -51,37 +51,52 @@ class PhyJsonContainsFilterExpr : public SegmentExpr {
void void
Eval(EvalCtx& context, VectorPtr& result) override; Eval(EvalCtx& context, VectorPtr& result) override;
// Returns a textual form of this physical expression by delegating to
// expr_->ToString() and passing the result through fmt::format("{}").
// NOTE(review): not marked `override`, unlike IsSource()/GetColumnInfo()
// declared next to it -- confirm whether the base class declares a virtual
// ToString().
std::string
ToString() const {
return fmt::format("{}", expr_->ToString());
}
// Overrides the base-class IsSource() hook; this expression always reports
// true.
// NOTE(review): presumably "source" marks an expression that evaluates a
// column directly rather than combining child results -- confirm against the
// base Expr declaration.
bool
IsSource() const override {
return true;
}
// Overrides the base-class GetColumnInfo() hook: returns the column_ member
// of the wrapped expression (expr_), converted to a std::optional by the
// return statement.
std::optional<milvus::expr::ColumnInfo>
GetColumnInfo() const override {
return expr_->column_;
}
private: private:
VectorPtr VectorPtr
EvalJsonContainsForDataSegment(OffsetVector* input = nullptr); EvalJsonContainsForDataSegment(EvalCtx& context);
template <typename ExprValueType> template <typename ExprValueType>
VectorPtr VectorPtr
ExecJsonContains(OffsetVector* input = nullptr); ExecJsonContains(EvalCtx& context);
template <typename ExprValueType> template <typename ExprValueType>
VectorPtr VectorPtr
ExecArrayContains(OffsetVector* input = nullptr); ExecArrayContains(EvalCtx& context);
template <typename ExprValueType> template <typename ExprValueType>
VectorPtr VectorPtr
ExecJsonContainsAll(OffsetVector* input = nullptr); ExecJsonContainsAll(EvalCtx& context);
template <typename ExprValueType> template <typename ExprValueType>
VectorPtr VectorPtr
ExecArrayContainsAll(OffsetVector* input = nullptr); ExecArrayContainsAll(EvalCtx& context);
VectorPtr VectorPtr
ExecJsonContainsArray(OffsetVector* input = nullptr); ExecJsonContainsArray(EvalCtx& context);
VectorPtr VectorPtr
ExecJsonContainsAllArray(OffsetVector* input = nullptr); ExecJsonContainsAllArray(EvalCtx& context);
VectorPtr VectorPtr
ExecJsonContainsAllWithDiffType(OffsetVector* input = nullptr); ExecJsonContainsAllWithDiffType(EvalCtx& context);
VectorPtr VectorPtr
ExecJsonContainsWithDiffType(OffsetVector* input = nullptr); ExecJsonContainsWithDiffType(EvalCtx& context);
VectorPtr VectorPtr
EvalArrayContainsForIndexSegment(); EvalArrayContainsForIndexSegment();

View File

@ -87,6 +87,21 @@ class PhyLogicalBinaryExpr : public Expr {
inputs_[1]->SupportOffsetInput(); inputs_[1]->SupportOffsetInput();
} }
// Returns a textual form of this physical expression by delegating to the
// wrapped LogicalBinaryExpr (expr_) and passing its ToString() result
// through fmt::format("{}").
// NOTE(review): not marked `override`, unlike IsSource()/GetColumnInfo()
// declared next to it -- confirm whether the base class declares a virtual
// ToString().
std::string
ToString() const {
return fmt::format("{}", expr_->ToString());
}
// Overrides the base-class IsSource() hook; a binary logical expression
// always reports false.
// NOTE(review): presumably because it only combines the results of its two
// inputs instead of reading a column itself -- confirm against the base Expr
// declaration.
bool
IsSource() const override {
return false;
}
// Overrides the base-class GetColumnInfo() hook: always returns
// std::nullopt, i.e. this expression reports no column information.
std::optional<milvus::expr::ColumnInfo>
GetColumnInfo() const override {
return std::nullopt;
}
private: private:
std::shared_ptr<const milvus::expr::LogicalBinaryExpr> expr_; std::shared_ptr<const milvus::expr::LogicalBinaryExpr> expr_;
}; };

View File

@ -51,6 +51,21 @@ class PhyLogicalUnaryExpr : public Expr {
return inputs_[0]->SupportOffsetInput(); return inputs_[0]->SupportOffsetInput();
} }
// Returns a textual form of this physical expression by delegating to the
// wrapped LogicalUnaryExpr (expr_) and passing its ToString() result through
// fmt::format("{}").
// NOTE(review): not marked `override`, unlike IsSource()/GetColumnInfo()
// declared next to it -- confirm whether the base class declares a virtual
// ToString().
std::string
ToString() const {
return fmt::format("{}", expr_->ToString());
}
// Overrides the base-class IsSource() hook; a unary logical expression
// always reports false.
// NOTE(review): presumably because it only transforms the result of its
// single input instead of reading a column itself -- confirm against the
// base Expr declaration.
bool
IsSource() const override {
return false;
}
// Overrides the base-class GetColumnInfo() hook: always returns
// std::nullopt, i.e. this expression reports no column information.
std::optional<milvus::expr::ColumnInfo>
GetColumnInfo() const override {
return std::nullopt;
}
private: private:
std::shared_ptr<const milvus::expr::LogicalUnaryExpr> expr_; std::shared_ptr<const milvus::expr::LogicalUnaryExpr> expr_;
}; };

View File

@ -50,6 +50,21 @@ class PhyNullExpr : public SegmentExpr {
void void
Eval(EvalCtx& context, VectorPtr& result) override; Eval(EvalCtx& context, VectorPtr& result) override;
// Returns a textual form of this physical expression by delegating to
// expr_->ToString() and passing the result through fmt::format("{}").
// NOTE(review): not marked `override`, unlike IsSource()/GetColumnInfo()
// declared next to it -- confirm whether the base class declares a virtual
// ToString().
std::string
ToString() const {
return fmt::format("{}", expr_->ToString());
}
// Overrides the base-class IsSource() hook; this expression always reports
// true.
// NOTE(review): presumably "source" marks an expression that evaluates a
// column directly rather than combining child results -- confirm against the
// base Expr declaration.
bool
IsSource() const override {
return true;
}
// Overrides the base-class GetColumnInfo() hook: returns the column_ member
// of the wrapped expression (expr_) as a std::optional.
// NOTE(review): the declared type of column_ is not visible here; if it is a
// plain ColumnInfo the returned optional is always engaged -- confirm.
std::optional<milvus::expr::ColumnInfo>
GetColumnInfo() const override {
return expr_->column_;
}
private: private:
ColumnVectorPtr ColumnVectorPtr
PreCheckNullable(OffsetVector* input); PreCheckNullable(OffsetVector* input);

View File

@ -32,31 +32,31 @@ PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
} }
switch (expr_->column_.data_type_) { switch (expr_->column_.data_type_) {
case DataType::BOOL: { case DataType::BOOL: {
result = ExecVisitorImpl<bool>(input); result = ExecVisitorImpl<bool>(context);
break; break;
} }
case DataType::INT8: { case DataType::INT8: {
result = ExecVisitorImpl<int8_t>(input); result = ExecVisitorImpl<int8_t>(context);
break; break;
} }
case DataType::INT16: { case DataType::INT16: {
result = ExecVisitorImpl<int16_t>(input); result = ExecVisitorImpl<int16_t>(context);
break; break;
} }
case DataType::INT32: { case DataType::INT32: {
result = ExecVisitorImpl<int32_t>(input); result = ExecVisitorImpl<int32_t>(context);
break; break;
} }
case DataType::INT64: { case DataType::INT64: {
result = ExecVisitorImpl<int64_t>(input); result = ExecVisitorImpl<int64_t>(context);
break; break;
} }
case DataType::FLOAT: { case DataType::FLOAT: {
result = ExecVisitorImpl<float>(input); result = ExecVisitorImpl<float>(context);
break; break;
} }
case DataType::DOUBLE: { case DataType::DOUBLE: {
result = ExecVisitorImpl<double>(input); result = ExecVisitorImpl<double>(context);
break; break;
} }
case DataType::VARCHAR: { case DataType::VARCHAR: {
@ -64,30 +64,30 @@ PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
!storage::MmapManager::GetInstance() !storage::MmapManager::GetInstance()
.GetMmapConfig() .GetMmapConfig()
.growing_enable_mmap) { .growing_enable_mmap) {
result = ExecVisitorImpl<std::string>(input); result = ExecVisitorImpl<std::string>(context);
} else { } else {
result = ExecVisitorImpl<std::string_view>(input); result = ExecVisitorImpl<std::string_view>(context);
} }
break; break;
} }
case DataType::JSON: { case DataType::JSON: {
if (expr_->vals_.size() == 0) { if (expr_->vals_.size() == 0) {
result = ExecVisitorImplTemplateJson<bool>(input); result = ExecVisitorImplTemplateJson<bool>(context);
break; break;
} }
auto type = expr_->vals_[0].val_case(); auto type = expr_->vals_[0].val_case();
switch (type) { switch (type) {
case proto::plan::GenericValue::ValCase::kBoolVal: case proto::plan::GenericValue::ValCase::kBoolVal:
result = ExecVisitorImplTemplateJson<bool>(input); result = ExecVisitorImplTemplateJson<bool>(context);
break; break;
case proto::plan::GenericValue::ValCase::kInt64Val: case proto::plan::GenericValue::ValCase::kInt64Val:
result = ExecVisitorImplTemplateJson<int64_t>(input); result = ExecVisitorImplTemplateJson<int64_t>(context);
break; break;
case proto::plan::GenericValue::ValCase::kFloatVal: case proto::plan::GenericValue::ValCase::kFloatVal:
result = ExecVisitorImplTemplateJson<double>(input); result = ExecVisitorImplTemplateJson<double>(context);
break; break;
case proto::plan::GenericValue::ValCase::kStringVal: case proto::plan::GenericValue::ValCase::kStringVal:
result = ExecVisitorImplTemplateJson<std::string>(input); result = ExecVisitorImplTemplateJson<std::string>(context);
break; break;
default: default:
PanicInfo(DataTypeInvalid, "unknown data type: {}", type); PanicInfo(DataTypeInvalid, "unknown data type: {}", type);
@ -97,26 +97,26 @@ PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
case DataType::ARRAY: { case DataType::ARRAY: {
if (expr_->vals_.size() == 0) { if (expr_->vals_.size() == 0) {
SetNotUseIndex(); SetNotUseIndex();
result = ExecVisitorImplTemplateArray<bool>(input); result = ExecVisitorImplTemplateArray<bool>(context);
break; break;
} }
auto type = expr_->vals_[0].val_case(); auto type = expr_->vals_[0].val_case();
switch (type) { switch (type) {
case proto::plan::GenericValue::ValCase::kBoolVal: case proto::plan::GenericValue::ValCase::kBoolVal:
SetNotUseIndex(); SetNotUseIndex();
result = ExecVisitorImplTemplateArray<bool>(input); result = ExecVisitorImplTemplateArray<bool>(context);
break; break;
case proto::plan::GenericValue::ValCase::kInt64Val: case proto::plan::GenericValue::ValCase::kInt64Val:
SetNotUseIndex(); SetNotUseIndex();
result = ExecVisitorImplTemplateArray<int64_t>(input); result = ExecVisitorImplTemplateArray<int64_t>(context);
break; break;
case proto::plan::GenericValue::ValCase::kFloatVal: case proto::plan::GenericValue::ValCase::kFloatVal:
SetNotUseIndex(); SetNotUseIndex();
result = ExecVisitorImplTemplateArray<double>(input); result = ExecVisitorImplTemplateArray<double>(context);
break; break;
case proto::plan::GenericValue::ValCase::kStringVal: case proto::plan::GenericValue::ValCase::kStringVal:
SetNotUseIndex(); SetNotUseIndex();
result = ExecVisitorImplTemplateArray<std::string>(input); result = ExecVisitorImplTemplateArray<std::string>(context);
break; break;
default: default:
PanicInfo(DataTypeInvalid, "unknown data type: {}", type); PanicInfo(DataTypeInvalid, "unknown data type: {}", type);
@ -216,12 +216,11 @@ PhyTermFilterExpr::ExecPkTermImpl() {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
// pk valid_bitmap is always all true
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
auto current_chunk_view = auto current_chunk_view =
cached_bits_.view(current_data_chunk_pos_, real_batch_size); cached_bits_.view(current_data_chunk_pos_, real_batch_size);
@ -233,9 +232,9 @@ PhyTermFilterExpr::ExecPkTermImpl() {
template <typename ValueType> template <typename ValueType>
VectorPtr VectorPtr
PhyTermFilterExpr::ExecVisitorImplTemplateJson(OffsetVector* input) { PhyTermFilterExpr::ExecVisitorImplTemplateJson(EvalCtx& context) {
if (expr_->is_in_field_) { if (expr_->is_in_field_) {
return ExecTermJsonVariableInField<ValueType>(input); return ExecTermJsonVariableInField<ValueType>(context);
} else { } else {
if (is_index_mode_) { if (is_index_mode_) {
// we create double index for json int64 field for now // we create double index for json int64 field for now
@ -243,40 +242,42 @@ PhyTermFilterExpr::ExecVisitorImplTemplateJson(OffsetVector* input) {
std::conditional_t<std::is_same_v<ValueType, int64_t>, std::conditional_t<std::is_same_v<ValueType, int64_t>,
double, double,
ValueType>; ValueType>;
return ExecVisitorImplForIndex<GetType>(input); return ExecVisitorImplForIndex<GetType>();
} else { } else {
return ExecTermJsonFieldInVariable<ValueType>(input); return ExecTermJsonFieldInVariable<ValueType>(context);
} }
} }
} }
// Term-filter entry point for ARRAY columns, templated on the literal's
// value type. It does no evaluation itself; it only selects the direction of
// the membership test from expr_->is_in_field_:
//   - is_in_field_ set : forwards to ExecTermArrayVariableInField<ValueType>
//   - otherwise        : forwards to ExecTermArrayFieldInVariable<ValueType>
// The argument is passed through untouched and the callee's VectorPtr is
// returned as-is.
template <typename ValueType> template <typename ValueType>
VectorPtr VectorPtr
PhyTermFilterExpr::ExecVisitorImplTemplateArray(OffsetVector* input) { PhyTermFilterExpr::ExecVisitorImplTemplateArray(EvalCtx& context) {
if (expr_->is_in_field_) { if (expr_->is_in_field_) {
return ExecTermArrayVariableInField<ValueType>(input); return ExecTermArrayVariableInField<ValueType>(context);
} else { } else {
return ExecTermArrayFieldInVariable<ValueType>(input); return ExecTermArrayFieldInVariable<ValueType>(context);
} }
} }
template <typename ValueType> template <typename ValueType>
VectorPtr VectorPtr
PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) { PhyTermFilterExpr::ExecTermArrayVariableInField(EvalCtx& context) {
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>, using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
std::string_view, std::string_view,
ValueType>; ValueType>;
auto* input = context.get_offset_input();
const auto& bitmap_input = context.get_bitmap_input();
auto real_batch_size = auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize(); has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
AssertInfo(expr_->vals_.size() == 1, AssertInfo(expr_->vals_.size() == 1,
"element length in json array must be one"); "element length in json array must be one");
@ -286,8 +287,10 @@ PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) {
} }
auto target_val = arg_val_.GetValue<ValueType>(); auto target_val = arg_val_.GetValue<ValueType>();
int processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>( [&processed_cursor, &
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
const ArrayView* data, const ArrayView* data,
const bool* valid_data, const bool* valid_data,
const int32_t* offsets, const int32_t* offsets,
@ -304,6 +307,7 @@ PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) {
} }
return false; return false;
}; };
bool has_bitmap_input = !bitmap_input.empty();
for (int i = 0; i < size; ++i) { for (int i = 0; i < size; ++i) {
auto offset = i; auto offset = i;
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random) {
@ -313,8 +317,12 @@ PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) {
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
continue;
}
res[i] = executor(offset); res[i] = executor(offset);
} }
processed_cursor += size;
}; };
int64_t processed_size; int64_t processed_size;
@ -340,22 +348,24 @@ PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) {
template <typename ValueType> template <typename ValueType>
VectorPtr VectorPtr
PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) { PhyTermFilterExpr::ExecTermArrayFieldInVariable(EvalCtx& context) {
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>, using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
std::string_view, std::string_view,
ValueType>; ValueType>;
auto* input = context.get_offset_input();
const auto& bitmap_input = context.get_bitmap_input();
auto real_batch_size = auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize(); has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
int index = -1; int index = -1;
if (expr_->column_.nested_path_.size() > 0) { if (expr_->column_.nested_path_.size() > 0) {
@ -372,8 +382,10 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
return res_vec; return res_vec;
} }
int processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>( [&processed_cursor, &
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
const ArrayView* data, const ArrayView* data,
const bool* valid_data, const bool* valid_data,
const int32_t* offsets, const int32_t* offsets,
@ -382,6 +394,7 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
TargetBitmapView valid_res, TargetBitmapView valid_res,
int index, int index,
const std::shared_ptr<MultiElement>& term_set) { const std::shared_ptr<MultiElement>& term_set) {
bool has_bitmap_input = !bitmap_input.empty();
for (int i = 0; i < size; ++i) { for (int i = 0; i < size; ++i) {
auto offset = i; auto offset = i;
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random) {
@ -395,9 +408,13 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
res[i] = false; res[i] = false;
continue; continue;
} }
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
continue;
}
auto value = data[offset].get_data<GetType>(index); auto value = data[offset].get_data<GetType>(index);
res[i] = term_set->In(ValueType(value)); res[i] = term_set->In(ValueType(value));
} }
processed_cursor += size;
}; };
int64_t processed_size; int64_t processed_size;
@ -428,21 +445,23 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
template <typename ValueType> template <typename ValueType>
VectorPtr VectorPtr
PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) { PhyTermFilterExpr::ExecTermJsonVariableInField(EvalCtx& context) {
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>, using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
std::string_view, std::string_view,
ValueType>; ValueType>;
auto* input = context.get_offset_input();
const auto& bitmap_input = context.get_bitmap_input();
auto real_batch_size = auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize(); has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
AssertInfo(expr_->vals_.size() == 1, AssertInfo(expr_->vals_.size() == 1,
"element length in json array must be one"); "element length in json array must be one");
@ -454,8 +473,10 @@ PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) {
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
int processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>( [&processed_cursor, &
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
const Json* data, const Json* data,
const bool* valid_data, const bool* valid_data,
const int32_t* offsets, const int32_t* offsets,
@ -480,6 +501,7 @@ PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) {
} }
return false; return false;
}; };
bool has_bitmap_input = !bitmap_input.empty();
for (size_t i = 0; i < size; ++i) { for (size_t i = 0; i < size; ++i) {
auto offset = i; auto offset = i;
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random) {
@ -489,8 +511,12 @@ PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) {
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
continue;
}
res[i] = executor(offset); res[i] = executor(offset);
} }
processed_cursor += size;
}; };
int64_t processed_size; int64_t processed_size;
if (has_offset_input_) { if (has_offset_input_) {
@ -515,21 +541,25 @@ PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) {
template <typename ValueType> template <typename ValueType>
VectorPtr VectorPtr
PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) { PhyTermFilterExpr::ExecTermJsonFieldInVariable(EvalCtx& context) {
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>, using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
std::string_view, std::string_view,
ValueType>; ValueType>;
auto* input = context.get_offset_input();
const auto& bitmap_input = context.get_bitmap_input();
FieldId field_id = expr_->column_.field_id_;
auto real_batch_size = auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize(); has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
if (!arg_inited_) { if (!arg_inited_) {
@ -543,8 +573,10 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
return res_vec; return res_vec;
} }
int processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>( [&processed_cursor, &
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
const Json* data, const Json* data,
const bool* valid_data, const bool* valid_data,
const int32_t* offsets, const int32_t* offsets,
@ -571,6 +603,7 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
} }
return terms->In(ValueType(x.value())); return terms->In(ValueType(x.value()));
}; };
bool has_bitmap_input = !bitmap_input.empty();
for (size_t i = 0; i < size; ++i) { for (size_t i = 0; i < size; ++i) {
auto offset = i; auto offset = i;
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random) {
@ -584,8 +617,13 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
res[i] = false; res[i] = false;
continue; continue;
} }
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
continue;
}
res[i] = executor(offset); res[i] = executor(offset);
} }
processed_cursor += size;
}; };
int64_t processed_size; int64_t processed_size;
if (has_offset_input_) { if (has_offset_input_) {
@ -614,17 +652,17 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
// Scalar-column term filter dispatcher, templated on the column's element
// type T. Chooses between the two execution strategies:
//   - ExecVisitorImplForIndex<T> when is_index_mode_ is set AND
//     has_offset_input_ is false;
//   - ExecVisitorImplForData<T> in every other case (including index mode
//     with an offset input present).
// Returns whatever the selected implementation returns.
template <typename T> template <typename T>
VectorPtr VectorPtr
PhyTermFilterExpr::ExecVisitorImpl(OffsetVector* input) { PhyTermFilterExpr::ExecVisitorImpl(EvalCtx& context) {
if (is_index_mode_ && !has_offset_input_) { if (is_index_mode_ && !has_offset_input_) {
return ExecVisitorImplForIndex<T>(input); return ExecVisitorImplForIndex<T>();
} else { } else {
return ExecVisitorImplForData<T>(input); return ExecVisitorImplForData<T>(context);
} }
} }
template <typename T> template <typename T>
VectorPtr VectorPtr
PhyTermFilterExpr::ExecVisitorImplForIndex(OffsetVector* input) { PhyTermFilterExpr::ExecVisitorImplForIndex() {
typedef std:: typedef std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T> conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
IndexInnerType; IndexInnerType;
@ -667,7 +705,7 @@ PhyTermFilterExpr::ExecVisitorImplForIndex(OffsetVector* input) {
template <> template <>
VectorPtr VectorPtr
PhyTermFilterExpr::ExecVisitorImplForIndex<bool>(OffsetVector* input) { PhyTermFilterExpr::ExecVisitorImplForIndex<bool>() {
using Index = index::ScalarIndex<bool>; using Index = index::ScalarIndex<bool>;
auto real_batch_size = GetNextBatchSize(); auto real_batch_size = GetNextBatchSize();
if (real_batch_size == 0) { if (real_batch_size == 0) {
@ -689,18 +727,21 @@ PhyTermFilterExpr::ExecVisitorImplForIndex<bool>(OffsetVector* input) {
template <typename T> template <typename T>
VectorPtr VectorPtr
PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) { PhyTermFilterExpr::ExecVisitorImplForData(EvalCtx& context) {
auto* input = context.get_offset_input();
const auto& bitmap_input = context.get_bitmap_input();
auto real_batch_size = auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize(); has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
if (!arg_inited_) { if (!arg_inited_) {
std::vector<T> vals; std::vector<T> vals;
@ -717,8 +758,10 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
arg_inited_ = true; arg_inited_ = true;
} }
int processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>( [&processed_cursor, &
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
const T* data, const T* data,
const bool* valid_data, const bool* valid_data,
const int32_t* offsets, const int32_t* offsets,
@ -726,6 +769,7 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
TargetBitmapView res, TargetBitmapView res,
TargetBitmapView valid_res, TargetBitmapView valid_res,
const std::shared_ptr<MultiElement>& vals) { const std::shared_ptr<MultiElement>& vals) {
bool has_bitmap_input = !bitmap_input.empty();
for (size_t i = 0; i < size; ++i) { for (size_t i = 0; i < size; ++i) {
auto offset = i; auto offset = i;
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random) {
@ -735,8 +779,12 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input && !bitmap_input[i + processed_cursor]) {
continue;
}
res[i] = vals->In(data[offset]); res[i] = vals->In(data[offset]);
} }
processed_cursor += size;
}; };
int64_t processed_size; int64_t processed_size;
if (has_offset_input_) { if (has_offset_input_) {

View File

@ -75,6 +75,21 @@ class PhyTermFilterExpr : public SegmentExpr {
void void
Eval(EvalCtx& context, VectorPtr& result) override; Eval(EvalCtx& context, VectorPtr& result) override;
// Override that unconditionally reports true for this expression type.
// NOTE(review): what "source" means is defined by the base-class contract,
// which is not visible here — confirm against the declaration being
// overridden.
bool
IsSource() const override {
return true;
}
// Returns the textual form of the wrapped expression: the result of
// expr_->ToString() formatted through fmt::format("{}").
// NOTE(review): unlike IsSource()/GetColumnInfo() this is not marked
// `override` — confirm whether the base class declares a virtual ToString().
std::string
ToString() const {
return fmt::format("{}", expr_->ToString());
}
// Returns the column this term filter is bound to (expr_->column_), wrapped
// in std::optional. The optional returned here is always engaged.
std::optional<milvus::expr::ColumnInfo>
GetColumnInfo() const override {
return expr_->column_;
}
private: private:
void void
InitPkCacheOffset(); InitPkCacheOffset();
@ -88,39 +103,39 @@ class PhyTermFilterExpr : public SegmentExpr {
// ---- Scalar columns -------------------------------------------------------
// Dispatcher: selects between the ForIndex and ForData variants below.
template <typename T> template <typename T>
VectorPtr VectorPtr
ExecVisitorImpl(OffsetVector* input = nullptr); ExecVisitorImpl(EvalCtx& context);
// Index-mode variant of the scalar term filter.
template <typename T> template <typename T>
VectorPtr VectorPtr
ExecVisitorImplForIndex(OffsetVector* input = nullptr); ExecVisitorImplForIndex();
// Raw-data variant of the scalar term filter.
template <typename T> template <typename T>
VectorPtr VectorPtr
ExecVisitorImplForData(OffsetVector* input = nullptr); ExecVisitorImplForData(EvalCtx& context);
// ---- JSON columns ---------------------------------------------------------
// Dispatcher for JSON columns; ValueType is the literal's value type.
template <typename ValueType> template <typename ValueType>
VectorPtr VectorPtr
ExecVisitorImplTemplateJson(OffsetVector* input = nullptr); ExecVisitorImplTemplateJson(EvalCtx& context);
// JSON path used when expr_->is_in_field_ is set.
template <typename ValueType> template <typename ValueType>
VectorPtr VectorPtr
ExecTermJsonVariableInField(OffsetVector* input = nullptr); ExecTermJsonVariableInField(EvalCtx& context);
// JSON path used when expr_->is_in_field_ is not set (non-index mode).
template <typename ValueType> template <typename ValueType>
VectorPtr VectorPtr
ExecTermJsonFieldInVariable(OffsetVector* input = nullptr); ExecTermJsonFieldInVariable(EvalCtx& context);
// ---- ARRAY columns --------------------------------------------------------
// Dispatcher for ARRAY columns; ValueType is the literal's value type.
template <typename ValueType> template <typename ValueType>
VectorPtr VectorPtr
ExecVisitorImplTemplateArray(OffsetVector* input = nullptr); ExecVisitorImplTemplateArray(EvalCtx& context);
// ARRAY path used when expr_->is_in_field_ is set.
template <typename ValueType> template <typename ValueType>
VectorPtr VectorPtr
ExecTermArrayVariableInField(OffsetVector* input = nullptr); ExecTermArrayVariableInField(EvalCtx& context);
// ARRAY path used when expr_->is_in_field_ is not set.
template <typename ValueType> template <typename ValueType>
VectorPtr VectorPtr
ExecTermArrayFieldInVariable(OffsetVector* input = nullptr); ExecTermArrayFieldInVariable(EvalCtx& context);
private: private:
std::shared_ptr<const milvus::expr::TermFilterExpr> expr_; std::shared_ptr<const milvus::expr::TermFilterExpr> expr_;

View File

@ -89,51 +89,51 @@ PhyUnaryRangeFilterExpr::CanUseIndexForArray<milvus::Array>() {
// Primary template: for a generic T there is no index-specific handling, so
// this simply forwards to ExecRangeVisitorImplArray<T> and returns its
// result. The proto::plan::Array case is handled by the explicit
// specialization that follows.
template <typename T> template <typename T>
VectorPtr VectorPtr
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex() { PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex(EvalCtx& context) {
return ExecRangeVisitorImplArray<T>(); return ExecRangeVisitorImplArray<T>(context);
} }
template <> template <>
VectorPtr VectorPtr
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex< PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex<proto::plan::Array>(
proto::plan::Array>() { EvalCtx& context) {
switch (expr_->op_type_) { switch (expr_->op_type_) {
case proto::plan::Equal: case proto::plan::Equal:
case proto::plan::NotEqual: { case proto::plan::NotEqual: {
switch (expr_->column_.element_type_) { switch (expr_->column_.element_type_) {
case DataType::BOOL: { case DataType::BOOL: {
return ExecArrayEqualForIndex<bool>(expr_->op_type_ == return ExecArrayEqualForIndex<bool>(
proto::plan::NotEqual); context, expr_->op_type_ == proto::plan::NotEqual);
} }
case DataType::INT8: { case DataType::INT8: {
return ExecArrayEqualForIndex<int8_t>( return ExecArrayEqualForIndex<int8_t>(
expr_->op_type_ == proto::plan::NotEqual); context, expr_->op_type_ == proto::plan::NotEqual);
} }
case DataType::INT16: { case DataType::INT16: {
return ExecArrayEqualForIndex<int16_t>( return ExecArrayEqualForIndex<int16_t>(
expr_->op_type_ == proto::plan::NotEqual); context, expr_->op_type_ == proto::plan::NotEqual);
} }
case DataType::INT32: { case DataType::INT32: {
return ExecArrayEqualForIndex<int32_t>( return ExecArrayEqualForIndex<int32_t>(
expr_->op_type_ == proto::plan::NotEqual); context, expr_->op_type_ == proto::plan::NotEqual);
} }
case DataType::INT64: { case DataType::INT64: {
return ExecArrayEqualForIndex<int64_t>( return ExecArrayEqualForIndex<int64_t>(
expr_->op_type_ == proto::plan::NotEqual); context, expr_->op_type_ == proto::plan::NotEqual);
} }
case DataType::FLOAT: case DataType::FLOAT:
case DataType::DOUBLE: { case DataType::DOUBLE: {
// not accurate on floating point number, rollback to bruteforce. // not accurate on floating point number, rollback to bruteforce.
return ExecRangeVisitorImplArray<proto::plan::Array>( return ExecRangeVisitorImplArray<proto::plan::Array>(
nullptr); context);
} }
case DataType::VARCHAR: { case DataType::VARCHAR: {
if (segment_->type() == SegmentType::Growing) { if (segment_->type() == SegmentType::Growing) {
return ExecArrayEqualForIndex<std::string>( return ExecArrayEqualForIndex<std::string>(
expr_->op_type_ == proto::plan::NotEqual); context, expr_->op_type_ == proto::plan::NotEqual);
} else { } else {
return ExecArrayEqualForIndex<std::string_view>( return ExecArrayEqualForIndex<std::string_view>(
expr_->op_type_ == proto::plan::NotEqual); context, expr_->op_type_ == proto::plan::NotEqual);
} }
} }
default: default:
@ -144,7 +144,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex<
} }
} }
default: default:
return ExecRangeVisitorImplArray<proto::plan::Array>(); return ExecRangeVisitorImplArray<proto::plan::Array>(context);
} }
} }
@ -154,31 +154,31 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
SetHasOffsetInput((input != nullptr)); SetHasOffsetInput((input != nullptr));
switch (expr_->column_.data_type_) { switch (expr_->column_.data_type_) {
case DataType::BOOL: { case DataType::BOOL: {
result = ExecRangeVisitorImpl<bool>(input); result = ExecRangeVisitorImpl<bool>(context);
break; break;
} }
case DataType::INT8: { case DataType::INT8: {
result = ExecRangeVisitorImpl<int8_t>(input); result = ExecRangeVisitorImpl<int8_t>(context);
break; break;
} }
case DataType::INT16: { case DataType::INT16: {
result = ExecRangeVisitorImpl<int16_t>(input); result = ExecRangeVisitorImpl<int16_t>(context);
break; break;
} }
case DataType::INT32: { case DataType::INT32: {
result = ExecRangeVisitorImpl<int32_t>(input); result = ExecRangeVisitorImpl<int32_t>(context);
break; break;
} }
case DataType::INT64: { case DataType::INT64: {
result = ExecRangeVisitorImpl<int64_t>(input); result = ExecRangeVisitorImpl<int64_t>(context);
break; break;
} }
case DataType::FLOAT: { case DataType::FLOAT: {
result = ExecRangeVisitorImpl<float>(input); result = ExecRangeVisitorImpl<float>(context);
break; break;
} }
case DataType::DOUBLE: { case DataType::DOUBLE: {
result = ExecRangeVisitorImpl<double>(input); result = ExecRangeVisitorImpl<double>(context);
break; break;
} }
case DataType::VARCHAR: { case DataType::VARCHAR: {
@ -186,9 +186,9 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
!storage::MmapManager::GetInstance() !storage::MmapManager::GetInstance()
.GetMmapConfig() .GetMmapConfig()
.growing_enable_mmap) { .growing_enable_mmap) {
result = ExecRangeVisitorImpl<std::string>(input); result = ExecRangeVisitorImpl<std::string>(context);
} else { } else {
result = ExecRangeVisitorImpl<std::string_view>(input); result = ExecRangeVisitorImpl<std::string_view>(context);
} }
break; break;
} }
@ -227,20 +227,20 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
} else { } else {
switch (val_type) { switch (val_type) {
case proto::plan::GenericValue::ValCase::kBoolVal: case proto::plan::GenericValue::ValCase::kBoolVal:
result = ExecRangeVisitorImplJson<bool>(input); result = ExecRangeVisitorImplJson<bool>(context);
break; break;
case proto::plan::GenericValue::ValCase::kInt64Val: case proto::plan::GenericValue::ValCase::kInt64Val:
result = ExecRangeVisitorImplJson<int64_t>(input); result = ExecRangeVisitorImplJson<int64_t>(context);
break; break;
case proto::plan::GenericValue::ValCase::kFloatVal: case proto::plan::GenericValue::ValCase::kFloatVal:
result = ExecRangeVisitorImplJson<double>(input); result = ExecRangeVisitorImplJson<double>(context);
break; break;
case proto::plan::GenericValue::ValCase::kStringVal: case proto::plan::GenericValue::ValCase::kStringVal:
result = ExecRangeVisitorImplJson<std::string>(input); result = ExecRangeVisitorImplJson<std::string>(context);
break; break;
case proto::plan::GenericValue::ValCase::kArrayVal: case proto::plan::GenericValue::ValCase::kArrayVal:
result = result = ExecRangeVisitorImplJson<proto::plan::Array>(
ExecRangeVisitorImplJson<proto::plan::Array>(input); context);
break; break;
default: default:
PanicInfo( PanicInfo(
@ -254,28 +254,28 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
switch (val_type) { switch (val_type) {
case proto::plan::GenericValue::ValCase::kBoolVal: case proto::plan::GenericValue::ValCase::kBoolVal:
SetNotUseIndex(); SetNotUseIndex();
result = ExecRangeVisitorImplArray<bool>(input); result = ExecRangeVisitorImplArray<bool>(context);
break; break;
case proto::plan::GenericValue::ValCase::kInt64Val: case proto::plan::GenericValue::ValCase::kInt64Val:
SetNotUseIndex(); SetNotUseIndex();
result = ExecRangeVisitorImplArray<int64_t>(input); result = ExecRangeVisitorImplArray<int64_t>(context);
break; break;
case proto::plan::GenericValue::ValCase::kFloatVal: case proto::plan::GenericValue::ValCase::kFloatVal:
SetNotUseIndex(); SetNotUseIndex();
result = ExecRangeVisitorImplArray<double>(input); result = ExecRangeVisitorImplArray<double>(context);
break; break;
case proto::plan::GenericValue::ValCase::kStringVal: case proto::plan::GenericValue::ValCase::kStringVal:
SetNotUseIndex(); SetNotUseIndex();
result = ExecRangeVisitorImplArray<std::string>(input); result = ExecRangeVisitorImplArray<std::string>(context);
break; break;
case proto::plan::GenericValue::ValCase::kArrayVal: case proto::plan::GenericValue::ValCase::kArrayVal:
if (!has_offset_input_ && if (!has_offset_input_ &&
CanUseIndexForArray<milvus::Array>()) { CanUseIndexForArray<milvus::Array>()) {
result = ExecRangeVisitorImplArrayForIndex< result = ExecRangeVisitorImplArrayForIndex<
proto::plan::Array>(); proto::plan::Array>(context);
} else { } else {
result = ExecRangeVisitorImplArray<proto::plan::Array>( result = ExecRangeVisitorImplArray<proto::plan::Array>(
input); context);
} }
break; break;
default: default:
@ -293,17 +293,19 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
template <typename ValueType> template <typename ValueType>
VectorPtr VectorPtr
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) { PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(EvalCtx& context) {
auto* input = context.get_offset_input();
const auto& bitmap_input = context.get_bitmap_input();
auto real_batch_size = auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize(); has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
if (!arg_inited_) { if (!arg_inited_) {
value_arg_.SetValue<ValueType>(expr_->val_); value_arg_.SetValue<ValueType>(expr_->val_);
@ -315,16 +317,18 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
if (expr_->column_.nested_path_.size() > 0) { if (expr_->column_.nested_path_.size() > 0) {
index = std::stoi(expr_->column_.nested_path_[0]); index = std::stoi(expr_->column_.nested_path_[0]);
} }
auto execute_sub_batch = [op_type]<FilterType filter_type = int processed_cursor = 0;
FilterType::sequential>( auto execute_sub_batch =
const milvus::ArrayView* data, [ op_type, &processed_cursor, &
const bool* valid_data, bitmap_input ]<FilterType filter_type = FilterType::sequential>(
const int32_t* offsets, const milvus::ArrayView* data,
const int size, const bool* valid_data,
TargetBitmapView res, const int32_t* offsets,
TargetBitmapView valid_res, const int size,
ValueType val, TargetBitmapView res,
int index) { TargetBitmapView valid_res,
ValueType val,
int index) {
switch (op_type) { switch (op_type) {
case proto::plan::GreaterThan: { case proto::plan::GreaterThan: {
UnaryElementFuncForArray<ValueType, UnaryElementFuncForArray<ValueType,
@ -338,6 +342,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
index, index,
res, res,
valid_res, valid_res,
bitmap_input,
processed_cursor,
offsets); offsets);
break; break;
} }
@ -353,6 +359,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
index, index,
res, res,
valid_res, valid_res,
bitmap_input,
processed_cursor,
offsets); offsets);
break; break;
} }
@ -368,6 +376,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
index, index,
res, res,
valid_res, valid_res,
bitmap_input,
processed_cursor,
offsets); offsets);
break; break;
} }
@ -383,6 +393,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
index, index,
res, res,
valid_res, valid_res,
bitmap_input,
processed_cursor,
offsets); offsets);
break; break;
} }
@ -398,6 +410,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
index, index,
res, res,
valid_res, valid_res,
bitmap_input,
processed_cursor,
offsets); offsets);
break; break;
} }
@ -413,6 +427,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
index, index,
res, res,
valid_res, valid_res,
bitmap_input,
processed_cursor,
offsets); offsets);
break; break;
} }
@ -428,6 +444,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
index, index,
res, res,
valid_res, valid_res,
bitmap_input,
processed_cursor,
offsets); offsets);
break; break;
} }
@ -443,6 +461,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
index, index,
res, res,
valid_res, valid_res,
bitmap_input,
processed_cursor,
offsets); offsets);
break; break;
} }
@ -452,6 +472,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
fmt::format("unsupported operator type for unary expr: {}", fmt::format("unsupported operator type for unary expr: {}",
op_type)); op_type));
} }
processed_cursor += size;
}; };
int64_t processed_size; int64_t processed_size;
if (has_offset_input_) { if (has_offset_input_) {
@ -477,7 +498,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
template <typename T> template <typename T>
VectorPtr VectorPtr
PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) { PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(EvalCtx& context,
bool reverse) {
typedef std:: typedef std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T> conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
IndexInnerType; IndexInnerType;
@ -491,7 +513,7 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) {
auto val = GetValueFromProto<proto::plan::Array>(expr_->val_); auto val = GetValueFromProto<proto::plan::Array>(expr_->val_);
if (val.array_size() == 0) { if (val.array_size() == 0) {
// rollback to bruteforce. no candidates will be filtered out via index. // rollback to bruteforce. no candidates will be filtered out via index.
return ExecRangeVisitorImplArray<proto::plan::Array>(); return ExecRangeVisitorImplArray<proto::plan::Array>(context);
} }
// cache the result to suit the framework. // cache the result to suit the framework.
@ -587,11 +609,14 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) {
template <typename ExprValueType> template <typename ExprValueType>
VectorPtr VectorPtr
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) { PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(EvalCtx& context) {
using GetType = using GetType =
std::conditional_t<std::is_same_v<ExprValueType, std::string>, std::conditional_t<std::is_same_v<ExprValueType, std::string>,
std::string_view, std::string_view,
ExprValueType>; ExprValueType>;
auto* input = context.get_offset_input();
const auto& bitmap_input = context.get_bitmap_input();
FieldId field_id = expr_->column_.field_id_;
auto real_batch_size = auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize(); has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) { if (real_batch_size == 0) {
@ -602,13 +627,13 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
value_arg_.SetValue<ExprValueType>(expr_->val_); value_arg_.SetValue<ExprValueType>(expr_->val_);
arg_inited_ = true; arg_inited_ = true;
} }
auto res_vec =
ExprValueType val = value_arg_.GetValue<ExprValueType>(); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
auto res_vec = std::make_shared<ColumnVector>( TargetBitmap(real_batch_size, true));
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
ExprValueType val = value_arg_.GetValue<ExprValueType>();
auto op_type = expr_->op_type_; auto op_type = expr_->op_type_;
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
@ -642,8 +667,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
res[i] = (cmp); \ res[i] = (cmp); \
} while (false) } while (false)
int processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[ op_type, pointer ]<FilterType filter_type = FilterType::sequential>( [ op_type, pointer, &processed_cursor, &
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
const milvus::Json* data, const milvus::Json* data,
const bool* valid_data, const bool* valid_data,
const int32_t* offsets, const int32_t* offsets,
@ -651,6 +678,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
TargetBitmapView res, TargetBitmapView res,
TargetBitmapView valid_res, TargetBitmapView valid_res,
ExprValueType val) { ExprValueType val) {
bool has_bitmap_input = !bitmap_input.empty();
switch (op_type) { switch (op_type) {
case proto::plan::GreaterThan: { case proto::plan::GreaterThan: {
for (size_t i = 0; i < size; ++i) { for (size_t i = 0; i < size; ++i) {
@ -662,6 +690,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input &&
!bitmap_input[i + processed_cursor]) {
continue;
}
if constexpr (std::is_same_v<GetType, proto::plan::Array>) { if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
res[i] = false; res[i] = false;
} else { } else {
@ -680,6 +712,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input &&
!bitmap_input[i + processed_cursor]) {
continue;
}
if constexpr (std::is_same_v<GetType, proto::plan::Array>) { if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
res[i] = false; res[i] = false;
} else { } else {
@ -698,6 +734,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input &&
!bitmap_input[i + processed_cursor]) {
continue;
}
if constexpr (std::is_same_v<GetType, proto::plan::Array>) { if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
res[i] = false; res[i] = false;
} else { } else {
@ -716,6 +756,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input &&
!bitmap_input[i + processed_cursor]) {
continue;
}
if constexpr (std::is_same_v<GetType, proto::plan::Array>) { if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
res[i] = false; res[i] = false;
} else { } else {
@ -734,6 +778,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input &&
!bitmap_input[i + processed_cursor]) {
continue;
}
if constexpr (std::is_same_v<GetType, proto::plan::Array>) { if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
auto doc = data[i].doc(); auto doc = data[i].doc();
auto array = doc.at_pointer(pointer).get_array(); auto array = doc.at_pointer(pointer).get_array();
@ -758,6 +806,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input &&
!bitmap_input[i + processed_cursor]) {
continue;
}
if constexpr (std::is_same_v<GetType, proto::plan::Array>) { if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
auto doc = data[i].doc(); auto doc = data[i].doc();
auto array = doc.at_pointer(pointer).get_array(); auto array = doc.at_pointer(pointer).get_array();
@ -782,6 +834,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input &&
!bitmap_input[i + processed_cursor]) {
continue;
}
if constexpr (std::is_same_v<GetType, proto::plan::Array>) { if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
res[i] = false; res[i] = false;
} else { } else {
@ -804,6 +860,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input &&
!bitmap_input[i + processed_cursor]) {
continue;
}
if constexpr (std::is_same_v<GetType, proto::plan::Array>) { if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
res[i] = false; res[i] = false;
} else { } else {
@ -819,6 +879,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
fmt::format("unsupported operator type for unary expr: {}", fmt::format("unsupported operator type for unary expr: {}",
op_type)); op_type));
} }
processed_cursor += size;
}; };
int64_t processed_size; int64_t processed_size;
if (has_offset_input_) { if (has_offset_input_) {
@ -839,7 +900,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
template <typename T> template <typename T>
VectorPtr VectorPtr
PhyUnaryRangeFilterExpr::ExecRangeVisitorImpl(OffsetVector* input) { PhyUnaryRangeFilterExpr::ExecRangeVisitorImpl(EvalCtx& context) {
if (expr_->op_type_ == proto::plan::OpType::TextMatch || if (expr_->op_type_ == proto::plan::OpType::TextMatch ||
expr_->op_type_ == proto::plan::OpType::PhraseMatch) { expr_->op_type_ == proto::plan::OpType::PhraseMatch) {
if (has_offset_input_) { if (has_offset_input_) {
@ -853,7 +914,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImpl(OffsetVector* input) {
if (CanUseIndex<T>() && !has_offset_input_) { if (CanUseIndex<T>() && !has_offset_input_) {
return ExecRangeVisitorImplForIndex<T>(); return ExecRangeVisitorImplForIndex<T>();
} else { } else {
return ExecRangeVisitorImplForData<T>(input); return ExecRangeVisitorImplForData<T>(context);
} }
} }
@ -1003,10 +1064,13 @@ PhyUnaryRangeFilterExpr::PreCheckOverflow(OffsetVector* input) {
template <typename T> template <typename T>
VectorPtr VectorPtr
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) { PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(EvalCtx& context) {
typedef std:: typedef std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T> conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
IndexInnerType; IndexInnerType;
auto* input = context.get_offset_input();
const auto& bitmap_input = context.get_bitmap_input();
if (auto res = PreCheckOverflow<T>(input)) { if (auto res = PreCheckOverflow<T>(input)) {
return res; return res;
} }
@ -1022,62 +1086,112 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
arg_inited_ = true; arg_inited_ = true;
} }
IndexInnerType val = GetValueFromProto<IndexInnerType>(expr_->val_); IndexInnerType val = GetValueFromProto<IndexInnerType>(expr_->val_);
auto res_vec = std::make_shared<ColumnVector>( auto res_vec =
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
TargetBitmap(real_batch_size, true));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
auto expr_type = expr_->op_type_; auto expr_type = expr_->op_type_;
auto execute_sub_batch = [expr_type]<FilterType filter_type = size_t processed_cursor = 0;
FilterType::sequential>( auto execute_sub_batch =
const T* data, [ expr_type, &processed_cursor, &
const bool* valid_data, bitmap_input ]<FilterType filter_type = FilterType::sequential>(
const int32_t* offsets, const T* data,
const int size, const bool* valid_data,
TargetBitmapView res, const int32_t* offsets,
TargetBitmapView valid_res, const int size,
IndexInnerType val) { TargetBitmapView res,
TargetBitmapView valid_res,
IndexInnerType val) {
switch (expr_type) { switch (expr_type) {
case proto::plan::GreaterThan: { case proto::plan::GreaterThan: {
UnaryElementFunc<T, proto::plan::GreaterThan, filter_type> func; UnaryElementFunc<T, proto::plan::GreaterThan, filter_type> func;
func(data, size, val, res, offsets); func(data,
size,
val,
res,
bitmap_input,
processed_cursor,
offsets);
break; break;
} }
case proto::plan::GreaterEqual: { case proto::plan::GreaterEqual: {
UnaryElementFunc<T, proto::plan::GreaterEqual, filter_type> UnaryElementFunc<T, proto::plan::GreaterEqual, filter_type>
func; func;
func(data, size, val, res, offsets); func(data,
size,
val,
res,
bitmap_input,
processed_cursor,
offsets);
break; break;
} }
case proto::plan::LessThan: { case proto::plan::LessThan: {
UnaryElementFunc<T, proto::plan::LessThan, filter_type> func; UnaryElementFunc<T, proto::plan::LessThan, filter_type> func;
func(data, size, val, res, offsets); func(data,
size,
val,
res,
bitmap_input,
processed_cursor,
offsets);
break; break;
} }
case proto::plan::LessEqual: { case proto::plan::LessEqual: {
UnaryElementFunc<T, proto::plan::LessEqual, filter_type> func; UnaryElementFunc<T, proto::plan::LessEqual, filter_type> func;
func(data, size, val, res, offsets); func(data,
size,
val,
res,
bitmap_input,
processed_cursor,
offsets);
break; break;
} }
case proto::plan::Equal: { case proto::plan::Equal: {
UnaryElementFunc<T, proto::plan::Equal, filter_type> func; UnaryElementFunc<T, proto::plan::Equal, filter_type> func;
func(data, size, val, res, offsets); func(data,
size,
val,
res,
bitmap_input,
processed_cursor,
offsets);
break; break;
} }
case proto::plan::NotEqual: { case proto::plan::NotEqual: {
UnaryElementFunc<T, proto::plan::NotEqual, filter_type> func; UnaryElementFunc<T, proto::plan::NotEqual, filter_type> func;
func(data, size, val, res, offsets); func(data,
size,
val,
res,
bitmap_input,
processed_cursor,
offsets);
break; break;
} }
case proto::plan::PrefixMatch: { case proto::plan::PrefixMatch: {
UnaryElementFunc<T, proto::plan::PrefixMatch, filter_type> func; UnaryElementFunc<T, proto::plan::PrefixMatch, filter_type> func;
func(data, size, val, res, offsets); func(data,
size,
val,
res,
bitmap_input,
processed_cursor,
offsets);
break; break;
} }
case proto::plan::Match: { case proto::plan::Match: {
UnaryElementFunc<T, proto::plan::Match, filter_type> func; UnaryElementFunc<T, proto::plan::Match, filter_type> func;
func(data, size, val, res, offsets); func(data,
size,
val,
res,
bitmap_input,
processed_cursor,
offsets);
break; break;
} }
default: default:
@ -1090,7 +1204,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
// so not divide data again for the reason that it may reduce performance if the null distribution is scattered // so not divide data again for the reason that it may reduce performance if the null distribution is scattered
// but to mask res with valid_data after the batch operation. // but to mask res with valid_data after the batch operation.
if (valid_data != nullptr) { if (valid_data != nullptr) {
bool has_bitmap_input = !bitmap_input.empty();
for (int i = 0; i < size; i++) { for (int i = 0; i < size; i++) {
if (has_bitmap_input && !bitmap_input[i + processed_cursor]) {
continue;
}
auto offset = i; auto offset = i;
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i; offset = (offsets) ? offsets[i] : i;
@ -1100,6 +1218,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
} }
} }
} }
processed_cursor += size;
}; };
auto skip_index_func = [expr_type, val](const SkipIndex& skip_index, auto skip_index_func = [expr_type, val](const SkipIndex& skip_index,

View File

@ -41,15 +41,20 @@ struct UnaryElementFuncForMatch {
void void
operator()(const T* src, operator()(const T* src,
size_t size, size_t size,
IndexInnerType val, IndexInnerType val,
TargetBitmapView res, TargetBitmapView res,
int64_t* offsets = nullptr) { const TargetBitmap& bitmap_input,
int start_cursor,
const int32_t* offsets = nullptr) {
PatternMatchTranslator translator; PatternMatchTranslator translator;
auto regex_pattern = translator(val); auto regex_pattern = translator(val);
RegexMatcher matcher(regex_pattern); RegexMatcher matcher(regex_pattern);
bool has_bitmap_input = !bitmap_input.empty();
for (int i = 0; i < size; ++i) { for (int i = 0; i < size; ++i) {
if (has_bitmap_input && !bitmap_input[i + start_cursor]) {
continue;
}
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random) {
res[i] = matcher(src[offsets ? offsets[i] : i]); res[i] = matcher(src[offsets ? offsets[i] : i]);
} else { } else {
@ -69,17 +74,25 @@ struct UnaryElementFunc {
size_t size, size_t size,
IndexInnerType val, IndexInnerType val,
TargetBitmapView res, TargetBitmapView res,
const TargetBitmap& bitmap_input,
size_t start_cursor,
const int32_t* offsets = nullptr) { const int32_t* offsets = nullptr) {
bool has_bitmap_input = !bitmap_input.empty();
if constexpr (op == proto::plan::OpType::Match) { if constexpr (op == proto::plan::OpType::Match) {
UnaryElementFuncForMatch<T, filter_type> func; UnaryElementFuncForMatch<T, filter_type> func;
func(src, size, val, res); func(src, size, val, res, bitmap_input, start_cursor, offsets);
return; return;
} }
// This is the original code, which is kept for the documentation purposes // This is the original code, which is kept for the documentation purposes
// also, for iterative filter // also, for iterative filter
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random ||
std::is_same_v<T, std::string_view> ||
std::is_same_v<T, std::string>) {
for (int i = 0; i < size; ++i) { for (int i = 0; i < size; ++i) {
if (has_bitmap_input && !bitmap_input[i + start_cursor]) {
continue;
}
auto offset = (offsets != nullptr) ? offsets[i] : i; auto offset = (offsets != nullptr) ? offsets[i] : i;
if constexpr (op == proto::plan::OpType::Equal) { if constexpr (op == proto::plan::OpType::Equal) {
res[i] = src[offset] == val; res[i] = src[offset] == val;
@ -164,7 +177,10 @@ struct UnaryElementFuncForArray {
int index, int index,
TargetBitmapView res, TargetBitmapView res,
TargetBitmapView valid_res, TargetBitmapView valid_res,
const TargetBitmap& bitmap_input,
size_t start_cursor,
const int32_t* offsets = nullptr) { const int32_t* offsets = nullptr) {
bool has_bitmap_input = !bitmap_input.empty();
for (int i = 0; i < size; ++i) { for (int i = 0; i < size; ++i) {
auto offset = i; auto offset = i;
if constexpr (filter_type == FilterType::random) { if constexpr (filter_type == FilterType::random) {
@ -174,6 +190,9 @@ struct UnaryElementFuncForArray {
res[i] = valid_res[i] = false; res[i] = valid_res[i] = false;
continue; continue;
} }
if (has_bitmap_input && !bitmap_input[i + start_cursor]) {
continue;
}
if constexpr (op == proto::plan::OpType::Equal) { if constexpr (op == proto::plan::OpType::Equal) {
if constexpr (std::is_same_v<GetType, proto::plan::Array>) { if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
res[i] = src[offset].is_same_array(val); res[i] = src[offset].is_same_array(val);
@ -340,10 +359,30 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
return true; return true;
} }
std::string
ToString() const {
return fmt::format("{}", expr_->ToString());
}
std::optional<milvus::expr::ColumnInfo>
GetColumnInfo() const override {
return expr_->column_;
}
bool
IsSource() const override {
return true;
}
std::shared_ptr<const milvus::expr::UnaryRangeFilterExpr>
GetLogicalExpr() {
return expr_;
}
private: private:
template <typename T> template <typename T>
VectorPtr VectorPtr
ExecRangeVisitorImpl(OffsetVector* input = nullptr); ExecRangeVisitorImpl(EvalCtx& context);
template <typename T> template <typename T>
VectorPtr VectorPtr
@ -351,23 +390,23 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
template <typename T> template <typename T>
VectorPtr VectorPtr
ExecRangeVisitorImplForData(OffsetVector* input = nullptr); ExecRangeVisitorImplForData(EvalCtx& context);
template <typename ExprValueType> template <typename ExprValueType>
VectorPtr VectorPtr
ExecRangeVisitorImplJson(OffsetVector* input = nullptr); ExecRangeVisitorImplJson(EvalCtx& context);
template <typename ExprValueType> template <typename ExprValueType>
VectorPtr VectorPtr
ExecRangeVisitorImplArray(OffsetVector* input = nullptr); ExecRangeVisitorImplArray(EvalCtx& context);
template <typename T> template <typename T>
VectorPtr VectorPtr
ExecRangeVisitorImplArrayForIndex(); ExecRangeVisitorImplArrayForIndex(EvalCtx& context);
template <typename T> template <typename T>
VectorPtr VectorPtr
ExecArrayEqualForIndex(bool reverse); ExecArrayEqualForIndex(EvalCtx& context, bool reverse);
// Check overflow and cache result for performace // Check overflow and cache result for performace
template <typename T> template <typename T>

View File

@ -59,6 +59,21 @@ class PhyValueExpr : public Expr {
} }
} }
std::string
ToString() const {
return fmt::format("{}", expr_->ToString());
}
bool
IsSource() const override {
return true;
}
std::optional<milvus::expr::ColumnInfo>
GetColumnInfo() const override {
return std::nullopt;
}
private: private:
std::shared_ptr<const milvus::expr::ValueExpr> expr_; std::shared_ptr<const milvus::expr::ValueExpr> expr_;
const int64_t active_count_; const int64_t active_count_;

View File

@ -210,6 +210,8 @@ std::map<std::string, std::string> searchGetTargetEntryLatencyLabels{
{"type", "search_get_target_entry_latency"}}; {"type", "search_get_target_entry_latency"}};
std::map<std::string, std::string> randomSampleLatencyLabels{ std::map<std::string, std::string> randomSampleLatencyLabels{
{"type", "random_sample_latency"}}; {"type", "random_sample_latency"}};
std::map<std::string, std::string> optimizeExprLatencyLabels{
{"type", "optimize_expr_latency"}};
DEFINE_PROMETHEUS_HISTOGRAM_FAMILY(internal_core_search_latency, DEFINE_PROMETHEUS_HISTOGRAM_FAMILY(internal_core_search_latency,
"[cpp]latency(us) of search on segment") "[cpp]latency(us) of search on segment")
@ -242,7 +244,9 @@ DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_get_target_entry_latency,
DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_random_sample, DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_random_sample,
internal_core_search_latency, internal_core_search_latency,
randomSampleLatencyLabels) randomSampleLatencyLabels)
DEFINE_PROMETHEUS_HISTOGRAM(internal_core_optimize_expr_latency,
internal_core_search_latency,
optimizeExprLatencyLabels)
// mmap metrics // mmap metrics
std::map<std::string, std::string> mmapAllocatedSpaceAnonLabel = { std::map<std::string, std::string> mmapAllocatedSpaceAnonLabel = {
{"type", "anon"}}; {"type", "anon"}};

View File

@ -142,6 +142,7 @@ DECLARE_PROMETHEUS_HISTOGRAM(internal_core_get_vector_latency);
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_retrieve_get_target_entry_latency); DECLARE_PROMETHEUS_HISTOGRAM(internal_core_retrieve_get_target_entry_latency);
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_get_target_entry_latency); DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_get_target_entry_latency);
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_random_sample); DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_random_sample);
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_optimize_expr_latency);
// async cgo metrics // async cgo metrics
DECLARE_PROMETHEUS_HISTOGRAM_FAMILY(internal_cgo_queue_duration_seconds); DECLARE_PROMETHEUS_HISTOGRAM_FAMILY(internal_cgo_queue_duration_seconds);

View File

@ -27,6 +27,7 @@
#include "exec/QueryContext.h" #include "exec/QueryContext.h"
#include "expr/ITypeExpr.h" #include "expr/ITypeExpr.h"
#include "exec/expression/Expr.h" #include "exec/expression/Expr.h"
#include "exec/expression/ConjunctExpr.h"
#include "exec/expression/function/FunctionFactory.h" #include "exec/expression/function/FunctionFactory.h"
using namespace milvus; using namespace milvus;
@ -82,10 +83,12 @@ class TaskTest : public testing::TestWithParam<DataType> {
field_map_.insert({"string2", str2_fid}); field_map_.insert({"string2", str2_fid});
auto str3_fid = schema->AddDebugField("string3", DataType::VARCHAR); auto str3_fid = schema->AddDebugField("string3", DataType::VARCHAR);
field_map_.insert({"string3", str3_fid}); field_map_.insert({"string3", str3_fid});
auto json_fid = schema->AddDebugField("json", DataType::JSON);
field_map_.insert({"json", json_fid});
schema->set_primary_field_id(str1_fid); schema->set_primary_field_id(str1_fid);
auto segment = CreateSealedSegment(schema); auto segment = CreateSealedSegment(schema);
size_t N = 1000000; size_t N = 100000;
num_rows_ = N; num_rows_ = N;
auto raw_data = DataGen(schema, N); auto raw_data = DataGen(schema, N);
auto fields = schema->get_fields(); auto fields = schema->get_fields();
@ -152,7 +155,7 @@ TEST_P(TaskTest, CallExprEmpty) {
auto query_context = std::make_shared<milvus::exec::QueryContext>( auto query_context = std::make_shared<milvus::exec::QueryContext>(
"test1", "test1",
segment_.get(), segment_.get(),
1000000, 100000,
MAX_TIMESTAMP, MAX_TIMESTAMP,
std::make_shared<milvus::exec::QueryConfig>( std::make_shared<milvus::exec::QueryConfig>(
std::unordered_map<std::string, std::string>{})); std::unordered_map<std::string, std::string>{}));
@ -189,7 +192,7 @@ TEST_P(TaskTest, UnaryExpr) {
auto query_context = std::make_shared<milvus::exec::QueryContext>( auto query_context = std::make_shared<milvus::exec::QueryContext>(
"test1", "test1",
segment_.get(), segment_.get(),
1000000, 100000,
MAX_TIMESTAMP, MAX_TIMESTAMP,
std::make_shared<milvus::exec::QueryConfig>( std::make_shared<milvus::exec::QueryConfig>(
std::unordered_map<std::string, std::string>{})); std::unordered_map<std::string, std::string>{}));
@ -235,7 +238,7 @@ TEST_P(TaskTest, LogicalExpr) {
auto query_context = std::make_shared<milvus::exec::QueryContext>( auto query_context = std::make_shared<milvus::exec::QueryContext>(
"test1", "test1",
segment_.get(), segment_.get(),
1000000, 100000,
MAX_TIMESTAMP, MAX_TIMESTAMP,
std::make_shared<milvus::exec::QueryConfig>( std::make_shared<milvus::exec::QueryConfig>(
std::unordered_map<std::string, std::string>{})); std::unordered_map<std::string, std::string>{}));
@ -296,12 +299,12 @@ TEST_P(TaskTest, CompileInputs_and) {
auto expr7 = std::make_shared<expr::LogicalBinaryExpr>( auto expr7 = std::make_shared<expr::LogicalBinaryExpr>(
expr::LogicalBinaryExpr::OpType::And, expr3, expr6); expr::LogicalBinaryExpr::OpType::And, expr3, expr6);
auto query_context = std::make_shared<milvus::exec::QueryContext>( auto query_context = std::make_shared<milvus::exec::QueryContext>(
DEAFULT_QUERY_ID, segment_.get(), 1000000, MAX_TIMESTAMP); DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
auto exprs = milvus::exec::CompileInputs(expr7, query_context.get(), {}); auto exprs = milvus::exec::CompileInputs(expr7, query_context.get(), {});
EXPECT_EQ(exprs.size(), 4); EXPECT_EQ(exprs.size(), 4);
for (int i = 0; i < exprs.size(); ++i) { for (int i = 0; i < exprs.size(); ++i) {
std::cout << exprs[i]->get_name() << std::endl; std::cout << exprs[i]->name() << std::endl;
EXPECT_STREQ(exprs[i]->get_name().c_str(), "PhyUnaryRangeFilterExpr"); EXPECT_STREQ(exprs[i]->name().c_str(), "PhyUnaryRangeFilterExpr");
} }
} }
@ -316,7 +319,7 @@ TEST_P(TaskTest, CompileInputs_or_with_and) {
proto::plan::GenericValue val; proto::plan::GenericValue val;
val.set_int64_val(10); val.set_int64_val(10);
{ {
// expr: (int64_fid < 10 and int64_fid < 10) or (int64_fid < 10 and int64_fid < 10) // expr: (int64_fid > 10 and int64_fid > 10) or (int64_fid > 10 and int64_fid > 10)
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>( auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
expr::ColumnInfo(int64_fid, DataType::INT64), expr::ColumnInfo(int64_fid, DataType::INT64),
proto::plan::OpType::GreaterThan, proto::plan::OpType::GreaterThan,
@ -342,19 +345,19 @@ TEST_P(TaskTest, CompileInputs_or_with_and) {
auto expr6 = std::make_shared<expr::LogicalBinaryExpr>( auto expr6 = std::make_shared<expr::LogicalBinaryExpr>(
expr::LogicalBinaryExpr::OpType::And, expr1, expr2); expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
auto query_context = std::make_shared<milvus::exec::QueryContext>( auto query_context = std::make_shared<milvus::exec::QueryContext>(
DEAFULT_QUERY_ID, segment_.get(), 1000000, MAX_TIMESTAMP); DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
auto expr7 = std::make_shared<expr::LogicalBinaryExpr>( auto expr7 = std::make_shared<expr::LogicalBinaryExpr>(
expr::LogicalBinaryExpr::OpType::Or, expr3, expr6); expr::LogicalBinaryExpr::OpType::Or, expr3, expr6);
auto exprs = auto exprs =
milvus::exec::CompileInputs(expr7, query_context.get(), {}); milvus::exec::CompileInputs(expr7, query_context.get(), {});
EXPECT_EQ(exprs.size(), 2); EXPECT_EQ(exprs.size(), 2);
for (int i = 0; i < exprs.size(); ++i) { for (int i = 0; i < exprs.size(); ++i) {
std::cout << exprs[i]->get_name() << std::endl; std::cout << exprs[i]->name() << std::endl;
EXPECT_STREQ(exprs[i]->get_name().c_str(), "and"); EXPECT_STREQ(exprs[i]->name().c_str(), "PhyConjunctFilterExpr");
} }
} }
{ {
// expr: (int64_fid < 10 or int64_fid < 10) or (int64_fid < 10 and int64_fid < 10) // expr: (int64_fid < 10 or int64_fid < 10) or (int64_fid > 10 and int64_fid > 10)
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>( auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
expr::ColumnInfo(int64_fid, DataType::INT64), expr::ColumnInfo(int64_fid, DataType::INT64),
proto::plan::OpType::GreaterThan, proto::plan::OpType::GreaterThan,
@ -380,7 +383,7 @@ TEST_P(TaskTest, CompileInputs_or_with_and) {
auto expr6 = std::make_shared<expr::LogicalBinaryExpr>( auto expr6 = std::make_shared<expr::LogicalBinaryExpr>(
expr::LogicalBinaryExpr::OpType::And, expr1, expr2); expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
auto query_context = std::make_shared<milvus::exec::QueryContext>( auto query_context = std::make_shared<milvus::exec::QueryContext>(
DEAFULT_QUERY_ID, segment_.get(), 1000000, MAX_TIMESTAMP); DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
auto expr7 = std::make_shared<expr::LogicalBinaryExpr>( auto expr7 = std::make_shared<expr::LogicalBinaryExpr>(
expr::LogicalBinaryExpr::OpType::Or, expr3, expr6); expr::LogicalBinaryExpr::OpType::Or, expr3, expr6);
auto exprs = auto exprs =
@ -388,14 +391,13 @@ TEST_P(TaskTest, CompileInputs_or_with_and) {
std::cout << exprs.size() << std::endl; std::cout << exprs.size() << std::endl;
EXPECT_EQ(exprs.size(), 3); EXPECT_EQ(exprs.size(), 3);
for (int i = 0; i < exprs.size() - 1; ++i) { for (int i = 0; i < exprs.size() - 1; ++i) {
std::cout << exprs[i]->get_name() << std::endl; std::cout << exprs[i]->name() << std::endl;
EXPECT_STREQ(exprs[i]->get_name().c_str(), EXPECT_STREQ(exprs[i]->name().c_str(), "PhyUnaryRangeFilterExpr");
"PhyUnaryRangeFilterExpr");
} }
EXPECT_STREQ(exprs[2]->get_name().c_str(), "and"); EXPECT_STREQ(exprs[2]->name().c_str(), "PhyConjunctFilterExpr");
} }
{ {
// expr: (int64_fid < 10 or int64_fid < 10) and (int64_fid < 10 and int64_fid < 10) // expr: (int64_fid > 10 or int64_fid > 10) and (int64_fid > 10 and int64_fid > 10)
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>( auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
expr::ColumnInfo(int64_fid, DataType::INT64), expr::ColumnInfo(int64_fid, DataType::INT64),
proto::plan::OpType::GreaterThan, proto::plan::OpType::GreaterThan,
@ -421,18 +423,282 @@ TEST_P(TaskTest, CompileInputs_or_with_and) {
auto expr6 = std::make_shared<expr::LogicalBinaryExpr>( auto expr6 = std::make_shared<expr::LogicalBinaryExpr>(
expr::LogicalBinaryExpr::OpType::And, expr1, expr2); expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
auto query_context = std::make_shared<milvus::exec::QueryContext>( auto query_context = std::make_shared<milvus::exec::QueryContext>(
DEAFULT_QUERY_ID, segment_.get(), 1000000, MAX_TIMESTAMP); DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
auto expr7 = std::make_shared<expr::LogicalBinaryExpr>( auto expr7 = std::make_shared<expr::LogicalBinaryExpr>(
expr::LogicalBinaryExpr::OpType::And, expr3, expr6); expr::LogicalBinaryExpr::OpType::And, expr3, expr6);
auto exprs = auto exprs =
milvus::exec::CompileInputs(expr7, query_context.get(), {}); milvus::exec::CompileInputs(expr7, query_context.get(), {});
std::cout << exprs.size() << std::endl; std::cout << exprs.size() << std::endl;
EXPECT_EQ(exprs.size(), 3); EXPECT_EQ(exprs.size(), 3);
EXPECT_STREQ(exprs[0]->get_name().c_str(), "or"); EXPECT_STREQ(exprs[0]->name().c_str(), "PhyConjunctFilterExpr");
for (int i = 1; i < exprs.size(); ++i) { for (int i = 1; i < exprs.size(); ++i) {
std::cout << exprs[i]->get_name() << std::endl; std::cout << exprs[i]->name() << std::endl;
EXPECT_STREQ(exprs[i]->get_name().c_str(), EXPECT_STREQ(exprs[i]->name().c_str(), "PhyUnaryRangeFilterExpr");
"PhyUnaryRangeFilterExpr");
} }
} }
} }
// Verifies the sub-expression execution order chosen for a conjunction when
// it is compiled into a PhyConjunctFilterExpr.
//
// Every case builds `lhs AND rhs`, compiles it with CompileExpressions and
// compares PhyConjunctFilterExpr::GetReorder() with the expected index list.
// An expected list of {1, 0} means the right operand is scheduled before the
// left one; an empty list is what the test expects when OPTIMIZE_EXPR_ENABLED
// is switched off.
TEST_P(TaskTest, Test_reorder) {
    using namespace milvus;
    using namespace milvus::query;
    using namespace milvus::segcore;
    using namespace milvus::exec;

    // Overrides OPTIMIZE_EXPR_ENABLED for the lifetime of the guard and puts
    // the previous value back on scope exit, including when the guarded code
    // throws, so a failure here cannot leak the override into other tests.
    struct OptimizeExprGuard {
        explicit OptimizeExprGuard(bool enabled)
            : previous_(OPTIMIZE_EXPR_ENABLED) {
            OPTIMIZE_EXPR_ENABLED = enabled;
        }
        ~OptimizeExprGuard() {
            OPTIMIZE_EXPR_ENABLED = previous_;
        }
        OptimizeExprGuard(const OptimizeExprGuard&) = delete;
        OptimizeExprGuard&
        operator=(const OptimizeExprGuard&) = delete;

        bool previous_;
    };

    // Literal builders for the plan-level GenericValue.
    auto string_value = [](const char* text) {
        proto::plan::GenericValue value;
        value.set_string_val(text);
        return value;
    };
    auto int64_value = [](int64_t number) {
        proto::plan::GenericValue value;
        value.set_int64_val(number);
        return value;
    };

    // `<field> <op> <value>` on a fixture field looked up by name.
    auto unary_range = [&](const char* field,
                           DataType type,
                           proto::plan::OpType op,
                           proto::plan::GenericValue value) {
        return std::make_shared<expr::UnaryRangeFilterExpr>(
            expr::ColumnInfo(field_map_[field], type),
            op,
            value,
            std::vector<proto::plan::GenericValue>{});
    };

    // Range predicate on the "int64" field with both inclusive flags false.
    auto int64_open_range = [&](int64_t lower_bound, int64_t upper_bound) {
        auto low = int64_value(lower_bound);
        auto upper = int64_value(upper_bound);
        return std::make_shared<expr::BinaryRangeFilterExpr>(
            expr::ColumnInfo(field_map_["int64"], DataType::INT64),
            low,
            upper,
            false,
            false);
    };

    // Compiles `lhs AND rhs` and checks the reorder vector of the resulting
    // physical conjunct. Size and type preconditions use ASSERT_* so that a
    // mismatch stops this case before anything is indexed or down-cast.
    auto expect_reorder = [&](const char* description,
                              const auto& lhs,
                              const auto& rhs,
                              const std::vector<int>& expected) {
        SCOPED_TRACE(description);
        auto conjunct = std::make_shared<expr::LogicalBinaryExpr>(
            expr::LogicalBinaryExpr::OpType::And, lhs, rhs);
        auto query_context = std::make_shared<milvus::exec::QueryContext>(
            DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
        ExecContext context(query_context.get());
        auto exprs =
            milvus::exec::CompileExpressions({conjunct}, &context, {}, false);
        ASSERT_EQ(exprs.size(), 1);
        ASSERT_STREQ(exprs[0]->name().c_str(), "PhyConjunctFilterExpr");
        auto phy_expr =
            std::static_pointer_cast<milvus::exec::PhyConjunctFilterExpr>(
                exprs[0]);
        std::cout << phy_expr->ToString() << std::endl;
        auto reorder = phy_expr->GetReorder();
        ASSERT_EQ(reorder.size(), expected.size());
        for (size_t i = 0; i < expected.size(); ++i) {
            EXPECT_EQ(reorder[i], expected[i]);
        }
    };

    // expr:    string2 like '%xxx' and string2 == 'xxx'
    // reorder: string2 == 'xxx' and string2 like '%xxx'
    expect_reorder("string2 like '%xxx' and string2 == 'xxx'",
                   unary_range("string2",
                               DataType::VARCHAR,
                               proto::plan::OpType::Match,
                               string_value("%xxx")),
                   unary_range("string2",
                               DataType::VARCHAR,
                               proto::plan::OpType::Equal,
                               string_value("xxx")),
                   {1, 0});

    // expr:    string2 == 'xxx' and int64 < 100
    // reorder: int64 < 100 and string2 == 'xxx'
    expect_reorder("string2 == 'xxx' and int64 < 100",
                   unary_range("string2",
                               DataType::VARCHAR,
                               proto::plan::OpType::Equal,
                               string_value("xxx")),
                   unary_range("int64",
                               DataType::INT64,
                               proto::plan::OpType::LessThan,
                               int64_value(100)),
                   {1, 0});

    // expr:    json like '%xxx' and json == 'xxx'
    // reorder: json == 'xxx' and json like '%xxx'
    // (no nested path is passed to ColumnInfo for either predicate)
    expect_reorder("json like '%xxx' and json == 'xxx'",
                   unary_range("json",
                               DataType::JSON,
                               proto::plan::OpType::Match,
                               string_value("%xxx")),
                   unary_range("json",
                               DataType::JSON,
                               proto::plan::OpType::Equal,
                               string_value("xxx")),
                   {1, 0});

    // expr:    json == 'xxx' and int64 == 100
    // reorder: int64 == 100 and json == 'xxx'
    expect_reorder("json == 'xxx' and int64 == 100",
                   unary_range("json",
                               DataType::JSON,
                               proto::plan::OpType::Equal,
                               string_value("xxx")),
                   unary_range("int64",
                               DataType::INT64,
                               proto::plan::OpType::Equal,
                               int64_value(100)),
                   {1, 0});

    // expr:    json == 'xxx' and 0 < int64 < 100
    // reorder: 0 < int64 < 100 and json == 'xxx'
    expect_reorder("json == 'xxx' and 0 < int64 < 100",
                   unary_range("json",
                               DataType::JSON,
                               proto::plan::OpType::Equal,
                               string_value("xxx")),
                   int64_open_range(0, 100),
                   {1, 0});

    // expr:    string1 < string2 and 0 < int64 < 100
    // reorder: 0 < int64 < 100 and string1 < string2
    expect_reorder(
        "string1 < string2 and 0 < int64 < 100",
        std::make_shared<expr::CompareExpr>(field_map_["string1"],
                                            field_map_["string2"],
                                            DataType::VARCHAR,
                                            DataType::VARCHAR,
                                            OpType::LessThan),
        int64_open_range(0, 100),
        {1, 0});

    {
        // expr: string2 like '%xxx' and string2 == 'xxx'
        // With expression optimization disabled the original sequence is
        // kept, which the test observes as an empty reorder vector.
        OptimizeExprGuard disable_optimize(false);
        expect_reorder("optimize disabled: string2 like '%xxx' and string2 == "
                       "'xxx'",
                       unary_range("string2",
                                   DataType::VARCHAR,
                                   proto::plan::OpType::Match,
                                   string_value("%xxx")),
                       unary_range("string2",
                                   DataType::VARCHAR,
                                   proto::plan::OpType::Equal,
                                   string_value("xxx")),
                       {});
    }
}

View File

@ -3401,6 +3401,145 @@ TEST_P(ExprTest, TestSealedSegmentGetBatchSize) {
std::cout << "end compare test" << std::endl; std::cout << "end compare test" << std::endl;
} }
// Executes conjunctions that pair a string / JSON / array-element /
// two-column predicate with a plain `int64 < 100` predicate against a sealed
// segment filled with generated data.
//
// The generated values are not inspected, so the only checks are that each
// expression executes and that the returned bitset has one bit per row.
TEST_P(ExprTest, TestReorder) {
    auto schema = std::make_shared<Schema>();
    auto pk = schema->AddDebugField("id", DataType::INT64);
    // Fields that no predicate below references are still registered so the
    // schema (and the order in which fields are added) matches the original.
    schema->AddDebugField("bool", DataType::BOOL);
    schema->AddDebugField("bool1", DataType::BOOL);
    schema->AddDebugField("int8", DataType::INT8);
    schema->AddDebugField("int81", DataType::INT8);
    schema->AddDebugField("int16", DataType::INT16);
    schema->AddDebugField("int161", DataType::INT16);
    schema->AddDebugField("int32", DataType::INT32);
    schema->AddDebugField("int321", DataType::INT32);
    auto int64_fid = schema->AddDebugField("int64", DataType::INT64);
    auto int64_1_fid = schema->AddDebugField("int641", DataType::INT64);
    schema->AddDebugField("float", DataType::FLOAT);
    schema->AddDebugField("float1", DataType::FLOAT);
    schema->AddDebugField("double", DataType::DOUBLE);
    schema->AddDebugField("double1", DataType::DOUBLE);
    auto str1_fid = schema->AddDebugField("string1", DataType::VARCHAR);
    schema->AddDebugField("string2", DataType::VARCHAR);
    auto json_fid = schema->AddDebugField("json", DataType::JSON, false);
    auto str_array_fid =
        schema->AddDebugField("str_array", DataType::ARRAY, DataType::VARCHAR);
    schema->set_primary_field_id(pk);

    // Load N generated rows for every field into a sealed segment.
    auto seg = CreateSealedSegment(schema);
    size_t N = 1000;
    auto raw_data = DataGen(schema, N);
    auto fields = schema->get_fields();
    for (auto field_data : raw_data.raw_->fields_data()) {
        int64_t field_id = field_data.field_id();
        auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
        auto field_meta = fields.at(FieldId(field_id));
        info.channel->push(
            CreateFieldDataFromDataArray(N, &field_data, field_meta));
        info.channel->close();
        seg->LoadFieldData(FieldId(field_id), info);
    }

    // Right operand shared by every conjunction: int64 < 100.
    auto int64_below_100 = [&]() -> expr::TypedExprPtr {
        proto::plan::GenericValue limit;
        limit.set_int64_val(100);
        return std::make_shared<expr::UnaryRangeFilterExpr>(
            expr::ColumnInfo(int64_fid, DataType::INT64),
            proto::plan::OpType::LessThan,
            limit,
            std::vector<proto::plan::GenericValue>{});
    };

    // `<column> <op> <text>` with a string literal on the right-hand side.
    auto string_predicate = [&](const expr::ColumnInfo& column,
                                proto::plan::OpType op,
                                const char* text) -> expr::TypedExprPtr {
        proto::plan::GenericValue value;
        value.set_string_val(text);
        return std::make_shared<expr::UnaryRangeFilterExpr>(
            column, op, value, std::vector<proto::plan::GenericValue>{});
    };

    // Returns `<left operand selected by index> AND int64 < 100`.
    auto build_expr = [&](int index) -> expr::TypedExprPtr {
        expr::TypedExprPtr left;
        switch (index) {
            case 0:
                // string1 == 'xxx'
                left = string_predicate(
                    expr::ColumnInfo(str1_fid, DataType::VARCHAR),
                    proto::plan::OpType::Equal,
                    "xxx");
                break;
            case 1:
                // json["int"] == 'xxx'
                left = string_predicate(
                    expr::ColumnInfo(json_fid, DataType::JSON, {"int"}),
                    proto::plan::OpType::Equal,
                    "xxx");
                break;
            case 2:
                // str_array[0] match '12'
                left = string_predicate(
                    expr::ColumnInfo(str_array_fid, DataType::ARRAY, {"0"}),
                    proto::plan::OpType::Match,
                    "12");
                break;
            case 3:
                // int64 < int641 (two-column compare)
                left = std::make_shared<expr::CompareExpr>(int64_fid,
                                                           int64_1_fid,
                                                           DataType::INT64,
                                                           DataType::INT64,
                                                           OpType::LessThan);
                break;
            default:
                PanicInfo(ErrorCode::UnexpectedError, "not implement");
        }
        return std::make_shared<expr::LogicalBinaryExpr>(
            expr::LogicalBinaryExpr::OpType::And, left, int64_below_100());
    };

    BitsetType result;
    for (int index = 0; index < 4; ++index) {
        SCOPED_TRACE(::testing::Message() << "build_expr(" << index << ")");
        auto expr = build_expr(index);
        auto plan =
            std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, expr);
        result = ExecuteQueryExpr(plan, seg.get(), N, MAX_TIMESTAMP);
        // One bit per requested row.
        EXPECT_EQ(result.size(), N);
    }
}
TEST_P(ExprTest, TestCompareExprNullable) { TEST_P(ExprTest, TestCompareExprNullable) {
auto schema = std::make_shared<Schema>(); auto schema = std::make_shared<Schema>();
auto vec_fid = schema->AddDebugField("fakevec", data_type, 16, metric_type); auto vec_fid = schema->AddDebugField("fakevec", data_type, 16, metric_type);

View File

@ -242,6 +242,9 @@ func (node *QueryNode) InitSegcore() error {
cExprBatchSize := C.int64_t(paramtable.Get().QueryNodeCfg.ExprEvalBatchSize.GetAsInt64()) cExprBatchSize := C.int64_t(paramtable.Get().QueryNodeCfg.ExprEvalBatchSize.GetAsInt64())
C.InitDefaultExprEvalBatchSize(cExprBatchSize) C.InitDefaultExprEvalBatchSize(cExprBatchSize)
cOptimizeExprEnabled := C.bool(paramtable.Get().CommonCfg.EnabledOptimizeExpr.GetAsBool())
C.InitDefaultOptimizeExprEnable(cOptimizeExprEnabled)
cGpuMemoryPoolInitSize := C.uint32_t(paramtable.Get().GpuConfig.InitSize.GetAsUint32()) cGpuMemoryPoolInitSize := C.uint32_t(paramtable.Get().GpuConfig.InitSize.GetAsUint32())
cGpuMemoryPoolMaxSize := C.uint32_t(paramtable.Get().GpuConfig.MaxSize.GetAsUint32()) cGpuMemoryPoolMaxSize := C.uint32_t(paramtable.Get().GpuConfig.MaxSize.GetAsUint32())
C.SegcoreSetKnowhereGpuMemoryPoolSize(cGpuMemoryPoolInitSize, cGpuMemoryPoolMaxSize) C.SegcoreSetKnowhereGpuMemoryPoolSize(cGpuMemoryPoolInitSize, cGpuMemoryPoolMaxSize)

View File

@ -292,6 +292,8 @@ type commonConfig struct {
LocalRPCEnabled ParamItem `refreshable:"false"` LocalRPCEnabled ParamItem `refreshable:"false"`
SyncTaskPoolReleaseTimeoutSeconds ParamItem `refreshable:"true"` SyncTaskPoolReleaseTimeoutSeconds ParamItem `refreshable:"true"`
EnabledOptimizeExpr ParamItem `refreshable:"true"`
} }
func (p *commonConfig) init(base *BaseTable) { func (p *commonConfig) init(base *BaseTable) {
@ -994,6 +996,15 @@ This helps Milvus-CDC synchronize incremental data`,
Export: true, Export: true,
} }
p.SyncTaskPoolReleaseTimeoutSeconds.Init(base.mgr) p.SyncTaskPoolReleaseTimeoutSeconds.Init(base.mgr)
p.EnabledOptimizeExpr = ParamItem{
Key: "common.enabledOptimizeExpr",
Version: "2.5.6",
DefaultValue: "true",
Doc: "Indicates whether to enable optimize expr",
Export: true,
}
p.EnabledOptimizeExpr.Init(base.mgr)
} }
type gpuConfig struct { type gpuConfig struct {