mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 01:28:27 +08:00
enhance: reorder sub expr for conjunct expr (#39872)
Two points:
(1) reorder the conjunct expr's sub-expressions to postpone heavy operations
sequence: int(column) -> index(column) -> string(column) -> light
conjunct
...... -> json(column) -> heavy conjunct -> two_column_compare
(2) support pre-filtering during expr execution: skip scanning raw data for rows
that have already been filtered out
by the result of a preceding expr.
#39869
Signed-off-by: luzhang <luzhang@zilliz.com>
Co-authored-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
parent
8db708f67d
commit
6c55db44f1
@ -889,6 +889,7 @@ common:
|
||||
localRPCEnabled: false # enable local rpc for internal communication when mix or standalone mode.
|
||||
sync:
|
||||
taskPoolReleaseTimeoutSeconds: 60 # The maximum time to wait for the task to finish and release resources in the pool
|
||||
enabledOptimizeExpr: true # Indicates whether to enable optimize expr
|
||||
|
||||
# QuotaConfig, configurations of Milvus quota and limits.
|
||||
# By default, we enable:
|
||||
|
||||
@ -28,6 +28,7 @@ int64_t LOW_PRIORITY_THREAD_CORE_COEFFICIENT =
|
||||
DEFAULT_LOW_PRIORITY_THREAD_CORE_COEFFICIENT;
|
||||
int CPU_NUM = DEFAULT_CPU_NUM;
|
||||
int64_t EXEC_EVAL_EXPR_BATCH_SIZE = DEFAULT_EXEC_EVAL_EXPR_BATCH_SIZE;
|
||||
bool OPTIMIZE_EXPR_ENABLED = DEFAULT_OPTIMIZE_EXPR_ENABLED;
|
||||
|
||||
void
|
||||
SetIndexSliceSize(const int64_t size) {
|
||||
@ -67,4 +68,10 @@ SetCpuNum(const int num) {
|
||||
CPU_NUM = num;
|
||||
}
|
||||
|
||||
void
|
||||
SetDefaultOptimizeExprEnable(bool val) {
|
||||
OPTIMIZE_EXPR_ENABLED = val;
|
||||
LOG_INFO("set default optimize expr enabled: {}", OPTIMIZE_EXPR_ENABLED);
|
||||
}
|
||||
|
||||
} // namespace milvus
|
||||
|
||||
@ -29,6 +29,7 @@ extern int64_t MIDDLE_PRIORITY_THREAD_CORE_COEFFICIENT;
|
||||
extern int64_t LOW_PRIORITY_THREAD_CORE_COEFFICIENT;
|
||||
extern int CPU_NUM;
|
||||
extern int64_t EXEC_EVAL_EXPR_BATCH_SIZE;
|
||||
extern bool OPTIMIZE_EXPR_ENABLED;
|
||||
|
||||
void
|
||||
SetIndexSliceSize(const int64_t size);
|
||||
@ -48,6 +49,9 @@ SetCpuNum(const int core);
|
||||
void
|
||||
SetDefaultExecEvalExprBatchSize(int64_t val);
|
||||
|
||||
void
|
||||
SetDefaultOptimizeExprEnable(bool val);
|
||||
|
||||
struct BufferView {
|
||||
struct Element {
|
||||
const char* data_;
|
||||
|
||||
@ -80,3 +80,4 @@ const size_t MARISA_NULL_KEY_ID = -1;
|
||||
|
||||
const std::string JSON_CAST_TYPE = "json_cast_type";
|
||||
const std::string JSON_PATH = "json_path";
|
||||
const bool DEFAULT_OPTIMIZE_EXPR_ENABLED = true;
|
||||
|
||||
@ -254,6 +254,11 @@ IsFloatDataType(DataType data_type) {
|
||||
}
|
||||
}
|
||||
|
||||
inline bool
|
||||
IsNumericDataType(DataType data_type) {
|
||||
return IsIntegerDataType(data_type) || IsFloatDataType(data_type);
|
||||
}
|
||||
|
||||
inline bool
|
||||
IsStringDataType(DataType data_type) {
|
||||
switch (data_type) {
|
||||
|
||||
@ -25,7 +25,7 @@
|
||||
#include "common/Tracer.h"
|
||||
#include "log/Log.h"
|
||||
|
||||
std::once_flag flag1, flag2, flag3, flag4, flag5, flag6;
|
||||
std::once_flag flag1, flag2, flag3, flag4, flag5, flag6, flag7;
|
||||
std::once_flag traceFlag;
|
||||
|
||||
void
|
||||
@ -78,6 +78,14 @@ InitDefaultExprEvalBatchSize(int64_t val) {
|
||||
val);
|
||||
}
|
||||
|
||||
void
|
||||
InitDefaultOptimizeExprEnable(bool val) {
|
||||
std::call_once(
|
||||
flag7,
|
||||
[](bool val) { milvus::SetDefaultOptimizeExprEnable(val); },
|
||||
val);
|
||||
}
|
||||
|
||||
void
|
||||
InitTrace(CTraceConfig* config) {
|
||||
auto traceConfig = milvus::tracer::TraceConfig{config->exporter,
|
||||
|
||||
@ -48,6 +48,9 @@ InitTrace(CTraceConfig* config);
|
||||
void
|
||||
SetTrace(CTraceConfig* config);
|
||||
|
||||
void
|
||||
InitDefaultOptimizeExprEnable(bool val);
|
||||
|
||||
#ifdef __cplusplus
|
||||
};
|
||||
#endif
|
||||
|
||||
@ -35,8 +35,9 @@ PhyAlwaysTrueExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
|
||||
|
||||
@ -57,6 +57,21 @@ class PhyAlwaysTrueExpr : public Expr {
|
||||
}
|
||||
}
|
||||
|
||||
std::string
|
||||
ToString() const override {
|
||||
return "[AlwaysTrue]";
|
||||
}
|
||||
|
||||
bool
|
||||
IsSource() const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
std::optional<milvus::expr::ColumnInfo>
|
||||
GetColumnInfo() const override {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<const milvus::expr::AlwaysTrueExpr> expr_;
|
||||
int64_t active_count_;
|
||||
|
||||
@ -117,11 +117,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson(
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
if (!arg_inited_) {
|
||||
value_arg_.SetValue<ValueType>(expr_->value_);
|
||||
@ -535,11 +535,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray(
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
int index = -1;
|
||||
if (expr_->column_.nested_path_.size() > 0) {
|
||||
@ -1435,11 +1435,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData(
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
if (!arg_inited_) {
|
||||
value_arg_.SetValue<HighPrecisionType>(expr_->value_);
|
||||
|
||||
@ -464,6 +464,21 @@ class PhyBinaryArithOpEvalRangeExpr : public SegmentExpr {
|
||||
void
|
||||
Eval(EvalCtx& context, VectorPtr& result) override;
|
||||
|
||||
std::string
|
||||
ToString() const override {
|
||||
return fmt::format("{}", expr_->ToString());
|
||||
}
|
||||
|
||||
bool
|
||||
IsSource() const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
std::optional<milvus::expr::ColumnInfo>
|
||||
GetColumnInfo() const override {
|
||||
return expr_->column_;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
|
||||
@ -28,31 +28,31 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
SetHasOffsetInput((input != nullptr));
|
||||
switch (expr_->column_.data_type_) {
|
||||
case DataType::BOOL: {
|
||||
result = ExecRangeVisitorImpl<bool>(input);
|
||||
result = ExecRangeVisitorImpl<bool>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::INT8: {
|
||||
result = ExecRangeVisitorImpl<int8_t>(input);
|
||||
result = ExecRangeVisitorImpl<int8_t>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::INT16: {
|
||||
result = ExecRangeVisitorImpl<int16_t>(input);
|
||||
result = ExecRangeVisitorImpl<int16_t>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::INT32: {
|
||||
result = ExecRangeVisitorImpl<int32_t>(input);
|
||||
result = ExecRangeVisitorImpl<int32_t>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::INT64: {
|
||||
result = ExecRangeVisitorImpl<int64_t>(input);
|
||||
result = ExecRangeVisitorImpl<int64_t>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
result = ExecRangeVisitorImpl<float>(input);
|
||||
result = ExecRangeVisitorImpl<float>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
result = ExecRangeVisitorImpl<double>(input);
|
||||
result = ExecRangeVisitorImpl<double>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
@ -60,9 +60,9 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
!storage::MmapManager::GetInstance()
|
||||
.GetMmapConfig()
|
||||
.growing_enable_mmap) {
|
||||
result = ExecRangeVisitorImpl<std::string>(input);
|
||||
result = ExecRangeVisitorImpl<std::string>(context);
|
||||
} else {
|
||||
result = ExecRangeVisitorImpl<std::string_view>(input);
|
||||
result = ExecRangeVisitorImpl<std::string_view>(context);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -70,15 +70,15 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
auto value_type = expr_->lower_val_.val_case();
|
||||
switch (value_type) {
|
||||
case proto::plan::GenericValue::ValCase::kInt64Val: {
|
||||
result = ExecRangeVisitorImplForJson<int64_t>(input);
|
||||
result = ExecRangeVisitorImplForJson<int64_t>(context);
|
||||
break;
|
||||
}
|
||||
case proto::plan::GenericValue::ValCase::kFloatVal: {
|
||||
result = ExecRangeVisitorImplForJson<double>(input);
|
||||
result = ExecRangeVisitorImplForJson<double>(context);
|
||||
break;
|
||||
}
|
||||
case proto::plan::GenericValue::ValCase::kStringVal: {
|
||||
result = ExecRangeVisitorImplForJson<std::string>(input);
|
||||
result = ExecRangeVisitorImplForJson<std::string>(context);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
@ -95,17 +95,17 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
switch (value_type) {
|
||||
case proto::plan::GenericValue::ValCase::kInt64Val: {
|
||||
SetNotUseIndex();
|
||||
result = ExecRangeVisitorImplForArray<int64_t>(input);
|
||||
result = ExecRangeVisitorImplForArray<int64_t>(context);
|
||||
break;
|
||||
}
|
||||
case proto::plan::GenericValue::ValCase::kFloatVal: {
|
||||
SetNotUseIndex();
|
||||
result = ExecRangeVisitorImplForArray<double>(input);
|
||||
result = ExecRangeVisitorImplForArray<double>(context);
|
||||
break;
|
||||
}
|
||||
case proto::plan::GenericValue::ValCase::kStringVal: {
|
||||
SetNotUseIndex();
|
||||
result = ExecRangeVisitorImplForArray<std::string>(input);
|
||||
result = ExecRangeVisitorImplForArray<std::string>(context);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
@ -126,11 +126,11 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImpl(OffsetVector* input) {
|
||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImpl(EvalCtx& context) {
|
||||
if (is_index_mode_ && !has_offset_input_) {
|
||||
return ExecRangeVisitorImplForIndex<T>();
|
||||
} else {
|
||||
return ExecRangeVisitorImplForData<T>(input);
|
||||
return ExecRangeVisitorImplForData<T>(context);
|
||||
}
|
||||
}
|
||||
|
||||
@ -235,7 +235,7 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForIndex() {
|
||||
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(EvalCtx& context) {
|
||||
typedef std::
|
||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||
IndexInnerType;
|
||||
@ -246,6 +246,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
||||
IndexInnerType>
|
||||
HighPrecisionType;
|
||||
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
auto* input = context.get_offset_input();
|
||||
HighPrecisionType val1;
|
||||
HighPrecisionType val2;
|
||||
bool lower_inclusive = false;
|
||||
@ -260,15 +262,16 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
size_t processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[ lower_inclusive,
|
||||
upper_inclusive ]<FilterType filter_type = FilterType::sequential>(
|
||||
[ lower_inclusive, upper_inclusive, &processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
const T* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -279,16 +282,44 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
||||
HighPrecisionType val2) {
|
||||
if (lower_inclusive && upper_inclusive) {
|
||||
BinaryRangeElementFunc<T, true, true, filter_type> func;
|
||||
func(val1, val2, data, size, res, offsets);
|
||||
func(val1,
|
||||
val2,
|
||||
data,
|
||||
size,
|
||||
res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
} else if (lower_inclusive && !upper_inclusive) {
|
||||
BinaryRangeElementFunc<T, true, false, filter_type> func;
|
||||
func(val1, val2, data, size, res, offsets);
|
||||
func(val1,
|
||||
val2,
|
||||
data,
|
||||
size,
|
||||
res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
} else if (!lower_inclusive && upper_inclusive) {
|
||||
BinaryRangeElementFunc<T, false, true, filter_type> func;
|
||||
func(val1, val2, data, size, res, offsets);
|
||||
func(val1,
|
||||
val2,
|
||||
data,
|
||||
size,
|
||||
res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
} else {
|
||||
BinaryRangeElementFunc<T, false, false, filter_type> func;
|
||||
func(val1, val2, data, size, res, offsets);
|
||||
func(val1,
|
||||
val2,
|
||||
data,
|
||||
size,
|
||||
res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
}
|
||||
// there is a batch operation in BinaryRangeElementFunc,
|
||||
// so not divide data again for the reason that it may reduce performance if the null distribution is scattered
|
||||
@ -304,6 +335,7 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
||||
}
|
||||
}
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
|
||||
auto skip_index_func =
|
||||
@ -346,20 +378,23 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
|
||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(EvalCtx& context) {
|
||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||
std::string_view,
|
||||
ValueType>;
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
auto* input = context.get_offset_input();
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
bool lower_inclusive = expr_->lower_inclusive_;
|
||||
bool upper_inclusive = expr_->upper_inclusive_;
|
||||
@ -372,9 +407,15 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
|
||||
ValueType val2 = upper_arg_.GetValue<ValueType>();
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
|
||||
size_t processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[ lower_inclusive, upper_inclusive,
|
||||
pointer ]<FilterType filter_type = FilterType::sequential>(
|
||||
[
|
||||
lower_inclusive,
|
||||
upper_inclusive,
|
||||
pointer,
|
||||
&bitmap_input,
|
||||
&processed_cursor
|
||||
]<FilterType filter_type = FilterType::sequential>(
|
||||
const milvus::Json* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -394,6 +435,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
|
||||
size,
|
||||
res,
|
||||
valid_res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
} else if (lower_inclusive && !upper_inclusive) {
|
||||
BinaryRangeElementFuncForJson<ValueType, true, false, filter_type>
|
||||
@ -406,6 +449,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
|
||||
size,
|
||||
res,
|
||||
valid_res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
|
||||
} else if (!lower_inclusive && upper_inclusive) {
|
||||
@ -419,6 +464,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
|
||||
size,
|
||||
res,
|
||||
valid_res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
} else {
|
||||
BinaryRangeElementFuncForJson<ValueType, false, false, filter_type>
|
||||
@ -431,8 +478,11 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
|
||||
size,
|
||||
res,
|
||||
valid_res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
int64_t processed_size;
|
||||
if (has_offset_input_) {
|
||||
@ -457,20 +507,22 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
|
||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(EvalCtx& context) {
|
||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||
std::string_view,
|
||||
ValueType>;
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
auto* input = context.get_offset_input();
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
bool lower_inclusive = expr_->lower_inclusive_;
|
||||
bool upper_inclusive = expr_->upper_inclusive_;
|
||||
@ -488,9 +540,10 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
|
||||
index = std::stoi(expr_->column_.nested_path_[0]);
|
||||
}
|
||||
|
||||
size_t processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[ lower_inclusive,
|
||||
upper_inclusive ]<FilterType filter_type = FilterType::sequential>(
|
||||
[ lower_inclusive, upper_inclusive, &processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
const milvus::ArrayView* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -511,6 +564,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
|
||||
size,
|
||||
res,
|
||||
valid_res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
} else if (lower_inclusive && !upper_inclusive) {
|
||||
BinaryRangeElementFuncForArray<ValueType, true, false, filter_type>
|
||||
@ -523,6 +578,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
|
||||
size,
|
||||
res,
|
||||
valid_res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
|
||||
} else if (!lower_inclusive && upper_inclusive) {
|
||||
@ -536,6 +593,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
|
||||
size,
|
||||
res,
|
||||
valid_res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
|
||||
} else {
|
||||
@ -549,9 +608,13 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
|
||||
size,
|
||||
res,
|
||||
valid_res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
|
||||
int64_t processed_size;
|
||||
if (has_offset_input_) {
|
||||
processed_size =
|
||||
|
||||
@ -44,9 +44,17 @@ struct BinaryRangeElementFunc {
|
||||
const T* src,
|
||||
size_t n,
|
||||
TargetBitmapView res,
|
||||
const TargetBitmap& bitmap_input,
|
||||
size_t start_cursor,
|
||||
const int32_t* offsets = nullptr) {
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
if constexpr (filter_type == FilterType::random ||
|
||||
std::is_same_v<T, std::string> ||
|
||||
std::is_same_v<T, std::string_view>) {
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
if (has_bitmap_input && !bitmap_input[i + start_cursor]) {
|
||||
continue;
|
||||
}
|
||||
auto offset = (offsets) ? offsets[i] : i;
|
||||
if constexpr (lower_inclusive && upper_inclusive) {
|
||||
res[i] = val1 <= src[offset] && src[offset] <= val2;
|
||||
@ -83,6 +91,9 @@ struct BinaryRangeElementFunc {
|
||||
res[i] = valid_res[i] = false; \
|
||||
break; \
|
||||
} \
|
||||
if (has_bitmap_input && !bitmap_input[i + start_cursor]) { \
|
||||
break; \
|
||||
} \
|
||||
auto x = src[offset].template at<GetType>(pointer); \
|
||||
if (x.error()) { \
|
||||
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
||||
@ -117,7 +128,10 @@ struct BinaryRangeElementFuncForJson {
|
||||
size_t n,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
const TargetBitmap& bitmap_input,
|
||||
size_t start_cursor,
|
||||
const int32_t* offsets = nullptr) {
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
auto offset = i;
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
@ -153,8 +167,14 @@ struct BinaryRangeElementFuncForArray {
|
||||
size_t n,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
const TargetBitmap& bitmap_input,
|
||||
size_t start_cursor,
|
||||
const int32_t* offsets = nullptr) {
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
if (has_bitmap_input && !bitmap_input[i + start_cursor]) {
|
||||
continue;
|
||||
}
|
||||
size_t offset = i;
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
offset = (offsets) ? offsets[i] : i;
|
||||
@ -240,6 +260,21 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
|
||||
void
|
||||
Eval(EvalCtx& context, VectorPtr& result) override;
|
||||
|
||||
std::string
|
||||
ToString() const {
|
||||
return fmt::format("{}", expr_->ToString());
|
||||
}
|
||||
|
||||
bool
|
||||
IsSource() const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
std::optional<milvus::expr::ColumnInfo>
|
||||
GetColumnInfo() const override {
|
||||
return expr_->column_;
|
||||
}
|
||||
|
||||
private:
|
||||
// Check overflow and cache result for performance
|
||||
template <
|
||||
@ -259,7 +294,7 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
|
||||
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
ExecRangeVisitorImpl(OffsetVector* input = nullptr);
|
||||
ExecRangeVisitorImpl(EvalCtx& context);
|
||||
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
@ -267,15 +302,15 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
|
||||
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
ExecRangeVisitorImplForData(OffsetVector* input = nullptr);
|
||||
ExecRangeVisitorImplForData(EvalCtx& context);
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
ExecRangeVisitorImplForJson(OffsetVector* input = nullptr);
|
||||
ExecRangeVisitorImplForJson(EvalCtx& context);
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
ExecRangeVisitorImplForArray(OffsetVector* input = nullptr);
|
||||
ExecRangeVisitorImplForArray(EvalCtx& context);
|
||||
|
||||
private:
|
||||
std::shared_ptr<const milvus::expr::BinaryRangeFilterExpr> expr_;
|
||||
|
||||
@ -68,6 +68,21 @@ class PhyCallExpr : public Expr {
|
||||
}
|
||||
}
|
||||
|
||||
std::string
|
||||
ToString() const {
|
||||
return fmt::format("{}", expr_->ToString());
|
||||
}
|
||||
|
||||
bool
|
||||
IsSource() const override {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::optional<milvus::expr::ColumnInfo>
|
||||
GetColumnInfo() const override {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<const milvus::expr::CallExpr> expr_;
|
||||
|
||||
|
||||
@ -114,6 +114,21 @@ class PhyColumnExpr : public Expr {
|
||||
VectorPtr
|
||||
DoEval(OffsetVector* input = nullptr);
|
||||
|
||||
std::string
|
||||
ToString() const {
|
||||
return fmt::format("{}", expr_->ToString());
|
||||
}
|
||||
|
||||
bool
|
||||
IsSource() const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
std::optional<milvus::expr::ColumnInfo>
|
||||
GetColumnInfo() const override {
|
||||
return expr_->GetColumn();
|
||||
}
|
||||
|
||||
private:
|
||||
bool is_indexed_;
|
||||
|
||||
|
||||
@ -38,20 +38,20 @@ PhyCompareFilterExpr::GetNextBatchSize() {
|
||||
|
||||
template <typename OpType>
|
||||
VectorPtr
|
||||
PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op,
|
||||
OffsetVector* input) {
|
||||
PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op, EvalCtx& context) {
|
||||
// take offsets as input
|
||||
auto input = context.get_offset_input();
|
||||
if (has_offset_input_) {
|
||||
auto real_batch_size = input->size();
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
auto left_data_barrier = segment_chunk_reader_.segment_->num_chunk_data(
|
||||
expr_->left_field_id_);
|
||||
@ -215,37 +215,37 @@ PhyCompareFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
// For segment both fields has no index, can use SIMD to speed up.
|
||||
// Avoiding too much call stack that blocks SIMD.
|
||||
if (!is_left_indexed_ && !is_right_indexed_ && !IsStringExpr()) {
|
||||
result = ExecCompareExprDispatcherForBothDataSegment(input);
|
||||
result = ExecCompareExprDispatcherForBothDataSegment(context);
|
||||
return;
|
||||
}
|
||||
result = ExecCompareExprDispatcherForHybridSegment(input);
|
||||
result = ExecCompareExprDispatcherForHybridSegment(context);
|
||||
}
|
||||
|
||||
VectorPtr
|
||||
PhyCompareFilterExpr::ExecCompareExprDispatcherForHybridSegment(
|
||||
OffsetVector* input) {
|
||||
EvalCtx& context) {
|
||||
switch (expr_->op_type_) {
|
||||
case OpType::Equal: {
|
||||
return ExecCompareExprDispatcher(std::equal_to<>{}, input);
|
||||
return ExecCompareExprDispatcher(std::equal_to<>{}, context);
|
||||
}
|
||||
case OpType::NotEqual: {
|
||||
return ExecCompareExprDispatcher(std::not_equal_to<>{}, input);
|
||||
return ExecCompareExprDispatcher(std::not_equal_to<>{}, context);
|
||||
}
|
||||
case OpType::GreaterEqual: {
|
||||
return ExecCompareExprDispatcher(std::greater_equal<>{}, input);
|
||||
return ExecCompareExprDispatcher(std::greater_equal<>{}, context);
|
||||
}
|
||||
case OpType::GreaterThan: {
|
||||
return ExecCompareExprDispatcher(std::greater<>{}, input);
|
||||
return ExecCompareExprDispatcher(std::greater<>{}, context);
|
||||
}
|
||||
case OpType::LessEqual: {
|
||||
return ExecCompareExprDispatcher(std::less_equal<>{}, input);
|
||||
return ExecCompareExprDispatcher(std::less_equal<>{}, context);
|
||||
}
|
||||
case OpType::LessThan: {
|
||||
return ExecCompareExprDispatcher(std::less<>{}, input);
|
||||
return ExecCompareExprDispatcher(std::less<>{}, context);
|
||||
}
|
||||
case OpType::PrefixMatch: {
|
||||
return ExecCompareExprDispatcher(
|
||||
milvus::query::MatchOp<OpType::PrefixMatch>{}, input);
|
||||
milvus::query::MatchOp<OpType::PrefixMatch>{}, context);
|
||||
}
|
||||
// case OpType::PostfixMatch: {
|
||||
// }
|
||||
@ -257,22 +257,22 @@ PhyCompareFilterExpr::ExecCompareExprDispatcherForHybridSegment(
|
||||
|
||||
VectorPtr
|
||||
PhyCompareFilterExpr::ExecCompareExprDispatcherForBothDataSegment(
|
||||
OffsetVector* input) {
|
||||
EvalCtx& context) {
|
||||
switch (expr_->left_data_type_) {
|
||||
case DataType::BOOL:
|
||||
return ExecCompareLeftType<bool>(input);
|
||||
return ExecCompareLeftType<bool>(context);
|
||||
case DataType::INT8:
|
||||
return ExecCompareLeftType<int8_t>(input);
|
||||
return ExecCompareLeftType<int8_t>(context);
|
||||
case DataType::INT16:
|
||||
return ExecCompareLeftType<int16_t>(input);
|
||||
return ExecCompareLeftType<int16_t>(context);
|
||||
case DataType::INT32:
|
||||
return ExecCompareLeftType<int32_t>(input);
|
||||
return ExecCompareLeftType<int32_t>(context);
|
||||
case DataType::INT64:
|
||||
return ExecCompareLeftType<int64_t>(input);
|
||||
return ExecCompareLeftType<int64_t>(context);
|
||||
case DataType::FLOAT:
|
||||
return ExecCompareLeftType<float>(input);
|
||||
return ExecCompareLeftType<float>(context);
|
||||
case DataType::DOUBLE:
|
||||
return ExecCompareLeftType<double>(input);
|
||||
return ExecCompareLeftType<double>(context);
|
||||
default:
|
||||
PanicInfo(
|
||||
DataTypeInvalid,
|
||||
@ -283,22 +283,22 @@ PhyCompareFilterExpr::ExecCompareExprDispatcherForBothDataSegment(
|
||||
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
PhyCompareFilterExpr::ExecCompareLeftType(OffsetVector* input) {
|
||||
PhyCompareFilterExpr::ExecCompareLeftType(EvalCtx& context) {
|
||||
switch (expr_->right_data_type_) {
|
||||
case DataType::BOOL:
|
||||
return ExecCompareRightType<T, bool>(input);
|
||||
return ExecCompareRightType<T, bool>(context);
|
||||
case DataType::INT8:
|
||||
return ExecCompareRightType<T, int8_t>(input);
|
||||
return ExecCompareRightType<T, int8_t>(context);
|
||||
case DataType::INT16:
|
||||
return ExecCompareRightType<T, int16_t>(input);
|
||||
return ExecCompareRightType<T, int16_t>(context);
|
||||
case DataType::INT32:
|
||||
return ExecCompareRightType<T, int32_t>(input);
|
||||
return ExecCompareRightType<T, int32_t>(context);
|
||||
case DataType::INT64:
|
||||
return ExecCompareRightType<T, int64_t>(input);
|
||||
return ExecCompareRightType<T, int64_t>(context);
|
||||
case DataType::FLOAT:
|
||||
return ExecCompareRightType<T, float>(input);
|
||||
return ExecCompareRightType<T, float>(context);
|
||||
case DataType::DOUBLE:
|
||||
return ExecCompareRightType<T, double>(input);
|
||||
return ExecCompareRightType<T, double>(context);
|
||||
default:
|
||||
PanicInfo(
|
||||
DataTypeInvalid,
|
||||
@ -309,22 +309,26 @@ PhyCompareFilterExpr::ExecCompareLeftType(OffsetVector* input) {
|
||||
|
||||
template <typename T, typename U>
|
||||
VectorPtr
|
||||
PhyCompareFilterExpr::ExecCompareRightType(OffsetVector* input) {
|
||||
PhyCompareFilterExpr::ExecCompareRightType(EvalCtx& context) {
|
||||
auto input = context.get_offset_input();
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
auto expr_type = expr_->op_type_;
|
||||
auto execute_sub_batch = [expr_type]<FilterType filter_type =
|
||||
FilterType::sequential>(
|
||||
size_t processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[ expr_type, &bitmap_input, &
|
||||
processed_cursor ]<FilterType filter_type = FilterType::sequential>(
|
||||
const T* left,
|
||||
const U* right,
|
||||
const int32_t* offsets,
|
||||
@ -334,36 +338,72 @@ PhyCompareFilterExpr::ExecCompareRightType(OffsetVector* input) {
|
||||
case proto::plan::GreaterThan: {
|
||||
CompareElementFunc<T, U, proto::plan::GreaterThan, filter_type>
|
||||
func;
|
||||
func(left, right, size, res, offsets);
|
||||
func(left,
|
||||
right,
|
||||
size,
|
||||
res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
case proto::plan::GreaterEqual: {
|
||||
CompareElementFunc<T, U, proto::plan::GreaterEqual, filter_type>
|
||||
func;
|
||||
func(left, right, size, res, offsets);
|
||||
func(left,
|
||||
right,
|
||||
size,
|
||||
res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
case proto::plan::LessThan: {
|
||||
CompareElementFunc<T, U, proto::plan::LessThan, filter_type>
|
||||
func;
|
||||
func(left, right, size, res, offsets);
|
||||
func(left,
|
||||
right,
|
||||
size,
|
||||
res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
case proto::plan::LessEqual: {
|
||||
CompareElementFunc<T, U, proto::plan::LessEqual, filter_type>
|
||||
func;
|
||||
func(left, right, size, res, offsets);
|
||||
func(left,
|
||||
right,
|
||||
size,
|
||||
res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
case proto::plan::Equal: {
|
||||
CompareElementFunc<T, U, proto::plan::Equal, filter_type> func;
|
||||
func(left, right, size, res, offsets);
|
||||
func(left,
|
||||
right,
|
||||
size,
|
||||
res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
case proto::plan::NotEqual: {
|
||||
CompareElementFunc<T, U, proto::plan::NotEqual, filter_type>
|
||||
func;
|
||||
func(left, right, size, res, offsets);
|
||||
func(left,
|
||||
right,
|
||||
size,
|
||||
res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@ -372,6 +412,7 @@ PhyCompareFilterExpr::ExecCompareRightType(OffsetVector* input) {
|
||||
"compare column expr: {}",
|
||||
expr_type));
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
int64_t processed_size;
|
||||
if (has_offset_input_) {
|
||||
|
||||
@ -40,6 +40,8 @@ struct CompareElementFunc {
|
||||
const U* right,
|
||||
size_t size,
|
||||
TargetBitmapView res,
|
||||
const TargetBitmap& bitmap_input,
|
||||
size_t start_cursor,
|
||||
const int32_t* offsets = nullptr) {
|
||||
// This is the original code, kept here for the documentation purposes
|
||||
// also, used for iterative filter
|
||||
@ -69,6 +71,34 @@ struct CompareElementFunc {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!bitmap_input.empty()) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (!bitmap_input[start_cursor + i]) {
|
||||
continue;
|
||||
}
|
||||
if constexpr (op == proto::plan::OpType::Equal) {
|
||||
res[i] = left[i] == right[i];
|
||||
} else if constexpr (op == proto::plan::OpType::NotEqual) {
|
||||
res[i] = left[i] != right[i];
|
||||
} else if constexpr (op == proto::plan::OpType::GreaterThan) {
|
||||
res[i] = left[i] > right[i];
|
||||
} else if constexpr (op == proto::plan::OpType::LessThan) {
|
||||
res[i] = left[i] < right[i];
|
||||
} else if constexpr (op == proto::plan::OpType::GreaterEqual) {
|
||||
res[i] = left[i] >= right[i];
|
||||
} else if constexpr (op == proto::plan::OpType::LessEqual) {
|
||||
res[i] = left[i] <= right[i];
|
||||
} else {
|
||||
PanicInfo(
|
||||
OpTypeInvalid,
|
||||
fmt::format(
|
||||
"unsupported op_type:{} for CompareElementFunc",
|
||||
op));
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if constexpr (op == proto::plan::OpType::Equal) {
|
||||
res.inplace_compare_column<T, U, milvus::bitset::CompareOpType::EQ>(
|
||||
left, right, size);
|
||||
@ -170,6 +200,21 @@ class PhyCompareFilterExpr : public Expr {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the textual form of this compare expression by delegating to the
// wrapped logical expression (expr_).
std::string
|
||||
ToString() const {
|
||||
return fmt::format("{}", expr_->ToString());
|
||||
}
|
||||
|
||||
// Always reports true for this expression type.
bool
|
||||
IsSource() const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Always returns std::nullopt: this expression does not report a single
// column to callers of GetColumnInfo().
std::optional<milvus::expr::ColumnInfo>
|
||||
GetColumnInfo() const override {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
private:
|
||||
int64_t
|
||||
GetCurrentRows() {
|
||||
@ -451,21 +496,21 @@ class PhyCompareFilterExpr : public Expr {
|
||||
|
||||
template <typename OpType>
|
||||
VectorPtr
|
||||
ExecCompareExprDispatcher(OpType op, OffsetVector* input = nullptr);
|
||||
ExecCompareExprDispatcher(OpType op, EvalCtx& context);
|
||||
|
||||
VectorPtr
|
||||
ExecCompareExprDispatcherForHybridSegment(OffsetVector* input = nullptr);
|
||||
ExecCompareExprDispatcherForHybridSegment(EvalCtx& context);
|
||||
|
||||
VectorPtr
|
||||
ExecCompareExprDispatcherForBothDataSegment(OffsetVector* input = nullptr);
|
||||
ExecCompareExprDispatcherForBothDataSegment(EvalCtx& context);
|
||||
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
ExecCompareLeftType(OffsetVector* input = nullptr);
|
||||
ExecCompareLeftType(EvalCtx& context);
|
||||
|
||||
template <typename T, typename U>
|
||||
VectorPtr
|
||||
ExecCompareRightType(OffsetVector* input = nullptr);
|
||||
ExecCompareRightType(EvalCtx& context);
|
||||
|
||||
private:
|
||||
const FieldId left_field_;
|
||||
|
||||
@ -83,16 +83,22 @@ PhyConjunctFilterExpr::CanSkipFollowingExprs(ColumnVectorPtr& vec) {
|
||||
|
||||
void
|
||||
PhyConjunctFilterExpr::SkipFollowingExprs(int start) {
|
||||
for (int i = start; i < inputs_.size(); ++i) {
|
||||
inputs_[i]->MoveCursor();
|
||||
for (int i = start; i < input_order_.size(); ++i) {
|
||||
inputs_[input_order_[i]]->MoveCursor();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
PhyConjunctFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
for (int i = 0; i < inputs_.size(); ++i) {
|
||||
if (input_order_.empty()) {
|
||||
input_order_.resize(inputs_.size());
|
||||
for (size_t i = 0; i < inputs_.size(); i++) {
|
||||
input_order_[i] = i;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < input_order_.size(); ++i) {
|
||||
VectorPtr input_result;
|
||||
inputs_[i]->Eval(context, input_result);
|
||||
inputs_[input_order_[i]]->Eval(context, input_result);
|
||||
if (i == 0) {
|
||||
result = input_result;
|
||||
auto all_flat_result = GetColumnVector(result);
|
||||
@ -100,6 +106,7 @@ PhyConjunctFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
SkipFollowingExprs(i + 1);
|
||||
return;
|
||||
}
|
||||
SetNextExprBitmapInput(all_flat_result, context);
|
||||
continue;
|
||||
}
|
||||
auto input_flat_result = GetColumnVector(input_result);
|
||||
@ -110,7 +117,9 @@ PhyConjunctFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
SkipFollowingExprs(i + 1);
|
||||
return;
|
||||
}
|
||||
SetNextExprBitmapInput(all_flat_result, context);
|
||||
}
|
||||
ClearBitmapInput(context);
|
||||
}
|
||||
|
||||
} //namespace exec
|
||||
|
||||
@ -66,7 +66,7 @@ struct ConjunctElementFunc {
|
||||
class PhyConjunctFilterExpr : public Expr {
|
||||
public:
|
||||
PhyConjunctFilterExpr(std::vector<ExprPtr>&& inputs, bool is_and)
|
||||
: Expr(DataType::BOOL, std::move(inputs), is_and ? "and" : "or"),
|
||||
: Expr(DataType::BOOL, std::move(inputs), "PhyConjunctFilterExpr"),
|
||||
is_and_(is_and) {
|
||||
std::vector<DataType> input_types;
|
||||
input_types.reserve(inputs_.size());
|
||||
@ -101,6 +101,63 @@ class PhyConjunctFilterExpr : public Expr {
|
||||
return true;
|
||||
}
|
||||
|
||||
std::string
|
||||
ToString() const {
|
||||
if (!input_order_.empty()) {
|
||||
std::vector<std::string> inputs;
|
||||
for (auto& i : input_order_) {
|
||||
inputs.push_back(inputs_[i]->ToString());
|
||||
}
|
||||
std::string input_str =
|
||||
is_and_ ? Join(inputs, " && ") : Join(inputs, " || ");
|
||||
return fmt::format("[ConjuctExpr:{}]", input_str);
|
||||
}
|
||||
std::vector<std::string> inputs;
|
||||
for (auto& in : inputs_) {
|
||||
inputs.push_back(in->ToString());
|
||||
}
|
||||
std::string input_str =
|
||||
is_and_ ? Join(inputs, " && ") : Join(inputs, "||");
|
||||
return fmt::format("[ConjuctExpr:{}]", input_str);
|
||||
}
|
||||
|
||||
// Always reports false for a conjunct expression.
bool
|
||||
IsSource() const override {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Always returns std::nullopt; a conjunct expression reports no column.
std::optional<milvus::expr::ColumnInfo>
|
||||
GetColumnInfo() const override {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Stores a copy of `exprs_order` as the stored input order (input_order_).
// No validation is performed here on the size or contents of `exprs_order`.
void
|
||||
Reorder(const std::vector<size_t>& exprs_order) {
|
||||
input_order_ = exprs_order;
|
||||
}
|
||||
|
||||
std::vector<size_t>
|
||||
GetReorder() {
|
||||
return input_order_;
|
||||
}
|
||||
|
||||
void
|
||||
SetNextExprBitmapInput(const ColumnVectorPtr& vec, EvalCtx& context) {
|
||||
TargetBitmapView last_res_bitmap(vec->GetRawData(), vec->size());
|
||||
TargetBitmap next_input_bitmap(last_res_bitmap);
|
||||
if (is_and_) {
|
||||
context.set_bitmap_input(std::move(next_input_bitmap));
|
||||
} else {
|
||||
next_input_bitmap.flip();
|
||||
context.set_bitmap_input(std::move(next_input_bitmap));
|
||||
}
|
||||
}
|
||||
|
||||
// Clears the bitmap input stored in `context` by forwarding to
// EvalCtx::clear_bitmap_input().
void
|
||||
ClearBitmapInput(EvalCtx& context) {
|
||||
context.clear_bitmap_input();
|
||||
}
|
||||
|
||||
private:
|
||||
int64_t
|
||||
UpdateResult(ColumnVectorPtr& input_result,
|
||||
@ -117,7 +174,7 @@ class PhyConjunctFilterExpr : public Expr {
|
||||
SkipFollowingExprs(int start);
|
||||
// true if conjunction (and), false if disjunction (or).
|
||||
bool is_and_;
|
||||
std::vector<int32_t> input_order_;
|
||||
std::vector<size_t> input_order_;
|
||||
};
|
||||
} //namespace exec
|
||||
} // namespace milvus
|
||||
|
||||
@ -69,12 +69,30 @@ class EvalCtx {
|
||||
offset_input_ = offset_input;
|
||||
}
|
||||
|
||||
// Takes ownership of `bitmap_input` (moved into bitmap_input_), replacing any
// previously stored bitmap.
inline void
|
||||
set_bitmap_input(TargetBitmap&& bitmap_input) {
|
||||
bitmap_input_ = std::move(bitmap_input);
|
||||
}
|
||||
|
||||
// Returns a const reference to the stored bitmap input; the reference refers
// to a member of this EvalCtx.
inline const TargetBitmap&
|
||||
get_bitmap_input() const {
|
||||
return bitmap_input_;
|
||||
}
|
||||
|
||||
// Clears the stored bitmap input by calling clear() on bitmap_input_.
void
|
||||
clear_bitmap_input() {
|
||||
bitmap_input_.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
ExecContext* exec_ctx_ = nullptr;
|
||||
ExprSet* expr_set_ = nullptr;
|
||||
// we may accept offsets array as input and do expr filtering on these data
|
||||
OffsetVector* offset_input_ = nullptr;
|
||||
bool input_no_nulls_ = false;
|
||||
|
||||
// used for expr pre filter, that avoid unnecessary execution on filtered data
|
||||
TargetBitmap bitmap_input_;
|
||||
};
|
||||
|
||||
} // namespace exec
|
||||
|
||||
@ -30,7 +30,7 @@ PhyExistsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
PanicInfo(ExprInvalid,
|
||||
"exists expr for json index mode not supported");
|
||||
}
|
||||
result = EvalJsonExistsForDataSegment(input);
|
||||
result = EvalJsonExistsForDataSegment(context);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@ -41,21 +41,26 @@ PhyExistsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
}
|
||||
|
||||
VectorPtr
|
||||
PhyExistsFilterExpr::EvalJsonExistsForDataSegment(OffsetVector* input) {
|
||||
PhyExistsFilterExpr::EvalJsonExistsForDataSegment(EvalCtx& context) {
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
int processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[]<FilterType filter_type = FilterType::sequential>(
|
||||
[&bitmap_input, &
|
||||
processed_cursor ]<FilterType filter_type = FilterType::sequential>(
|
||||
const milvus::Json* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -63,6 +68,7 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment(OffsetVector* input) {
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
const std::string& pointer) {
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
@ -72,8 +78,12 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||
continue;
|
||||
}
|
||||
res[i] = data[offset].exist(pointer);
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
|
||||
int64_t processed_size;
|
||||
|
||||
@ -57,9 +57,24 @@ class PhyExistsFilterExpr : public SegmentExpr {
|
||||
void
|
||||
Eval(EvalCtx& context, VectorPtr& result) override;
|
||||
|
||||
// Returns the textual form of this exists expression by delegating to the
// wrapped logical expression (expr_).
std::string
|
||||
ToString() const {
|
||||
return fmt::format("{}", expr_->ToString());
|
||||
}
|
||||
|
||||
// Always reports true for this expression type.
bool
|
||||
IsSource() const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the column referenced by the wrapped logical expression
// (expr_->column_), wrapped in an optional.
std::optional<milvus::expr::ColumnInfo>
|
||||
GetColumnInfo() const override {
|
||||
return expr_->column_;
|
||||
}
|
||||
|
||||
private:
|
||||
VectorPtr
|
||||
EvalJsonExistsForDataSegment(OffsetVector* input = nullptr);
|
||||
EvalJsonExistsForDataSegment(EvalCtx& context);
|
||||
|
||||
private:
|
||||
std::shared_ptr<const milvus::expr::ExistsExpr> expr_;
|
||||
|
||||
@ -67,7 +67,9 @@ CompileExpressions(const std::vector<expr::TypedExprPtr>& sources,
|
||||
enable_constant_folding));
|
||||
}
|
||||
|
||||
if (OPTIMIZE_EXPR_ENABLED) {
|
||||
OptimizeCompiledExprs(context, exprs);
|
||||
}
|
||||
|
||||
return exprs;
|
||||
}
|
||||
@ -303,9 +305,174 @@ CompileExpression(const expr::TypedExprPtr& expr,
|
||||
return result;
|
||||
}
|
||||
|
||||
bool
|
||||
IsLikeExpr(std::shared_ptr<Expr> input) {
|
||||
if (input->name() == "PhyUnaryRangeFilterExpr") {
|
||||
auto optype = std::static_pointer_cast<PhyUnaryRangeFilterExpr>(input)
|
||||
->GetLogicalExpr()
|
||||
->op_type_;
|
||||
switch (optype) {
|
||||
case proto::plan::PrefixMatch:
|
||||
case proto::plan::PostfixMatch:
|
||||
case proto::plan::Match:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
inline void
|
||||
ReorderConjunctExpr(std::shared_ptr<milvus::exec::PhyConjunctFilterExpr>& expr,
|
||||
ExecContext* context,
|
||||
bool& has_heavy_operation) {
|
||||
auto* segment = context->get_query_context()->get_segment();
|
||||
if (!segment || !expr) {
|
||||
return;
|
||||
}
|
||||
std::vector<size_t> reorder;
|
||||
std::vector<size_t> numeric_expr;
|
||||
std::vector<size_t> indexed_expr;
|
||||
std::vector<size_t> string_expr;
|
||||
std::vector<size_t> str_like_expr;
|
||||
std::vector<size_t> json_expr;
|
||||
std::vector<size_t> json_like_expr;
|
||||
std::vector<size_t> array_expr;
|
||||
std::vector<size_t> array_like_expr;
|
||||
std::vector<size_t> compare_expr;
|
||||
std::vector<size_t> other_expr;
|
||||
std::vector<size_t> heavy_conjunct_expr;
|
||||
std::vector<size_t> light_conjunct_expr;
|
||||
|
||||
const auto& inputs = expr->GetInputsRef();
|
||||
for (int i = 0; i < inputs.size(); i++) {
|
||||
auto input = inputs[i];
|
||||
|
||||
if (input->IsSource() && input->GetColumnInfo().has_value()) {
|
||||
auto column = input->GetColumnInfo().value();
|
||||
if (IsNumericDataType(column.data_type_)) {
|
||||
numeric_expr.push_back(i);
|
||||
continue;
|
||||
}
|
||||
if (segment->HasIndex(column.field_id_)) {
|
||||
indexed_expr.push_back(i);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (IsStringDataType(column.data_type_)) {
|
||||
auto is_like_expr = IsLikeExpr(input);
|
||||
if (is_like_expr) {
|
||||
str_like_expr.push_back(i);
|
||||
has_heavy_operation = true;
|
||||
} else {
|
||||
string_expr.push_back(i);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (IsArrayDataType(column.data_type_)) {
|
||||
auto is_like_expr = IsLikeExpr(input);
|
||||
if (is_like_expr) {
|
||||
array_like_expr.push_back(i);
|
||||
has_heavy_operation = true;
|
||||
} else {
|
||||
array_expr.push_back(i);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (IsJsonDataType(column.data_type_)) {
|
||||
auto is_like_expr = IsLikeExpr(input);
|
||||
if (is_like_expr) {
|
||||
json_like_expr.push_back(i);
|
||||
} else {
|
||||
json_expr.push_back(i);
|
||||
}
|
||||
has_heavy_operation = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (input->name() == "PhyConjunctFilterExpr") {
|
||||
bool sub_expr_heavy = false;
|
||||
auto expr = std::static_pointer_cast<PhyConjunctFilterExpr>(input);
|
||||
ReorderConjunctExpr(expr, context, sub_expr_heavy);
|
||||
has_heavy_operation |= sub_expr_heavy;
|
||||
if (sub_expr_heavy) {
|
||||
heavy_conjunct_expr.push_back(i);
|
||||
} else {
|
||||
light_conjunct_expr.push_back(i);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (input->name() == "PhyCompareFilterExpr") {
|
||||
compare_expr.push_back(i);
|
||||
has_heavy_operation = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
other_expr.push_back(i);
|
||||
}
|
||||
|
||||
reorder.reserve(inputs.size());
|
||||
// Final reorder sequence:
|
||||
// 1. Numeric column expressions (fastest to evaluate)
|
||||
// 2. Indexed column expressions (can use index for efficient filtering)
|
||||
// 3. String column expressions
|
||||
// 4. Light conjunct expressions (conjunctions without heavy operations)
|
||||
// 5. Other expressions
|
||||
// 6. Array column expression
|
||||
// 7. String like expression
|
||||
// 8. Array like expression
|
||||
// 9. JSON column expressions (expensive to evaluate)
|
||||
// 10. JSON like expression (more expensive than common json compare)
|
||||
// 11. Heavy conjunct expressions (conjunctions with heavy operations)
|
||||
// 12. Compare filter expressions (most expensive, comparing two columns)
|
||||
reorder.insert(reorder.end(), numeric_expr.begin(), numeric_expr.end());
|
||||
reorder.insert(reorder.end(), indexed_expr.begin(), indexed_expr.end());
|
||||
reorder.insert(reorder.end(), string_expr.begin(), string_expr.end());
|
||||
reorder.insert(
|
||||
reorder.end(), light_conjunct_expr.begin(), light_conjunct_expr.end());
|
||||
reorder.insert(reorder.end(), other_expr.begin(), other_expr.end());
|
||||
reorder.insert(reorder.end(), array_expr.begin(), array_expr.end());
|
||||
reorder.insert(reorder.end(), str_like_expr.begin(), str_like_expr.end());
|
||||
reorder.insert(
|
||||
reorder.end(), array_like_expr.begin(), array_like_expr.end());
|
||||
reorder.insert(reorder.end(), json_expr.begin(), json_expr.end());
|
||||
reorder.insert(reorder.end(), json_like_expr.begin(), json_like_expr.end());
|
||||
reorder.insert(
|
||||
reorder.end(), heavy_conjunct_expr.begin(), heavy_conjunct_expr.end());
|
||||
reorder.insert(reorder.end(), compare_expr.begin(), compare_expr.end());
|
||||
|
||||
AssertInfo(reorder.size() == inputs.size(),
|
||||
"reorder size:{} but input size:{}",
|
||||
reorder.size(),
|
||||
inputs.size());
|
||||
|
||||
expr->Reorder(reorder);
|
||||
}
|
||||
|
||||
inline void
|
||||
OptimizeCompiledExprs(ExecContext* context, const std::vector<ExprPtr>& exprs) {
|
||||
//TODO: add optimization pattern
|
||||
std::chrono::high_resolution_clock::time_point start =
|
||||
std::chrono::high_resolution_clock::now();
|
||||
for (const auto& expr : exprs) {
|
||||
if (expr->name() == "PhyConjunctFilterExpr") {
|
||||
LOG_DEBUG("before reoder filter expression: {}", expr->ToString());
|
||||
auto conjunct_expr =
|
||||
std::static_pointer_cast<PhyConjunctFilterExpr>(expr);
|
||||
bool has_heavy_operation = false;
|
||||
ReorderConjunctExpr(conjunct_expr, context, has_heavy_operation);
|
||||
LOG_DEBUG("after reorder filter expression: {}", expr->ToString());
|
||||
}
|
||||
}
|
||||
std::chrono::high_resolution_clock::time_point end =
|
||||
std::chrono::high_resolution_clock::now();
|
||||
double cost =
|
||||
std::chrono::duration<double, std::micro>(end - start).count();
|
||||
monitor::internal_core_optimize_expr_latency.Observe(cost / 1000);
|
||||
}
|
||||
|
||||
} // namespace exec
|
||||
|
||||
@ -64,7 +64,7 @@ class Expr {
|
||||
}
|
||||
|
||||
std::string
|
||||
get_name() {
|
||||
name() {
|
||||
return name_;
|
||||
}
|
||||
|
||||
@ -88,9 +88,29 @@ class Expr {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Base implementation: not supported, raises NotImplemented via PanicInfo.
// Derived expressions that can be rendered as text override this.
virtual std::string
|
||||
ToString() const {
|
||||
PanicInfo(ErrorCode::NotImplemented, "not implemented");
|
||||
}
|
||||
|
||||
// Base implementation: reports false. Derived classes may override.
virtual bool
|
||||
IsSource() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Base implementation: not supported, raises NotImplemented via PanicInfo
// rather than returning std::nullopt. Derived classes override this to
// report their column, or std::nullopt when they have none.
virtual std::optional<milvus::expr::ColumnInfo>
|
||||
GetColumnInfo() const {
|
||||
PanicInfo(ErrorCode::NotImplemented, "not implemented");
|
||||
}
|
||||
|
||||
const std::vector<std::shared_ptr<Expr>>&
|
||||
GetInputsRef() {
|
||||
return inputs_;
|
||||
}
|
||||
|
||||
protected:
|
||||
DataType type_;
|
||||
const std::vector<std::shared_ptr<Expr>> inputs_;
|
||||
std::vector<std::shared_ptr<Expr>> inputs_;
|
||||
std::string name_;
|
||||
// NOTE: unused
|
||||
std::shared_ptr<VectorFunction> vector_func_;
|
||||
@ -167,6 +187,11 @@ class SegmentExpr : public Expr {
|
||||
}
|
||||
}
|
||||
|
||||
// Overrides Expr::IsSource(): always reports true here.
virtual bool
|
||||
IsSource() const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
MoveCursorForDataMultipleChunk() {
|
||||
int64_t processed_size = 0;
|
||||
@ -1142,6 +1167,9 @@ class SegmentExpr : public Expr {
|
||||
std::shared_ptr<TargetBitmap> cached_match_res_{nullptr};
|
||||
};
|
||||
|
||||
bool
|
||||
IsLikeExpr(std::shared_ptr<Expr> expr);
|
||||
|
||||
void
|
||||
OptimizeCompiledExprs(ExecContext* context, const std::vector<ExprPtr>& exprs);
|
||||
|
||||
|
||||
@ -30,17 +30,17 @@ PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
if (is_index_mode_ && !has_offset_input_) {
|
||||
result = EvalArrayContainsForIndexSegment();
|
||||
} else {
|
||||
result = EvalJsonContainsForDataSegment(input);
|
||||
result = EvalJsonContainsForDataSegment(context);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DataType::JSON: {
|
||||
if (is_index_mode_ && !has_offset_input_) {
|
||||
PanicInfo(
|
||||
ExprInvalid,
|
||||
"exists expr for json or array index mode not supported");
|
||||
if (is_index_mode_ && !context.get_offset_input()) {
|
||||
PanicInfo(ExprInvalid,
|
||||
"exists expr for json or array index mode not "
|
||||
"supported");
|
||||
}
|
||||
result = EvalJsonContainsForDataSegment(input);
|
||||
result = EvalJsonContainsForDataSegment(context);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@ -51,7 +51,7 @@ PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
}
|
||||
|
||||
VectorPtr
|
||||
PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
|
||||
PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(EvalCtx& context) {
|
||||
auto data_type = expr_->column_.data_type_;
|
||||
switch (expr_->op_) {
|
||||
case proto::plan::JSONContainsExpr_JSONOp_Contains:
|
||||
@ -60,16 +60,16 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
|
||||
auto val_type = expr_->vals_[0].val_case();
|
||||
switch (val_type) {
|
||||
case proto::plan::GenericValue::kBoolVal: {
|
||||
return ExecArrayContains<bool>(input);
|
||||
return ExecArrayContains<bool>(context);
|
||||
}
|
||||
case proto::plan::GenericValue::kInt64Val: {
|
||||
return ExecArrayContains<int64_t>(input);
|
||||
return ExecArrayContains<int64_t>(context);
|
||||
}
|
||||
case proto::plan::GenericValue::kFloatVal: {
|
||||
return ExecArrayContains<double>(input);
|
||||
return ExecArrayContains<double>(context);
|
||||
}
|
||||
case proto::plan::GenericValue::kStringVal: {
|
||||
return ExecArrayContains<std::string>(input);
|
||||
return ExecArrayContains<std::string>(context);
|
||||
}
|
||||
default:
|
||||
PanicInfo(
|
||||
@ -81,19 +81,19 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
|
||||
auto val_type = expr_->vals_[0].val_case();
|
||||
switch (val_type) {
|
||||
case proto::plan::GenericValue::kBoolVal: {
|
||||
return ExecJsonContains<bool>(input);
|
||||
return ExecJsonContains<bool>(context);
|
||||
}
|
||||
case proto::plan::GenericValue::kInt64Val: {
|
||||
return ExecJsonContains<int64_t>(input);
|
||||
return ExecJsonContains<int64_t>(context);
|
||||
}
|
||||
case proto::plan::GenericValue::kFloatVal: {
|
||||
return ExecJsonContains<double>(input);
|
||||
return ExecJsonContains<double>(context);
|
||||
}
|
||||
case proto::plan::GenericValue::kStringVal: {
|
||||
return ExecJsonContains<std::string>(input);
|
||||
return ExecJsonContains<std::string>(context);
|
||||
}
|
||||
case proto::plan::GenericValue::kArrayVal: {
|
||||
return ExecJsonContainsArray(input);
|
||||
return ExecJsonContainsArray(context);
|
||||
}
|
||||
default:
|
||||
PanicInfo(DataTypeInvalid,
|
||||
@ -101,7 +101,7 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
|
||||
val_type);
|
||||
}
|
||||
} else {
|
||||
return ExecJsonContainsWithDiffType(input);
|
||||
return ExecJsonContainsWithDiffType(context);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -110,16 +110,16 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
|
||||
auto val_type = expr_->vals_[0].val_case();
|
||||
switch (val_type) {
|
||||
case proto::plan::GenericValue::kBoolVal: {
|
||||
return ExecArrayContainsAll<bool>(input);
|
||||
return ExecArrayContainsAll<bool>(context);
|
||||
}
|
||||
case proto::plan::GenericValue::kInt64Val: {
|
||||
return ExecArrayContainsAll<int64_t>(input);
|
||||
return ExecArrayContainsAll<int64_t>(context);
|
||||
}
|
||||
case proto::plan::GenericValue::kFloatVal: {
|
||||
return ExecArrayContainsAll<double>(input);
|
||||
return ExecArrayContainsAll<double>(context);
|
||||
}
|
||||
case proto::plan::GenericValue::kStringVal: {
|
||||
return ExecArrayContainsAll<std::string>(input);
|
||||
return ExecArrayContainsAll<std::string>(context);
|
||||
}
|
||||
default:
|
||||
PanicInfo(
|
||||
@ -131,19 +131,19 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
|
||||
auto val_type = expr_->vals_[0].val_case();
|
||||
switch (val_type) {
|
||||
case proto::plan::GenericValue::kBoolVal: {
|
||||
return ExecJsonContainsAll<bool>(input);
|
||||
return ExecJsonContainsAll<bool>(context);
|
||||
}
|
||||
case proto::plan::GenericValue::kInt64Val: {
|
||||
return ExecJsonContainsAll<int64_t>(input);
|
||||
return ExecJsonContainsAll<int64_t>(context);
|
||||
}
|
||||
case proto::plan::GenericValue::kFloatVal: {
|
||||
return ExecJsonContainsAll<double>(input);
|
||||
return ExecJsonContainsAll<double>(context);
|
||||
}
|
||||
case proto::plan::GenericValue::kStringVal: {
|
||||
return ExecJsonContainsAll<std::string>(input);
|
||||
return ExecJsonContainsAll<std::string>(context);
|
||||
}
|
||||
case proto::plan::GenericValue::kArrayVal: {
|
||||
return ExecJsonContainsAllArray(input);
|
||||
return ExecJsonContainsAllArray(context);
|
||||
}
|
||||
default:
|
||||
PanicInfo(DataTypeInvalid,
|
||||
@ -151,7 +151,7 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
|
||||
val_type);
|
||||
}
|
||||
} else {
|
||||
return ExecJsonContainsAllWithDiffType(input);
|
||||
return ExecJsonContainsAllWithDiffType(context);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -164,11 +164,13 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
|
||||
|
||||
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) {
|
||||
PhyJsonContainsFilterExpr::ExecArrayContains(EvalCtx& context) {
|
||||
using GetType =
|
||||
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
||||
std::string_view,
|
||||
ExprValueType>;
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
@ -177,18 +179,21 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) {
|
||||
AssertInfo(expr_->column_.nested_path_.size() == 0,
|
||||
"[ExecArrayContains]nested path must be null");
|
||||
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
if (!arg_inited_) {
|
||||
arg_set_ = std::make_shared<SortVectorElement<GetType>>(expr_->vals_);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
|
||||
int processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[]<FilterType filter_type = FilterType::sequential>(
|
||||
[&processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
const milvus::ArrayView* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -205,6 +210,7 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) {
|
||||
}
|
||||
return false;
|
||||
};
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
@ -214,8 +220,12 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||
continue;
|
||||
}
|
||||
res[i] = executor(offset);
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
|
||||
int64_t processed_size;
|
||||
@ -241,30 +251,36 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) {
|
||||
|
||||
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) {
|
||||
PhyJsonContainsFilterExpr::ExecJsonContains(EvalCtx& context) {
|
||||
using GetType =
|
||||
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
||||
std::string_view,
|
||||
ExprValueType>;
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
if (!arg_inited_) {
|
||||
arg_set_ = std::make_shared<SortVectorElement<GetType>>(expr_->vals_);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
|
||||
size_t processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[]<FilterType filter_type = FilterType::sequential>(
|
||||
[&processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
const milvus::Json* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -290,6 +306,7 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) {
|
||||
}
|
||||
return false;
|
||||
};
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
@ -299,8 +316,12 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||
continue;
|
||||
}
|
||||
res[i] = executor(offset);
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
|
||||
int64_t processed_size;
|
||||
@ -329,26 +350,31 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) {
|
||||
}
|
||||
|
||||
VectorPtr
|
||||
PhyJsonContainsFilterExpr::ExecJsonContainsArray(OffsetVector* input) {
|
||||
PhyJsonContainsFilterExpr::ExecJsonContainsArray(EvalCtx& context) {
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
std::vector<proto::plan::Array> elements;
|
||||
for (auto const& element : expr_->vals_) {
|
||||
elements.emplace_back(GetValueFromProto<proto::plan::Array>(element));
|
||||
}
|
||||
|
||||
size_t processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[]<FilterType filter_type = FilterType::sequential>(
|
||||
[&processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
const milvus::Json* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -383,6 +409,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray(OffsetVector* input) {
|
||||
}
|
||||
return false;
|
||||
};
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
@ -392,8 +419,12 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||
continue;
|
||||
}
|
||||
res[i] = executor(offset);
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
|
||||
int64_t processed_size;
|
||||
@ -423,11 +454,13 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray(OffsetVector* input) {
|
||||
|
||||
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) {
|
||||
PhyJsonContainsFilterExpr::ExecArrayContainsAll(EvalCtx& context) {
|
||||
using GetType =
|
||||
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
||||
std::string_view,
|
||||
ExprValueType>;
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
AssertInfo(expr_->column_.nested_path_.size() == 0,
|
||||
"[ExecArrayContainsAll]nested path must be null");
|
||||
auto real_batch_size =
|
||||
@ -436,19 +469,21 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
std::set<GetType> elements;
|
||||
for (auto const& element : expr_->vals_) {
|
||||
elements.insert(GetValueFromProto<GetType>(element));
|
||||
}
|
||||
|
||||
int processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[]<FilterType filter_type = FilterType::sequential>(
|
||||
[&processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
const milvus::ArrayView* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -467,6 +502,7 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) {
|
||||
}
|
||||
return tmp_elements.size() == 0;
|
||||
};
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
@ -476,8 +512,12 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||
continue;
|
||||
}
|
||||
res[i] = executor(offset);
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
|
||||
int64_t processed_size;
|
||||
@ -503,22 +543,24 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) {
|
||||
|
||||
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) {
|
||||
PhyJsonContainsFilterExpr::ExecJsonContainsAll(EvalCtx& context) {
|
||||
using GetType =
|
||||
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
||||
std::string_view,
|
||||
ExprValueType>;
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
std::set<GetType> elements;
|
||||
@ -526,8 +568,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) {
|
||||
elements.insert(GetValueFromProto<GetType>(element));
|
||||
}
|
||||
|
||||
int processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[]<FilterType filter_type = FilterType::sequential>(
|
||||
[&processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
const milvus::Json* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -556,6 +600,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) {
|
||||
}
|
||||
return tmp_elements.size() == 0;
|
||||
};
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
@ -565,8 +610,12 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||
continue;
|
||||
}
|
||||
res[i] = executor(offset);
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
|
||||
int64_t processed_size;
|
||||
@ -595,18 +644,19 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) {
|
||||
}
|
||||
|
||||
VectorPtr
|
||||
PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(
|
||||
OffsetVector* input) {
|
||||
PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(EvalCtx& context) {
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
|
||||
@ -618,8 +668,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(
|
||||
i++;
|
||||
}
|
||||
|
||||
int processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[]<FilterType filter_type = FilterType::sequential>(
|
||||
[&processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
const milvus::Json* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -707,6 +759,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(
|
||||
}
|
||||
return tmp_elements_index.size() == 0;
|
||||
};
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
@ -716,8 +769,13 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
res[i] = executor(offset);
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
|
||||
int64_t processed_size;
|
||||
@ -748,18 +806,20 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(
|
||||
}
|
||||
|
||||
VectorPtr
|
||||
PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(OffsetVector* input) {
|
||||
PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(EvalCtx& context) {
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
|
||||
@ -767,8 +827,11 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(OffsetVector* input) {
|
||||
for (auto const& element : expr_->vals_) {
|
||||
elements.emplace_back(GetValueFromProto<proto::plan::Array>(element));
|
||||
}
|
||||
|
||||
size_t processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[]<FilterType filter_type = FilterType::sequential>(
|
||||
[&processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
const milvus::Json* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -807,6 +870,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(OffsetVector* input) {
|
||||
}
|
||||
return exist_elements_index.size() == elements.size();
|
||||
};
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
@ -816,8 +880,13 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
res[i] = executor(offset);
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
|
||||
int64_t processed_size;
|
||||
@ -846,18 +915,20 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(OffsetVector* input) {
|
||||
}
|
||||
|
||||
VectorPtr
|
||||
PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(OffsetVector* input) {
|
||||
PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(EvalCtx& context) {
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
|
||||
@ -869,8 +940,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(OffsetVector* input) {
|
||||
i++;
|
||||
}
|
||||
|
||||
size_t processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[]<FilterType filter_type = FilterType::sequential>(
|
||||
[&processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
const milvus::Json* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -949,6 +1022,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(OffsetVector* input) {
|
||||
}
|
||||
return false;
|
||||
};
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
@ -958,8 +1032,13 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
res[i] = executor(offset);
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
|
||||
int64_t processed_size;
|
||||
|
||||
@ -51,37 +51,52 @@ class PhyJsonContainsFilterExpr : public SegmentExpr {
|
||||
void
|
||||
Eval(EvalCtx& context, VectorPtr& result) override;
|
||||
|
||||
std::string
|
||||
ToString() const {
|
||||
return fmt::format("{}", expr_->ToString());
|
||||
}
|
||||
|
||||
bool
|
||||
IsSource() const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
std::optional<milvus::expr::ColumnInfo>
|
||||
GetColumnInfo() const override {
|
||||
return expr_->column_;
|
||||
}
|
||||
|
||||
private:
|
||||
VectorPtr
|
||||
EvalJsonContainsForDataSegment(OffsetVector* input = nullptr);
|
||||
EvalJsonContainsForDataSegment(EvalCtx& context);
|
||||
|
||||
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
ExecJsonContains(OffsetVector* input = nullptr);
|
||||
ExecJsonContains(EvalCtx& context);
|
||||
|
||||
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
ExecArrayContains(OffsetVector* input = nullptr);
|
||||
ExecArrayContains(EvalCtx& context);
|
||||
|
||||
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
ExecJsonContainsAll(OffsetVector* input = nullptr);
|
||||
ExecJsonContainsAll(EvalCtx& context);
|
||||
|
||||
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
ExecArrayContainsAll(OffsetVector* input = nullptr);
|
||||
ExecArrayContainsAll(EvalCtx& context);
|
||||
|
||||
VectorPtr
|
||||
ExecJsonContainsArray(OffsetVector* input = nullptr);
|
||||
ExecJsonContainsArray(EvalCtx& context);
|
||||
|
||||
VectorPtr
|
||||
ExecJsonContainsAllArray(OffsetVector* input = nullptr);
|
||||
ExecJsonContainsAllArray(EvalCtx& context);
|
||||
|
||||
VectorPtr
|
||||
ExecJsonContainsAllWithDiffType(OffsetVector* input = nullptr);
|
||||
ExecJsonContainsAllWithDiffType(EvalCtx& context);
|
||||
|
||||
VectorPtr
|
||||
ExecJsonContainsWithDiffType(OffsetVector* input = nullptr);
|
||||
ExecJsonContainsWithDiffType(EvalCtx& context);
|
||||
|
||||
VectorPtr
|
||||
EvalArrayContainsForIndexSegment();
|
||||
|
||||
@ -87,6 +87,21 @@ class PhyLogicalBinaryExpr : public Expr {
|
||||
inputs_[1]->SupportOffsetInput();
|
||||
}
|
||||
|
||||
std::string
|
||||
ToString() const {
|
||||
return fmt::format("{}", expr_->ToString());
|
||||
}
|
||||
|
||||
bool
|
||||
IsSource() const override {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::optional<milvus::expr::ColumnInfo>
|
||||
GetColumnInfo() const override {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<const milvus::expr::LogicalBinaryExpr> expr_;
|
||||
};
|
||||
|
||||
@ -51,6 +51,21 @@ class PhyLogicalUnaryExpr : public Expr {
|
||||
return inputs_[0]->SupportOffsetInput();
|
||||
}
|
||||
|
||||
std::string
|
||||
ToString() const {
|
||||
return fmt::format("{}", expr_->ToString());
|
||||
}
|
||||
|
||||
bool
|
||||
IsSource() const override {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::optional<milvus::expr::ColumnInfo>
|
||||
GetColumnInfo() const override {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<const milvus::expr::LogicalUnaryExpr> expr_;
|
||||
};
|
||||
|
||||
@ -50,6 +50,21 @@ class PhyNullExpr : public SegmentExpr {
|
||||
void
|
||||
Eval(EvalCtx& context, VectorPtr& result) override;
|
||||
|
||||
std::string
|
||||
ToString() const {
|
||||
return fmt::format("{}", expr_->ToString());
|
||||
}
|
||||
|
||||
bool
|
||||
IsSource() const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
std::optional<milvus::expr::ColumnInfo>
|
||||
GetColumnInfo() const override {
|
||||
return expr_->column_;
|
||||
}
|
||||
|
||||
private:
|
||||
ColumnVectorPtr
|
||||
PreCheckNullable(OffsetVector* input);
|
||||
|
||||
@ -32,31 +32,31 @@ PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
}
|
||||
switch (expr_->column_.data_type_) {
|
||||
case DataType::BOOL: {
|
||||
result = ExecVisitorImpl<bool>(input);
|
||||
result = ExecVisitorImpl<bool>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::INT8: {
|
||||
result = ExecVisitorImpl<int8_t>(input);
|
||||
result = ExecVisitorImpl<int8_t>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::INT16: {
|
||||
result = ExecVisitorImpl<int16_t>(input);
|
||||
result = ExecVisitorImpl<int16_t>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::INT32: {
|
||||
result = ExecVisitorImpl<int32_t>(input);
|
||||
result = ExecVisitorImpl<int32_t>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::INT64: {
|
||||
result = ExecVisitorImpl<int64_t>(input);
|
||||
result = ExecVisitorImpl<int64_t>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
result = ExecVisitorImpl<float>(input);
|
||||
result = ExecVisitorImpl<float>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
result = ExecVisitorImpl<double>(input);
|
||||
result = ExecVisitorImpl<double>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
@ -64,30 +64,30 @@ PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
!storage::MmapManager::GetInstance()
|
||||
.GetMmapConfig()
|
||||
.growing_enable_mmap) {
|
||||
result = ExecVisitorImpl<std::string>(input);
|
||||
result = ExecVisitorImpl<std::string>(context);
|
||||
} else {
|
||||
result = ExecVisitorImpl<std::string_view>(input);
|
||||
result = ExecVisitorImpl<std::string_view>(context);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DataType::JSON: {
|
||||
if (expr_->vals_.size() == 0) {
|
||||
result = ExecVisitorImplTemplateJson<bool>(input);
|
||||
result = ExecVisitorImplTemplateJson<bool>(context);
|
||||
break;
|
||||
}
|
||||
auto type = expr_->vals_[0].val_case();
|
||||
switch (type) {
|
||||
case proto::plan::GenericValue::ValCase::kBoolVal:
|
||||
result = ExecVisitorImplTemplateJson<bool>(input);
|
||||
result = ExecVisitorImplTemplateJson<bool>(context);
|
||||
break;
|
||||
case proto::plan::GenericValue::ValCase::kInt64Val:
|
||||
result = ExecVisitorImplTemplateJson<int64_t>(input);
|
||||
result = ExecVisitorImplTemplateJson<int64_t>(context);
|
||||
break;
|
||||
case proto::plan::GenericValue::ValCase::kFloatVal:
|
||||
result = ExecVisitorImplTemplateJson<double>(input);
|
||||
result = ExecVisitorImplTemplateJson<double>(context);
|
||||
break;
|
||||
case proto::plan::GenericValue::ValCase::kStringVal:
|
||||
result = ExecVisitorImplTemplateJson<std::string>(input);
|
||||
result = ExecVisitorImplTemplateJson<std::string>(context);
|
||||
break;
|
||||
default:
|
||||
PanicInfo(DataTypeInvalid, "unknown data type: {}", type);
|
||||
@ -97,26 +97,26 @@ PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
case DataType::ARRAY: {
|
||||
if (expr_->vals_.size() == 0) {
|
||||
SetNotUseIndex();
|
||||
result = ExecVisitorImplTemplateArray<bool>(input);
|
||||
result = ExecVisitorImplTemplateArray<bool>(context);
|
||||
break;
|
||||
}
|
||||
auto type = expr_->vals_[0].val_case();
|
||||
switch (type) {
|
||||
case proto::plan::GenericValue::ValCase::kBoolVal:
|
||||
SetNotUseIndex();
|
||||
result = ExecVisitorImplTemplateArray<bool>(input);
|
||||
result = ExecVisitorImplTemplateArray<bool>(context);
|
||||
break;
|
||||
case proto::plan::GenericValue::ValCase::kInt64Val:
|
||||
SetNotUseIndex();
|
||||
result = ExecVisitorImplTemplateArray<int64_t>(input);
|
||||
result = ExecVisitorImplTemplateArray<int64_t>(context);
|
||||
break;
|
||||
case proto::plan::GenericValue::ValCase::kFloatVal:
|
||||
SetNotUseIndex();
|
||||
result = ExecVisitorImplTemplateArray<double>(input);
|
||||
result = ExecVisitorImplTemplateArray<double>(context);
|
||||
break;
|
||||
case proto::plan::GenericValue::ValCase::kStringVal:
|
||||
SetNotUseIndex();
|
||||
result = ExecVisitorImplTemplateArray<std::string>(input);
|
||||
result = ExecVisitorImplTemplateArray<std::string>(context);
|
||||
break;
|
||||
default:
|
||||
PanicInfo(DataTypeInvalid, "unknown data type: {}", type);
|
||||
@ -216,12 +216,11 @@ PhyTermFilterExpr::ExecPkTermImpl() {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
// pk valid_bitmap is always all true
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
auto current_chunk_view =
|
||||
cached_bits_.view(current_data_chunk_pos_, real_batch_size);
|
||||
@ -233,9 +232,9 @@ PhyTermFilterExpr::ExecPkTermImpl() {
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
PhyTermFilterExpr::ExecVisitorImplTemplateJson(OffsetVector* input) {
|
||||
PhyTermFilterExpr::ExecVisitorImplTemplateJson(EvalCtx& context) {
|
||||
if (expr_->is_in_field_) {
|
||||
return ExecTermJsonVariableInField<ValueType>(input);
|
||||
return ExecTermJsonVariableInField<ValueType>(context);
|
||||
} else {
|
||||
if (is_index_mode_) {
|
||||
// we create double index for json int64 field for now
|
||||
@ -243,40 +242,42 @@ PhyTermFilterExpr::ExecVisitorImplTemplateJson(OffsetVector* input) {
|
||||
std::conditional_t<std::is_same_v<ValueType, int64_t>,
|
||||
double,
|
||||
ValueType>;
|
||||
return ExecVisitorImplForIndex<GetType>(input);
|
||||
return ExecVisitorImplForIndex<GetType>();
|
||||
} else {
|
||||
return ExecTermJsonFieldInVariable<ValueType>(input);
|
||||
return ExecTermJsonFieldInVariable<ValueType>(context);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
PhyTermFilterExpr::ExecVisitorImplTemplateArray(OffsetVector* input) {
|
||||
PhyTermFilterExpr::ExecVisitorImplTemplateArray(EvalCtx& context) {
|
||||
if (expr_->is_in_field_) {
|
||||
return ExecTermArrayVariableInField<ValueType>(input);
|
||||
return ExecTermArrayVariableInField<ValueType>(context);
|
||||
} else {
|
||||
return ExecTermArrayFieldInVariable<ValueType>(input);
|
||||
return ExecTermArrayFieldInVariable<ValueType>(context);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) {
|
||||
PhyTermFilterExpr::ExecTermArrayVariableInField(EvalCtx& context) {
|
||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||
std::string_view,
|
||||
ValueType>;
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
AssertInfo(expr_->vals_.size() == 1,
|
||||
"element length in json array must be one");
|
||||
@ -286,8 +287,10 @@ PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) {
|
||||
}
|
||||
auto target_val = arg_val_.GetValue<ValueType>();
|
||||
|
||||
int processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[]<FilterType filter_type = FilterType::sequential>(
|
||||
[&processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
const ArrayView* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -304,6 +307,7 @@ PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) {
|
||||
}
|
||||
return false;
|
||||
};
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
@ -313,8 +317,12 @@ PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||
continue;
|
||||
}
|
||||
res[i] = executor(offset);
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
|
||||
int64_t processed_size;
|
||||
@ -340,22 +348,24 @@ PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) {
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
|
||||
PhyTermFilterExpr::ExecTermArrayFieldInVariable(EvalCtx& context) {
|
||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||
std::string_view,
|
||||
ValueType>;
|
||||
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
int index = -1;
|
||||
if (expr_->column_.nested_path_.size() > 0) {
|
||||
@ -372,8 +382,10 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
|
||||
return res_vec;
|
||||
}
|
||||
|
||||
int processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[]<FilterType filter_type = FilterType::sequential>(
|
||||
[&processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
const ArrayView* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -382,6 +394,7 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
|
||||
TargetBitmapView valid_res,
|
||||
int index,
|
||||
const std::shared_ptr<MultiElement>& term_set) {
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
@ -395,9 +408,13 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
|
||||
res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||
continue;
|
||||
}
|
||||
auto value = data[offset].get_data<GetType>(index);
|
||||
res[i] = term_set->In(ValueType(value));
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
|
||||
int64_t processed_size;
|
||||
@ -428,21 +445,23 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) {
|
||||
PhyTermFilterExpr::ExecTermJsonVariableInField(EvalCtx& context) {
|
||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||
std::string_view,
|
||||
ValueType>;
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
AssertInfo(expr_->vals_.size() == 1,
|
||||
"element length in json array must be one");
|
||||
@ -454,8 +473,10 @@ PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) {
|
||||
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
|
||||
int processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[]<FilterType filter_type = FilterType::sequential>(
|
||||
[&processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
const Json* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -480,6 +501,7 @@ PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) {
|
||||
}
|
||||
return false;
|
||||
};
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
@ -489,8 +511,12 @@ PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||
continue;
|
||||
}
|
||||
res[i] = executor(offset);
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
int64_t processed_size;
|
||||
if (has_offset_input_) {
|
||||
@ -515,21 +541,25 @@ PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) {
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
|
||||
PhyTermFilterExpr::ExecTermJsonFieldInVariable(EvalCtx& context) {
|
||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||
std::string_view,
|
||||
ValueType>;
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
if (!arg_inited_) {
|
||||
@ -543,8 +573,10 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
|
||||
return res_vec;
|
||||
}
|
||||
|
||||
int processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[]<FilterType filter_type = FilterType::sequential>(
|
||||
[&processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
const Json* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -571,6 +603,7 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
|
||||
}
|
||||
return terms->In(ValueType(x.value()));
|
||||
};
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
@ -584,8 +617,13 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
|
||||
res[i] = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||
continue;
|
||||
}
|
||||
res[i] = executor(offset);
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
int64_t processed_size;
|
||||
if (has_offset_input_) {
|
||||
@ -614,17 +652,17 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
|
||||
|
||||
// Dispatches a term-filter evaluation for element type T.
// When is_index_mode_ is set and there is no offset input
// (has_offset_input_ is false), the index-based implementation is used;
// in every other case the raw-data implementation is used.
// NOTE(review): this span is a diff rendering, so the pre-change lines
// (taking `OffsetVector* input`) and the post-change lines (taking
// `EvalCtx& context`) appear back to back.
template <typename T>
|
||||
VectorPtr
|
||||
PhyTermFilterExpr::ExecVisitorImpl(OffsetVector* input) {
|
||||
PhyTermFilterExpr::ExecVisitorImpl(EvalCtx& context) {
|
||||
if (is_index_mode_ && !has_offset_input_) {
|
||||
return ExecVisitorImplForIndex<T>(input);
|
||||
return ExecVisitorImplForIndex<T>();
|
||||
} else {
|
||||
return ExecVisitorImplForData<T>(input);
|
||||
return ExecVisitorImplForData<T>(context);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
PhyTermFilterExpr::ExecVisitorImplForIndex(OffsetVector* input) {
|
||||
PhyTermFilterExpr::ExecVisitorImplForIndex() {
|
||||
typedef std::
|
||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||
IndexInnerType;
|
||||
@ -667,7 +705,7 @@ PhyTermFilterExpr::ExecVisitorImplForIndex(OffsetVector* input) {
|
||||
|
||||
template <>
|
||||
VectorPtr
|
||||
PhyTermFilterExpr::ExecVisitorImplForIndex<bool>(OffsetVector* input) {
|
||||
PhyTermFilterExpr::ExecVisitorImplForIndex<bool>() {
|
||||
using Index = index::ScalarIndex<bool>;
|
||||
auto real_batch_size = GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
@ -689,18 +727,21 @@ PhyTermFilterExpr::ExecVisitorImplForIndex<bool>(OffsetVector* input) {
|
||||
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
|
||||
PhyTermFilterExpr::ExecVisitorImplForData(EvalCtx& context) {
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
if (!arg_inited_) {
|
||||
std::vector<T> vals;
|
||||
@ -717,8 +758,10 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
|
||||
arg_inited_ = true;
|
||||
}
|
||||
|
||||
int processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[]<FilterType filter_type = FilterType::sequential>(
|
||||
[&processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
const T* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -726,6 +769,7 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
const std::shared_ptr<MultiElement>& vals) {
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
@ -735,8 +779,12 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input && !bitmap_input[i + processed_cursor]) {
|
||||
continue;
|
||||
}
|
||||
res[i] = vals->In(data[offset]);
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
int64_t processed_size;
|
||||
if (has_offset_input_) {
|
||||
|
||||
@ -75,6 +75,21 @@ class PhyTermFilterExpr : public SegmentExpr {
|
||||
void
|
||||
Eval(EvalCtx& context, VectorPtr& result) override;
|
||||
|
||||
// Overrides IsSource() to unconditionally return true for this expression.
// NOTE(review): how callers interpret "source" is not visible in this
// hunk — confirm against the base class.
bool
|
||||
IsSource() const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the textual form of the wrapped term expression, i.e. the result
// of expr_->ToString() passed through fmt::format("{}").
std::string
|
||||
ToString() const {
|
||||
return fmt::format("{}", expr_->ToString());
|
||||
}
|
||||
|
||||
// Returns expr_->column_ (the column description held by the underlying
// term expression) wrapped in std::optional.
// NOTE(review): presumably consumed by the sub-expression reordering logic
// introduced in this change — confirm at the call site.
std::optional<milvus::expr::ColumnInfo>
|
||||
GetColumnInfo() const override {
|
||||
return expr_->column_;
|
||||
}
|
||||
|
||||
private:
|
||||
void
|
||||
InitPkCacheOffset();
|
||||
@ -88,39 +103,39 @@ class PhyTermFilterExpr : public SegmentExpr {
|
||||
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
ExecVisitorImpl(OffsetVector* input = nullptr);
|
||||
ExecVisitorImpl(EvalCtx& context);
|
||||
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
ExecVisitorImplForIndex(OffsetVector* input = nullptr);
|
||||
ExecVisitorImplForIndex();
|
||||
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
ExecVisitorImplForData(OffsetVector* input = nullptr);
|
||||
ExecVisitorImplForData(EvalCtx& context);
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
ExecVisitorImplTemplateJson(OffsetVector* input = nullptr);
|
||||
ExecVisitorImplTemplateJson(EvalCtx& context);
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
ExecTermJsonVariableInField(OffsetVector* input = nullptr);
|
||||
ExecTermJsonVariableInField(EvalCtx& context);
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
ExecTermJsonFieldInVariable(OffsetVector* input = nullptr);
|
||||
ExecTermJsonFieldInVariable(EvalCtx& context);
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
ExecVisitorImplTemplateArray(OffsetVector* input = nullptr);
|
||||
ExecVisitorImplTemplateArray(EvalCtx& context);
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
ExecTermArrayVariableInField(OffsetVector* input = nullptr);
|
||||
ExecTermArrayVariableInField(EvalCtx& context);
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
ExecTermArrayFieldInVariable(OffsetVector* input = nullptr);
|
||||
ExecTermArrayFieldInVariable(EvalCtx& context);
|
||||
|
||||
private:
|
||||
std::shared_ptr<const milvus::expr::TermFilterExpr> expr_;
|
||||
|
||||
@ -89,51 +89,51 @@ PhyUnaryRangeFilterExpr::CanUseIndexForArray<milvus::Array>() {
|
||||
|
||||
// Generic (non-specialized) case: performs no index-specific work and
// simply forwards to ExecRangeVisitorImplArray<T>.
// NOTE(review): this span is a diff rendering; the pre-change pair
// (no parameter) and the post-change pair (`EvalCtx& context`, forwarded
// unchanged) both appear below.
template <typename T>
|
||||
VectorPtr
|
||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex() {
|
||||
return ExecRangeVisitorImplArray<T>();
|
||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex(EvalCtx& context) {
|
||||
return ExecRangeVisitorImplArray<T>(context);
|
||||
}
|
||||
|
||||
template <>
|
||||
VectorPtr
|
||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex<
|
||||
proto::plan::Array>() {
|
||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex<proto::plan::Array>(
|
||||
EvalCtx& context) {
|
||||
switch (expr_->op_type_) {
|
||||
case proto::plan::Equal:
|
||||
case proto::plan::NotEqual: {
|
||||
switch (expr_->column_.element_type_) {
|
||||
case DataType::BOOL: {
|
||||
return ExecArrayEqualForIndex<bool>(expr_->op_type_ ==
|
||||
proto::plan::NotEqual);
|
||||
return ExecArrayEqualForIndex<bool>(
|
||||
context, expr_->op_type_ == proto::plan::NotEqual);
|
||||
}
|
||||
case DataType::INT8: {
|
||||
return ExecArrayEqualForIndex<int8_t>(
|
||||
expr_->op_type_ == proto::plan::NotEqual);
|
||||
context, expr_->op_type_ == proto::plan::NotEqual);
|
||||
}
|
||||
case DataType::INT16: {
|
||||
return ExecArrayEqualForIndex<int16_t>(
|
||||
expr_->op_type_ == proto::plan::NotEqual);
|
||||
context, expr_->op_type_ == proto::plan::NotEqual);
|
||||
}
|
||||
case DataType::INT32: {
|
||||
return ExecArrayEqualForIndex<int32_t>(
|
||||
expr_->op_type_ == proto::plan::NotEqual);
|
||||
context, expr_->op_type_ == proto::plan::NotEqual);
|
||||
}
|
||||
case DataType::INT64: {
|
||||
return ExecArrayEqualForIndex<int64_t>(
|
||||
expr_->op_type_ == proto::plan::NotEqual);
|
||||
context, expr_->op_type_ == proto::plan::NotEqual);
|
||||
}
|
||||
case DataType::FLOAT:
|
||||
case DataType::DOUBLE: {
|
||||
// not accurate on floating point number, rollback to bruteforce.
|
||||
return ExecRangeVisitorImplArray<proto::plan::Array>(
|
||||
nullptr);
|
||||
context);
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
if (segment_->type() == SegmentType::Growing) {
|
||||
return ExecArrayEqualForIndex<std::string>(
|
||||
expr_->op_type_ == proto::plan::NotEqual);
|
||||
context, expr_->op_type_ == proto::plan::NotEqual);
|
||||
} else {
|
||||
return ExecArrayEqualForIndex<std::string_view>(
|
||||
expr_->op_type_ == proto::plan::NotEqual);
|
||||
context, expr_->op_type_ == proto::plan::NotEqual);
|
||||
}
|
||||
}
|
||||
default:
|
||||
@ -144,7 +144,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex<
|
||||
}
|
||||
}
|
||||
default:
|
||||
return ExecRangeVisitorImplArray<proto::plan::Array>();
|
||||
return ExecRangeVisitorImplArray<proto::plan::Array>(context);
|
||||
}
|
||||
}
|
||||
|
||||
@ -154,31 +154,31 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
SetHasOffsetInput((input != nullptr));
|
||||
switch (expr_->column_.data_type_) {
|
||||
case DataType::BOOL: {
|
||||
result = ExecRangeVisitorImpl<bool>(input);
|
||||
result = ExecRangeVisitorImpl<bool>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::INT8: {
|
||||
result = ExecRangeVisitorImpl<int8_t>(input);
|
||||
result = ExecRangeVisitorImpl<int8_t>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::INT16: {
|
||||
result = ExecRangeVisitorImpl<int16_t>(input);
|
||||
result = ExecRangeVisitorImpl<int16_t>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::INT32: {
|
||||
result = ExecRangeVisitorImpl<int32_t>(input);
|
||||
result = ExecRangeVisitorImpl<int32_t>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::INT64: {
|
||||
result = ExecRangeVisitorImpl<int64_t>(input);
|
||||
result = ExecRangeVisitorImpl<int64_t>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
result = ExecRangeVisitorImpl<float>(input);
|
||||
result = ExecRangeVisitorImpl<float>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
result = ExecRangeVisitorImpl<double>(input);
|
||||
result = ExecRangeVisitorImpl<double>(context);
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
@ -186,9 +186,9 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
!storage::MmapManager::GetInstance()
|
||||
.GetMmapConfig()
|
||||
.growing_enable_mmap) {
|
||||
result = ExecRangeVisitorImpl<std::string>(input);
|
||||
result = ExecRangeVisitorImpl<std::string>(context);
|
||||
} else {
|
||||
result = ExecRangeVisitorImpl<std::string_view>(input);
|
||||
result = ExecRangeVisitorImpl<std::string_view>(context);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -227,20 +227,20 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
} else {
|
||||
switch (val_type) {
|
||||
case proto::plan::GenericValue::ValCase::kBoolVal:
|
||||
result = ExecRangeVisitorImplJson<bool>(input);
|
||||
result = ExecRangeVisitorImplJson<bool>(context);
|
||||
break;
|
||||
case proto::plan::GenericValue::ValCase::kInt64Val:
|
||||
result = ExecRangeVisitorImplJson<int64_t>(input);
|
||||
result = ExecRangeVisitorImplJson<int64_t>(context);
|
||||
break;
|
||||
case proto::plan::GenericValue::ValCase::kFloatVal:
|
||||
result = ExecRangeVisitorImplJson<double>(input);
|
||||
result = ExecRangeVisitorImplJson<double>(context);
|
||||
break;
|
||||
case proto::plan::GenericValue::ValCase::kStringVal:
|
||||
result = ExecRangeVisitorImplJson<std::string>(input);
|
||||
result = ExecRangeVisitorImplJson<std::string>(context);
|
||||
break;
|
||||
case proto::plan::GenericValue::ValCase::kArrayVal:
|
||||
result =
|
||||
ExecRangeVisitorImplJson<proto::plan::Array>(input);
|
||||
result = ExecRangeVisitorImplJson<proto::plan::Array>(
|
||||
context);
|
||||
break;
|
||||
default:
|
||||
PanicInfo(
|
||||
@ -254,28 +254,28 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
switch (val_type) {
|
||||
case proto::plan::GenericValue::ValCase::kBoolVal:
|
||||
SetNotUseIndex();
|
||||
result = ExecRangeVisitorImplArray<bool>(input);
|
||||
result = ExecRangeVisitorImplArray<bool>(context);
|
||||
break;
|
||||
case proto::plan::GenericValue::ValCase::kInt64Val:
|
||||
SetNotUseIndex();
|
||||
result = ExecRangeVisitorImplArray<int64_t>(input);
|
||||
result = ExecRangeVisitorImplArray<int64_t>(context);
|
||||
break;
|
||||
case proto::plan::GenericValue::ValCase::kFloatVal:
|
||||
SetNotUseIndex();
|
||||
result = ExecRangeVisitorImplArray<double>(input);
|
||||
result = ExecRangeVisitorImplArray<double>(context);
|
||||
break;
|
||||
case proto::plan::GenericValue::ValCase::kStringVal:
|
||||
SetNotUseIndex();
|
||||
result = ExecRangeVisitorImplArray<std::string>(input);
|
||||
result = ExecRangeVisitorImplArray<std::string>(context);
|
||||
break;
|
||||
case proto::plan::GenericValue::ValCase::kArrayVal:
|
||||
if (!has_offset_input_ &&
|
||||
CanUseIndexForArray<milvus::Array>()) {
|
||||
result = ExecRangeVisitorImplArrayForIndex<
|
||||
proto::plan::Array>();
|
||||
proto::plan::Array>(context);
|
||||
} else {
|
||||
result = ExecRangeVisitorImplArray<proto::plan::Array>(
|
||||
input);
|
||||
context);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
@ -293,17 +293,19 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(EvalCtx& context) {
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
if (!arg_inited_) {
|
||||
value_arg_.SetValue<ValueType>(expr_->val_);
|
||||
@ -315,8 +317,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
||||
if (expr_->column_.nested_path_.size() > 0) {
|
||||
index = std::stoi(expr_->column_.nested_path_[0]);
|
||||
}
|
||||
auto execute_sub_batch = [op_type]<FilterType filter_type =
|
||||
FilterType::sequential>(
|
||||
int processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[ op_type, &processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
const milvus::ArrayView* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -338,6 +342,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
||||
index,
|
||||
res,
|
||||
valid_res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
@ -353,6 +359,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
||||
index,
|
||||
res,
|
||||
valid_res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
@ -368,6 +376,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
||||
index,
|
||||
res,
|
||||
valid_res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
@ -383,6 +393,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
||||
index,
|
||||
res,
|
||||
valid_res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
@ -398,6 +410,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
||||
index,
|
||||
res,
|
||||
valid_res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
@ -413,6 +427,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
||||
index,
|
||||
res,
|
||||
valid_res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
@ -428,6 +444,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
||||
index,
|
||||
res,
|
||||
valid_res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
@ -443,6 +461,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
||||
index,
|
||||
res,
|
||||
valid_res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
@ -452,6 +472,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
||||
fmt::format("unsupported operator type for unary expr: {}",
|
||||
op_type));
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
int64_t processed_size;
|
||||
if (has_offset_input_) {
|
||||
@ -477,7 +498,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
||||
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) {
|
||||
PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(EvalCtx& context,
|
||||
bool reverse) {
|
||||
typedef std::
|
||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||
IndexInnerType;
|
||||
@ -491,7 +513,7 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) {
|
||||
auto val = GetValueFromProto<proto::plan::Array>(expr_->val_);
|
||||
if (val.array_size() == 0) {
|
||||
// rollback to bruteforce. no candidates will be filtered out via index.
|
||||
return ExecRangeVisitorImplArray<proto::plan::Array>();
|
||||
return ExecRangeVisitorImplArray<proto::plan::Array>(context);
|
||||
}
|
||||
|
||||
// cache the result to suit the framework.
|
||||
@ -587,11 +609,14 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) {
|
||||
|
||||
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(EvalCtx& context) {
|
||||
using GetType =
|
||||
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
||||
std::string_view,
|
||||
ExprValueType>;
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
@ -602,13 +627,13 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
||||
value_arg_.SetValue<ExprValueType>(expr_->val_);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
|
||||
ExprValueType val = value_arg_.GetValue<ExprValueType>();
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
ExprValueType val = value_arg_.GetValue<ExprValueType>();
|
||||
auto op_type = expr_->op_type_;
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
|
||||
@ -642,8 +667,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
||||
res[i] = (cmp); \
|
||||
} while (false)
|
||||
|
||||
int processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[ op_type, pointer ]<FilterType filter_type = FilterType::sequential>(
|
||||
[ op_type, pointer, &processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
const milvus::Json* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -651,6 +678,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
ExprValueType val) {
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
switch (op_type) {
|
||||
case proto::plan::GreaterThan: {
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
@ -662,6 +690,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input &&
|
||||
!bitmap_input[i + processed_cursor]) {
|
||||
continue;
|
||||
}
|
||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||
res[i] = false;
|
||||
} else {
|
||||
@ -680,6 +712,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input &&
|
||||
!bitmap_input[i + processed_cursor]) {
|
||||
continue;
|
||||
}
|
||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||
res[i] = false;
|
||||
} else {
|
||||
@ -698,6 +734,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input &&
|
||||
!bitmap_input[i + processed_cursor]) {
|
||||
continue;
|
||||
}
|
||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||
res[i] = false;
|
||||
} else {
|
||||
@ -716,6 +756,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input &&
|
||||
!bitmap_input[i + processed_cursor]) {
|
||||
continue;
|
||||
}
|
||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||
res[i] = false;
|
||||
} else {
|
||||
@ -734,6 +778,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input &&
|
||||
!bitmap_input[i + processed_cursor]) {
|
||||
continue;
|
||||
}
|
||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||
auto doc = data[i].doc();
|
||||
auto array = doc.at_pointer(pointer).get_array();
|
||||
@ -758,6 +806,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input &&
|
||||
!bitmap_input[i + processed_cursor]) {
|
||||
continue;
|
||||
}
|
||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||
auto doc = data[i].doc();
|
||||
auto array = doc.at_pointer(pointer).get_array();
|
||||
@ -782,6 +834,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input &&
|
||||
!bitmap_input[i + processed_cursor]) {
|
||||
continue;
|
||||
}
|
||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||
res[i] = false;
|
||||
} else {
|
||||
@ -804,6 +860,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input &&
|
||||
!bitmap_input[i + processed_cursor]) {
|
||||
continue;
|
||||
}
|
||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||
res[i] = false;
|
||||
} else {
|
||||
@ -819,6 +879,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
||||
fmt::format("unsupported operator type for unary expr: {}",
|
||||
op_type));
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
int64_t processed_size;
|
||||
if (has_offset_input_) {
|
||||
@ -839,7 +900,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
||||
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImpl(OffsetVector* input) {
|
||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImpl(EvalCtx& context) {
|
||||
if (expr_->op_type_ == proto::plan::OpType::TextMatch ||
|
||||
expr_->op_type_ == proto::plan::OpType::PhraseMatch) {
|
||||
if (has_offset_input_) {
|
||||
@ -853,7 +914,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImpl(OffsetVector* input) {
|
||||
if (CanUseIndex<T>() && !has_offset_input_) {
|
||||
return ExecRangeVisitorImplForIndex<T>();
|
||||
} else {
|
||||
return ExecRangeVisitorImplForData<T>(input);
|
||||
return ExecRangeVisitorImplForData<T>(context);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1003,10 +1064,13 @@ PhyUnaryRangeFilterExpr::PreCheckOverflow(OffsetVector* input) {
|
||||
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(EvalCtx& context) {
|
||||
typedef std::
|
||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||
IndexInnerType;
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
|
||||
if (auto res = PreCheckOverflow<T>(input)) {
|
||||
return res;
|
||||
}
|
||||
@ -1022,15 +1086,17 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
||||
arg_inited_ = true;
|
||||
}
|
||||
IndexInnerType val = GetValueFromProto<IndexInnerType>(expr_->val_);
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
auto res_vec =
|
||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
auto expr_type = expr_->op_type_;
|
||||
|
||||
auto execute_sub_batch = [expr_type]<FilterType filter_type =
|
||||
FilterType::sequential>(
|
||||
size_t processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[ expr_type, &processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
const T* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -1041,43 +1107,91 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
||||
switch (expr_type) {
|
||||
case proto::plan::GreaterThan: {
|
||||
UnaryElementFunc<T, proto::plan::GreaterThan, filter_type> func;
|
||||
func(data, size, val, res, offsets);
|
||||
func(data,
|
||||
size,
|
||||
val,
|
||||
res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
case proto::plan::GreaterEqual: {
|
||||
UnaryElementFunc<T, proto::plan::GreaterEqual, filter_type>
|
||||
func;
|
||||
func(data, size, val, res, offsets);
|
||||
func(data,
|
||||
size,
|
||||
val,
|
||||
res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
case proto::plan::LessThan: {
|
||||
UnaryElementFunc<T, proto::plan::LessThan, filter_type> func;
|
||||
func(data, size, val, res, offsets);
|
||||
func(data,
|
||||
size,
|
||||
val,
|
||||
res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
case proto::plan::LessEqual: {
|
||||
UnaryElementFunc<T, proto::plan::LessEqual, filter_type> func;
|
||||
func(data, size, val, res, offsets);
|
||||
func(data,
|
||||
size,
|
||||
val,
|
||||
res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
case proto::plan::Equal: {
|
||||
UnaryElementFunc<T, proto::plan::Equal, filter_type> func;
|
||||
func(data, size, val, res, offsets);
|
||||
func(data,
|
||||
size,
|
||||
val,
|
||||
res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
case proto::plan::NotEqual: {
|
||||
UnaryElementFunc<T, proto::plan::NotEqual, filter_type> func;
|
||||
func(data, size, val, res, offsets);
|
||||
func(data,
|
||||
size,
|
||||
val,
|
||||
res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
case proto::plan::PrefixMatch: {
|
||||
UnaryElementFunc<T, proto::plan::PrefixMatch, filter_type> func;
|
||||
func(data, size, val, res, offsets);
|
||||
func(data,
|
||||
size,
|
||||
val,
|
||||
res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
case proto::plan::Match: {
|
||||
UnaryElementFunc<T, proto::plan::Match, filter_type> func;
|
||||
func(data, size, val, res, offsets);
|
||||
func(data,
|
||||
size,
|
||||
val,
|
||||
res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@ -1090,7 +1204,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
||||
// so not divide data again for the reason that it may reduce performance if the null distribution is scattered
|
||||
// but to mask res with valid_data after the batch operation.
|
||||
if (valid_data != nullptr) {
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (has_bitmap_input && !bitmap_input[i + processed_cursor]) {
|
||||
continue;
|
||||
}
|
||||
auto offset = i;
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
offset = (offsets) ? offsets[i] : i;
|
||||
@ -1100,6 +1218,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
||||
}
|
||||
}
|
||||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
|
||||
auto skip_index_func = [expr_type, val](const SkipIndex& skip_index,
|
||||
|
||||
@ -41,15 +41,20 @@ struct UnaryElementFuncForMatch {
|
||||
|
||||
void
|
||||
operator()(const T* src,
|
||||
|
||||
size_t size,
|
||||
IndexInnerType val,
|
||||
TargetBitmapView res,
|
||||
int64_t* offsets = nullptr) {
|
||||
const TargetBitmap& bitmap_input,
|
||||
int start_cursor,
|
||||
const int32_t* offsets = nullptr) {
|
||||
PatternMatchTranslator translator;
|
||||
auto regex_pattern = translator(val);
|
||||
RegexMatcher matcher(regex_pattern);
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (has_bitmap_input && !bitmap_input[i + start_cursor]) {
|
||||
continue;
|
||||
}
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
res[i] = matcher(src[offsets ? offsets[i] : i]);
|
||||
} else {
|
||||
@ -69,17 +74,25 @@ struct UnaryElementFunc {
|
||||
size_t size,
|
||||
IndexInnerType val,
|
||||
TargetBitmapView res,
|
||||
const TargetBitmap& bitmap_input,
|
||||
size_t start_cursor,
|
||||
const int32_t* offsets = nullptr) {
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
if constexpr (op == proto::plan::OpType::Match) {
|
||||
UnaryElementFuncForMatch<T, filter_type> func;
|
||||
func(src, size, val, res);
|
||||
func(src, size, val, res, bitmap_input, start_cursor, offsets);
|
||||
return;
|
||||
}
|
||||
|
||||
// This is the original code, which is kept for documentation purposes
|
||||
// also, for iterative filter
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
if constexpr (filter_type == FilterType::random ||
|
||||
std::is_same_v<T, std::string_view> ||
|
||||
std::is_same_v<T, std::string>) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (has_bitmap_input && !bitmap_input[i + start_cursor]) {
|
||||
continue;
|
||||
}
|
||||
auto offset = (offsets != nullptr) ? offsets[i] : i;
|
||||
if constexpr (op == proto::plan::OpType::Equal) {
|
||||
res[i] = src[offset] == val;
|
||||
@ -164,7 +177,10 @@ struct UnaryElementFuncForArray {
|
||||
int index,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
const TargetBitmap& bitmap_input,
|
||||
size_t start_cursor,
|
||||
const int32_t* offsets = nullptr) {
|
||||
bool has_bitmap_input = !bitmap_input.empty();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
@ -174,6 +190,9 @@ struct UnaryElementFuncForArray {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (has_bitmap_input && !bitmap_input[i + start_cursor]) {
|
||||
continue;
|
||||
}
|
||||
if constexpr (op == proto::plan::OpType::Equal) {
|
||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||
res[i] = src[offset].is_same_array(val);
|
||||
@ -340,10 +359,30 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
|
||||
return true;
|
||||
}
|
||||
|
||||
std::string
|
||||
ToString() const {
|
||||
return fmt::format("{}", expr_->ToString());
|
||||
}
|
||||
|
||||
std::optional<milvus::expr::ColumnInfo>
|
||||
GetColumnInfo() const override {
|
||||
return expr_->column_;
|
||||
}
|
||||
|
||||
bool
|
||||
IsSource() const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<const milvus::expr::UnaryRangeFilterExpr>
|
||||
GetLogicalExpr() {
|
||||
return expr_;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
ExecRangeVisitorImpl(OffsetVector* input = nullptr);
|
||||
ExecRangeVisitorImpl(EvalCtx& context);
|
||||
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
@ -351,23 +390,23 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
|
||||
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
ExecRangeVisitorImplForData(OffsetVector* input = nullptr);
|
||||
ExecRangeVisitorImplForData(EvalCtx& context);
|
||||
|
||||
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
ExecRangeVisitorImplJson(OffsetVector* input = nullptr);
|
||||
ExecRangeVisitorImplJson(EvalCtx& context);
|
||||
|
||||
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
ExecRangeVisitorImplArray(OffsetVector* input = nullptr);
|
||||
ExecRangeVisitorImplArray(EvalCtx& context);
|
||||
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
ExecRangeVisitorImplArrayForIndex();
|
||||
ExecRangeVisitorImplArrayForIndex(EvalCtx& context);
|
||||
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
ExecArrayEqualForIndex(bool reverse);
|
||||
ExecArrayEqualForIndex(EvalCtx& context, bool reverse);
|
||||
|
||||
// Check overflow and cache result for performance
|
||||
template <typename T>
|
||||
|
||||
@ -59,6 +59,21 @@ class PhyValueExpr : public Expr {
|
||||
}
|
||||
}
|
||||
|
||||
std::string
|
||||
ToString() const {
|
||||
return fmt::format("{}", expr_->ToString());
|
||||
}
|
||||
|
||||
bool
|
||||
IsSource() const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
std::optional<milvus::expr::ColumnInfo>
|
||||
GetColumnInfo() const override {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<const milvus::expr::ValueExpr> expr_;
|
||||
const int64_t active_count_;
|
||||
|
||||
@ -210,6 +210,8 @@ std::map<std::string, std::string> searchGetTargetEntryLatencyLabels{
|
||||
{"type", "search_get_target_entry_latency"}};
|
||||
std::map<std::string, std::string> randomSampleLatencyLabels{
|
||||
{"type", "random_sample_latency"}};
|
||||
std::map<std::string, std::string> optimizeExprLatencyLabels{
|
||||
{"type", "optimize_expr_latency"}};
|
||||
|
||||
DEFINE_PROMETHEUS_HISTOGRAM_FAMILY(internal_core_search_latency,
|
||||
"[cpp]latency(us) of search on segment")
|
||||
@ -242,7 +244,9 @@ DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_get_target_entry_latency,
|
||||
DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_random_sample,
|
||||
internal_core_search_latency,
|
||||
randomSampleLatencyLabels)
|
||||
|
||||
DEFINE_PROMETHEUS_HISTOGRAM(internal_core_optimize_expr_latency,
|
||||
internal_core_search_latency,
|
||||
optimizeExprLatencyLabels)
|
||||
// mmap metrics
|
||||
std::map<std::string, std::string> mmapAllocatedSpaceAnonLabel = {
|
||||
{"type", "anon"}};
|
||||
|
||||
@ -142,6 +142,7 @@ DECLARE_PROMETHEUS_HISTOGRAM(internal_core_get_vector_latency);
|
||||
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_retrieve_get_target_entry_latency);
|
||||
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_get_target_entry_latency);
|
||||
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_random_sample);
|
||||
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_optimize_expr_latency);
|
||||
|
||||
// async cgo metrics
|
||||
DECLARE_PROMETHEUS_HISTOGRAM_FAMILY(internal_cgo_queue_duration_seconds);
|
||||
|
||||
@ -27,6 +27,7 @@
|
||||
#include "exec/QueryContext.h"
|
||||
#include "expr/ITypeExpr.h"
|
||||
#include "exec/expression/Expr.h"
|
||||
#include "exec/expression/ConjunctExpr.h"
|
||||
#include "exec/expression/function/FunctionFactory.h"
|
||||
|
||||
using namespace milvus;
|
||||
@ -82,10 +83,12 @@ class TaskTest : public testing::TestWithParam<DataType> {
|
||||
field_map_.insert({"string2", str2_fid});
|
||||
auto str3_fid = schema->AddDebugField("string3", DataType::VARCHAR);
|
||||
field_map_.insert({"string3", str3_fid});
|
||||
auto json_fid = schema->AddDebugField("json", DataType::JSON);
|
||||
field_map_.insert({"json", json_fid});
|
||||
schema->set_primary_field_id(str1_fid);
|
||||
|
||||
auto segment = CreateSealedSegment(schema);
|
||||
size_t N = 1000000;
|
||||
size_t N = 100000;
|
||||
num_rows_ = N;
|
||||
auto raw_data = DataGen(schema, N);
|
||||
auto fields = schema->get_fields();
|
||||
@ -152,7 +155,7 @@ TEST_P(TaskTest, CallExprEmpty) {
|
||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||
"test1",
|
||||
segment_.get(),
|
||||
1000000,
|
||||
100000,
|
||||
MAX_TIMESTAMP,
|
||||
std::make_shared<milvus::exec::QueryConfig>(
|
||||
std::unordered_map<std::string, std::string>{}));
|
||||
@ -189,7 +192,7 @@ TEST_P(TaskTest, UnaryExpr) {
|
||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||
"test1",
|
||||
segment_.get(),
|
||||
1000000,
|
||||
100000,
|
||||
MAX_TIMESTAMP,
|
||||
std::make_shared<milvus::exec::QueryConfig>(
|
||||
std::unordered_map<std::string, std::string>{}));
|
||||
@ -235,7 +238,7 @@ TEST_P(TaskTest, LogicalExpr) {
|
||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||
"test1",
|
||||
segment_.get(),
|
||||
1000000,
|
||||
100000,
|
||||
MAX_TIMESTAMP,
|
||||
std::make_shared<milvus::exec::QueryConfig>(
|
||||
std::unordered_map<std::string, std::string>{}));
|
||||
@ -296,12 +299,12 @@ TEST_P(TaskTest, CompileInputs_and) {
|
||||
auto expr7 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||
expr::LogicalBinaryExpr::OpType::And, expr3, expr6);
|
||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||
DEAFULT_QUERY_ID, segment_.get(), 1000000, MAX_TIMESTAMP);
|
||||
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||
auto exprs = milvus::exec::CompileInputs(expr7, query_context.get(), {});
|
||||
EXPECT_EQ(exprs.size(), 4);
|
||||
for (int i = 0; i < exprs.size(); ++i) {
|
||||
std::cout << exprs[i]->get_name() << std::endl;
|
||||
EXPECT_STREQ(exprs[i]->get_name().c_str(), "PhyUnaryRangeFilterExpr");
|
||||
std::cout << exprs[i]->name() << std::endl;
|
||||
EXPECT_STREQ(exprs[i]->name().c_str(), "PhyUnaryRangeFilterExpr");
|
||||
}
|
||||
}
|
||||
|
||||
@ -316,7 +319,7 @@ TEST_P(TaskTest, CompileInputs_or_with_and) {
|
||||
proto::plan::GenericValue val;
|
||||
val.set_int64_val(10);
|
||||
{
|
||||
// expr: (int64_fid < 10 and int64_fid < 10) or (int64_fid < 10 and int64_fid < 10)
|
||||
// expr: (int64_fid > 10 and int64_fid > 10) or (int64_fid > 10 and int64_fid > 10)
|
||||
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(int64_fid, DataType::INT64),
|
||||
proto::plan::OpType::GreaterThan,
|
||||
@ -342,19 +345,19 @@ TEST_P(TaskTest, CompileInputs_or_with_and) {
|
||||
auto expr6 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||
DEAFULT_QUERY_ID, segment_.get(), 1000000, MAX_TIMESTAMP);
|
||||
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||
auto expr7 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||
expr::LogicalBinaryExpr::OpType::Or, expr3, expr6);
|
||||
auto exprs =
|
||||
milvus::exec::CompileInputs(expr7, query_context.get(), {});
|
||||
EXPECT_EQ(exprs.size(), 2);
|
||||
for (int i = 0; i < exprs.size(); ++i) {
|
||||
std::cout << exprs[i]->get_name() << std::endl;
|
||||
EXPECT_STREQ(exprs[i]->get_name().c_str(), "and");
|
||||
std::cout << exprs[i]->name() << std::endl;
|
||||
EXPECT_STREQ(exprs[i]->name().c_str(), "PhyConjunctFilterExpr");
|
||||
}
|
||||
}
|
||||
{
|
||||
// expr: (int64_fid < 10 or int64_fid < 10) or (int64_fid < 10 and int64_fid < 10)
|
||||
// expr: (int64_fid < 10 or int64_fid < 10) or (int64_fid > 10 and int64_fid > 10)
|
||||
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(int64_fid, DataType::INT64),
|
||||
proto::plan::OpType::GreaterThan,
|
||||
@ -380,7 +383,7 @@ TEST_P(TaskTest, CompileInputs_or_with_and) {
|
||||
auto expr6 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||
DEAFULT_QUERY_ID, segment_.get(), 1000000, MAX_TIMESTAMP);
|
||||
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||
auto expr7 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||
expr::LogicalBinaryExpr::OpType::Or, expr3, expr6);
|
||||
auto exprs =
|
||||
@ -388,14 +391,13 @@ TEST_P(TaskTest, CompileInputs_or_with_and) {
|
||||
std::cout << exprs.size() << std::endl;
|
||||
EXPECT_EQ(exprs.size(), 3);
|
||||
for (int i = 0; i < exprs.size() - 1; ++i) {
|
||||
std::cout << exprs[i]->get_name() << std::endl;
|
||||
EXPECT_STREQ(exprs[i]->get_name().c_str(),
|
||||
"PhyUnaryRangeFilterExpr");
|
||||
std::cout << exprs[i]->name() << std::endl;
|
||||
EXPECT_STREQ(exprs[i]->name().c_str(), "PhyUnaryRangeFilterExpr");
|
||||
}
|
||||
EXPECT_STREQ(exprs[2]->get_name().c_str(), "and");
|
||||
EXPECT_STREQ(exprs[2]->name().c_str(), "PhyConjunctFilterExpr");
|
||||
}
|
||||
{
|
||||
// expr: (int64_fid < 10 or int64_fid < 10) and (int64_fid < 10 and int64_fid < 10)
|
||||
// expr: (int64_fid > 10 or int64_fid > 10) and (int64_fid > 10 and int64_fid > 10)
|
||||
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(int64_fid, DataType::INT64),
|
||||
proto::plan::OpType::GreaterThan,
|
||||
@ -421,18 +423,282 @@ TEST_P(TaskTest, CompileInputs_or_with_and) {
|
||||
auto expr6 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||
DEAFULT_QUERY_ID, segment_.get(), 1000000, MAX_TIMESTAMP);
|
||||
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||
auto expr7 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||
expr::LogicalBinaryExpr::OpType::And, expr3, expr6);
|
||||
auto exprs =
|
||||
milvus::exec::CompileInputs(expr7, query_context.get(), {});
|
||||
std::cout << exprs.size() << std::endl;
|
||||
EXPECT_EQ(exprs.size(), 3);
|
||||
EXPECT_STREQ(exprs[0]->get_name().c_str(), "or");
|
||||
EXPECT_STREQ(exprs[0]->name().c_str(), "PhyConjunctFilterExpr");
|
||||
for (int i = 1; i < exprs.size(); ++i) {
|
||||
std::cout << exprs[i]->get_name() << std::endl;
|
||||
EXPECT_STREQ(exprs[i]->get_name().c_str(),
|
||||
"PhyUnaryRangeFilterExpr");
|
||||
std::cout << exprs[i]->name() << std::endl;
|
||||
EXPECT_STREQ(exprs[i]->name().c_str(), "PhyUnaryRangeFilterExpr");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(TaskTest, Test_reorder) {
|
||||
using namespace milvus;
|
||||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
using namespace milvus::exec;
|
||||
|
||||
{
|
||||
// expr: string2 like '%xxx' and string2 == 'xxx'
|
||||
// reorder: string2 == 'xxx' and string2 like '%xxx'
|
||||
proto::plan::GenericValue val1;
|
||||
val1.set_string_val("%xxx");
|
||||
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(field_map_["string2"], DataType::VARCHAR),
|
||||
proto::plan::OpType::Match,
|
||||
val1,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
proto::plan::GenericValue val2;
|
||||
val2.set_string_val("xxx");
|
||||
auto expr2 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(field_map_["string2"], DataType::VARCHAR),
|
||||
proto::plan::OpType::Equal,
|
||||
val2,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||
ExecContext context(query_context.get());
|
||||
auto exprs =
|
||||
milvus::exec::CompileExpressions({expr3}, &context, {}, false);
|
||||
EXPECT_EQ(exprs.size(), 1);
|
||||
EXPECT_STREQ(exprs[0]->name().c_str(), "PhyConjunctFilterExpr");
|
||||
auto phy_expr =
|
||||
std::static_pointer_cast<milvus::exec::PhyConjunctFilterExpr>(
|
||||
exprs[0]);
|
||||
std::cout << phy_expr->ToString() << std::endl;
|
||||
auto reorder = phy_expr->GetReorder();
|
||||
EXPECT_EQ(reorder.size(), 2);
|
||||
EXPECT_EQ(reorder[0], 1);
|
||||
EXPECT_EQ(reorder[1], 0);
|
||||
}
|
||||
|
||||
{
|
||||
// expr: string2 == 'xxx' and int64 < 100
|
||||
// reorder: int64 < 100 and string2 == 'xxx'
|
||||
proto::plan::GenericValue val1;
|
||||
val1.set_string_val("xxx");
|
||||
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(field_map_["string2"], DataType::VARCHAR),
|
||||
proto::plan::OpType::Equal,
|
||||
val1,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
proto::plan::GenericValue val2;
|
||||
val2.set_int64_val(100);
|
||||
auto expr2 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(field_map_["int64"], DataType::INT64),
|
||||
proto::plan::OpType::LessThan,
|
||||
val2,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||
ExecContext context(query_context.get());
|
||||
auto exprs =
|
||||
milvus::exec::CompileExpressions({expr3}, &context, {}, false);
|
||||
EXPECT_EQ(exprs.size(), 1);
|
||||
EXPECT_STREQ(exprs[0]->name().c_str(), "PhyConjunctFilterExpr");
|
||||
auto phy_expr =
|
||||
std::static_pointer_cast<milvus::exec::PhyConjunctFilterExpr>(
|
||||
exprs[0]);
|
||||
std::cout << phy_expr->ToString() << std::endl;
|
||||
auto reorder = phy_expr->GetReorder();
|
||||
EXPECT_EQ(reorder.size(), 2);
|
||||
EXPECT_EQ(reorder[0], 1);
|
||||
EXPECT_EQ(reorder[1], 0);
|
||||
}
|
||||
|
||||
{
|
||||
// expr: json['b'] like '%xxx' and json['a'] == 'xxx'
|
||||
// reorder: json['a'] == 'xxx' and json['b'] like '%xxx'
|
||||
proto::plan::GenericValue val1;
|
||||
val1.set_string_val("%xxx");
|
||||
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(field_map_["json"], DataType::JSON),
|
||||
proto::plan::OpType::Match,
|
||||
val1,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
proto::plan::GenericValue val2;
|
||||
val2.set_string_val("xxx");
|
||||
auto expr2 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(field_map_["json"], DataType::JSON),
|
||||
proto::plan::OpType::Equal,
|
||||
val2,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||
ExecContext context(query_context.get());
|
||||
auto exprs =
|
||||
milvus::exec::CompileExpressions({expr3}, &context, {}, false);
|
||||
EXPECT_EQ(exprs.size(), 1);
|
||||
EXPECT_STREQ(exprs[0]->name().c_str(), "PhyConjunctFilterExpr");
|
||||
auto phy_expr =
|
||||
std::static_pointer_cast<milvus::exec::PhyConjunctFilterExpr>(
|
||||
exprs[0]);
|
||||
std::cout << phy_expr->ToString() << std::endl;
|
||||
auto reorder = phy_expr->GetReorder();
|
||||
EXPECT_EQ(reorder.size(), 2);
|
||||
EXPECT_EQ(reorder[0], 1);
|
||||
EXPECT_EQ(reorder[1], 0);
|
||||
}
|
||||
|
||||
{
|
||||
// expr: json['a'] == 'xxx' and int64 == 100
|
||||
// reorder: int64 == 100 and json['a'] == 'xxx'
|
||||
proto::plan::GenericValue val1;
|
||||
val1.set_string_val("xxx");
|
||||
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(field_map_["json"], DataType::JSON),
|
||||
proto::plan::OpType::Equal,
|
||||
val1,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
proto::plan::GenericValue val2;
|
||||
val2.set_int64_val(100);
|
||||
auto expr2 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(field_map_["int64"], DataType::INT64),
|
||||
proto::plan::OpType::Equal,
|
||||
val2,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||
ExecContext context(query_context.get());
|
||||
auto exprs =
|
||||
milvus::exec::CompileExpressions({expr3}, &context, {}, false);
|
||||
EXPECT_EQ(exprs.size(), 1);
|
||||
EXPECT_STREQ(exprs[0]->name().c_str(), "PhyConjunctFilterExpr");
|
||||
auto phy_expr =
|
||||
std::static_pointer_cast<milvus::exec::PhyConjunctFilterExpr>(
|
||||
exprs[0]);
|
||||
std::cout << phy_expr->ToString() << std::endl;
|
||||
auto reorder = phy_expr->GetReorder();
|
||||
EXPECT_EQ(reorder.size(), 2);
|
||||
EXPECT_EQ(reorder[0], 1);
|
||||
EXPECT_EQ(reorder[1], 0);
|
||||
}
|
||||
|
||||
{
|
||||
// expr: json['a'] == 'xxx' and 0 < int64 < 100
|
||||
// reorder: 0 < int64 < 100 and json['a'] == 'xxx'
|
||||
proto::plan::GenericValue val1;
|
||||
val1.set_string_val("xxx");
|
||||
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(field_map_["json"], DataType::JSON),
|
||||
proto::plan::OpType::Equal,
|
||||
val1,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
proto::plan::GenericValue low;
|
||||
low.set_int64_val(0);
|
||||
proto::plan::GenericValue upper;
|
||||
upper.set_int64_val(100);
|
||||
auto expr2 = std::make_shared<expr::BinaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(field_map_["int64"], DataType::INT64),
|
||||
low,
|
||||
upper,
|
||||
false,
|
||||
false);
|
||||
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||
ExecContext context(query_context.get());
|
||||
auto exprs =
|
||||
milvus::exec::CompileExpressions({expr3}, &context, {}, false);
|
||||
EXPECT_EQ(exprs.size(), 1);
|
||||
EXPECT_STREQ(exprs[0]->name().c_str(), "PhyConjunctFilterExpr");
|
||||
auto phy_expr =
|
||||
std::static_pointer_cast<milvus::exec::PhyConjunctFilterExpr>(
|
||||
exprs[0]);
|
||||
std::cout << phy_expr->ToString() << std::endl;
|
||||
auto reorder = phy_expr->GetReorder();
|
||||
EXPECT_EQ(reorder.size(), 2);
|
||||
EXPECT_EQ(reorder[0], 1);
|
||||
EXPECT_EQ(reorder[1], 0);
|
||||
}
|
||||
|
||||
{
|
||||
// expr: string1 < string2 and 0 < int64 < 100
|
||||
// reorder: 0 < int64 < 100 and string1 < string2
|
||||
proto::plan::GenericValue val1;
|
||||
val1.set_string_val("xxx");
|
||||
auto expr1 = std::make_shared<expr::CompareExpr>(field_map_["string1"],
|
||||
field_map_["string2"],
|
||||
DataType::VARCHAR,
|
||||
DataType::VARCHAR,
|
||||
OpType::LessThan);
|
||||
proto::plan::GenericValue low;
|
||||
low.set_int64_val(0);
|
||||
proto::plan::GenericValue upper;
|
||||
upper.set_int64_val(100);
|
||||
auto expr2 = std::make_shared<expr::BinaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(field_map_["int64"], DataType::INT64),
|
||||
low,
|
||||
upper,
|
||||
false,
|
||||
false);
|
||||
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||
ExecContext context(query_context.get());
|
||||
auto exprs =
|
||||
milvus::exec::CompileExpressions({expr3}, &context, {}, false);
|
||||
EXPECT_EQ(exprs.size(), 1);
|
||||
EXPECT_STREQ(exprs[0]->name().c_str(), "PhyConjunctFilterExpr");
|
||||
auto phy_expr =
|
||||
std::static_pointer_cast<milvus::exec::PhyConjunctFilterExpr>(
|
||||
exprs[0]);
|
||||
std::cout << phy_expr->ToString() << std::endl;
|
||||
auto reorder = phy_expr->GetReorder();
|
||||
EXPECT_EQ(reorder.size(), 2);
|
||||
EXPECT_EQ(reorder[0], 1);
|
||||
EXPECT_EQ(reorder[1], 0);
|
||||
}
|
||||
|
||||
{
|
||||
// expr: string2 like '%xxx' and string2 == 'xxx'
|
||||
// with optimize expr disabled, the original sub-expression order is kept (no reorder)
|
||||
proto::plan::GenericValue val1;
|
||||
val1.set_string_val("%xxx");
|
||||
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(field_map_["string2"], DataType::VARCHAR),
|
||||
proto::plan::OpType::Match,
|
||||
val1,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
proto::plan::GenericValue val2;
|
||||
val2.set_string_val("xxx");
|
||||
auto expr2 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(field_map_["string2"], DataType::VARCHAR),
|
||||
proto::plan::OpType::Equal,
|
||||
val2,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||
ExecContext context(query_context.get());
|
||||
OPTIMIZE_EXPR_ENABLED = false;
|
||||
auto exprs =
|
||||
milvus::exec::CompileExpressions({expr3}, &context, {}, false);
|
||||
EXPECT_EQ(exprs.size(), 1);
|
||||
EXPECT_STREQ(exprs[0]->name().c_str(), "PhyConjunctFilterExpr");
|
||||
auto phy_expr =
|
||||
std::static_pointer_cast<milvus::exec::PhyConjunctFilterExpr>(
|
||||
exprs[0]);
|
||||
std::cout << phy_expr->ToString() << std::endl;
|
||||
auto reorder = phy_expr->GetReorder();
|
||||
EXPECT_EQ(reorder.size(), 0);
|
||||
OPTIMIZE_EXPR_ENABLED = true;
|
||||
}
|
||||
}
|
||||
|
||||
@ -3401,6 +3401,145 @@ TEST_P(ExprTest, TestSealedSegmentGetBatchSize) {
|
||||
std::cout << "end compare test" << std::endl;
|
||||
}
|
||||
|
||||
TEST_P(ExprTest, TestReorder) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto pk = schema->AddDebugField("id", DataType::INT64);
|
||||
auto bool_fid = schema->AddDebugField("bool", DataType::BOOL);
|
||||
auto bool_1_fid = schema->AddDebugField("bool1", DataType::BOOL);
|
||||
auto int8_fid = schema->AddDebugField("int8", DataType::INT8);
|
||||
auto int8_1_fid = schema->AddDebugField("int81", DataType::INT8);
|
||||
auto int16_fid = schema->AddDebugField("int16", DataType::INT16);
|
||||
auto int16_1_fid = schema->AddDebugField("int161", DataType::INT16);
|
||||
auto int32_fid = schema->AddDebugField("int32", DataType::INT32);
|
||||
auto int32_1_fid = schema->AddDebugField("int321", DataType::INT32);
|
||||
auto int64_fid = schema->AddDebugField("int64", DataType::INT64);
|
||||
auto int64_1_fid = schema->AddDebugField("int641", DataType::INT64);
|
||||
auto float_fid = schema->AddDebugField("float", DataType::FLOAT);
|
||||
auto float_1_fid = schema->AddDebugField("float1", DataType::FLOAT);
|
||||
auto double_fid = schema->AddDebugField("double", DataType::DOUBLE);
|
||||
auto double_1_fid = schema->AddDebugField("double1", DataType::DOUBLE);
|
||||
auto str1_fid = schema->AddDebugField("string1", DataType::VARCHAR);
|
||||
auto str2_fid = schema->AddDebugField("string2", DataType::VARCHAR);
|
||||
auto json_fid = schema->AddDebugField("json", DataType::JSON, false);
|
||||
auto str_array_fid =
|
||||
schema->AddDebugField("str_array", DataType::ARRAY, DataType::VARCHAR);
|
||||
schema->set_primary_field_id(pk);
|
||||
|
||||
auto seg = CreateSealedSegment(schema);
|
||||
size_t N = 1000;
|
||||
auto raw_data = DataGen(schema, N);
|
||||
auto fields = schema->get_fields();
|
||||
for (auto field_data : raw_data.raw_->fields_data()) {
|
||||
int64_t field_id = field_data.field_id();
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
|
||||
seg->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
|
||||
query::ExecPlanNodeVisitor visitor(*seg, MAX_TIMESTAMP);
|
||||
|
||||
auto build_expr = [&](int index) -> expr::TypedExprPtr {
|
||||
switch (index) {
|
||||
case 0: {
|
||||
proto::plan::GenericValue val1;
|
||||
val1.set_string_val("xxx");
|
||||
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(str1_fid, DataType::VARCHAR),
|
||||
proto::plan::OpType::Equal,
|
||||
val1,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
proto::plan::GenericValue val2;
|
||||
val2.set_int64_val(100);
|
||||
auto expr2 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(int64_fid, DataType::INT64),
|
||||
proto::plan::OpType::LessThan,
|
||||
val2,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||
return expr3;
|
||||
};
|
||||
case 1: {
|
||||
proto::plan::GenericValue val1;
|
||||
val1.set_string_val("xxx");
|
||||
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(json_fid, DataType::JSON, {"int"}),
|
||||
proto::plan::OpType::Equal,
|
||||
val1,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
proto::plan::GenericValue val2;
|
||||
val2.set_int64_val(100);
|
||||
auto expr2 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(int64_fid, DataType::INT64),
|
||||
proto::plan::OpType::LessThan,
|
||||
val2,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||
return expr3;
|
||||
};
|
||||
case 2: {
|
||||
proto::plan::GenericValue val1;
|
||||
val1.set_string_val("12");
|
||||
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(str_array_fid, DataType::ARRAY, {"0"}),
|
||||
proto::plan::OpType::Match,
|
||||
val1,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
proto::plan::GenericValue val2;
|
||||
val2.set_int64_val(100);
|
||||
auto expr2 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(int64_fid, DataType::INT64),
|
||||
proto::plan::OpType::LessThan,
|
||||
val2,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||
return expr3;
|
||||
};
|
||||
case 3: {
|
||||
auto expr1 =
|
||||
std::make_shared<expr::CompareExpr>(int64_fid,
|
||||
int64_1_fid,
|
||||
DataType::INT64,
|
||||
DataType::INT64,
|
||||
OpType::LessThan);
|
||||
proto::plan::GenericValue val2;
|
||||
val2.set_int64_val(100);
|
||||
auto expr2 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(int64_fid, DataType::INT64),
|
||||
proto::plan::OpType::LessThan,
|
||||
val2,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||
return expr3;
|
||||
};
|
||||
default:
|
||||
PanicInfo(ErrorCode::UnexpectedError, "not implement");
|
||||
}
|
||||
};
|
||||
BitsetType final;
|
||||
auto expr = build_expr(0);
|
||||
auto plan =
|
||||
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, expr);
|
||||
final = ExecuteQueryExpr(plan, seg.get(), N, MAX_TIMESTAMP);
|
||||
expr = build_expr(1);
|
||||
plan = std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, expr);
|
||||
final = ExecuteQueryExpr(plan, seg.get(), N, MAX_TIMESTAMP);
|
||||
expr = build_expr(2);
|
||||
plan = std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, expr);
|
||||
final = ExecuteQueryExpr(plan, seg.get(), N, MAX_TIMESTAMP);
|
||||
expr = build_expr(3);
|
||||
plan = std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, expr);
|
||||
final = ExecuteQueryExpr(plan, seg.get(), N, MAX_TIMESTAMP);
|
||||
}
|
||||
|
||||
TEST_P(ExprTest, TestCompareExprNullable) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto vec_fid = schema->AddDebugField("fakevec", data_type, 16, metric_type);
|
||||
|
||||
@ -242,6 +242,9 @@ func (node *QueryNode) InitSegcore() error {
|
||||
cExprBatchSize := C.int64_t(paramtable.Get().QueryNodeCfg.ExprEvalBatchSize.GetAsInt64())
|
||||
C.InitDefaultExprEvalBatchSize(cExprBatchSize)
|
||||
|
||||
cOptimizeExprEnabled := C.bool(paramtable.Get().CommonCfg.EnabledOptimizeExpr.GetAsBool())
|
||||
C.InitDefaultOptimizeExprEnable(cOptimizeExprEnabled)
|
||||
|
||||
cGpuMemoryPoolInitSize := C.uint32_t(paramtable.Get().GpuConfig.InitSize.GetAsUint32())
|
||||
cGpuMemoryPoolMaxSize := C.uint32_t(paramtable.Get().GpuConfig.MaxSize.GetAsUint32())
|
||||
C.SegcoreSetKnowhereGpuMemoryPoolSize(cGpuMemoryPoolInitSize, cGpuMemoryPoolMaxSize)
|
||||
|
||||
@ -292,6 +292,8 @@ type commonConfig struct {
|
||||
LocalRPCEnabled ParamItem `refreshable:"false"`
|
||||
|
||||
SyncTaskPoolReleaseTimeoutSeconds ParamItem `refreshable:"true"`
|
||||
|
||||
EnabledOptimizeExpr ParamItem `refreshable:"true"`
|
||||
}
|
||||
|
||||
func (p *commonConfig) init(base *BaseTable) {
|
||||
@ -994,6 +996,15 @@ This helps Milvus-CDC synchronize incremental data`,
|
||||
Export: true,
|
||||
}
|
||||
p.SyncTaskPoolReleaseTimeoutSeconds.Init(base.mgr)
|
||||
|
||||
p.EnabledOptimizeExpr = ParamItem{
|
||||
Key: "common.enabledOptimizeExpr",
|
||||
Version: "2.5.6",
|
||||
DefaultValue: "true",
|
||||
Doc: "Indicates whether to enable optimize expr",
|
||||
Export: true,
|
||||
}
|
||||
p.EnabledOptimizeExpr.Init(base.mgr)
|
||||
}
|
||||
|
||||
type gpuConfig struct {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user