mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 01:28:27 +08:00
enhance: reorder sub expr for conjunct expr (#39872)
Two points:
(1) Reorder the sub-exprs of a conjunct expr so that heavy operations are postponed.
Sequence: int(column) -> index(column) -> string(column) -> light
conjunct
...... -> json(column) -> heavy conjunct -> two_column_compare
(2) Support pre-filtering during expr execution: skip scanning raw data for
rows that have already been filtered out
by the result of a preceding expr.
#39869
Signed-off-by: luzhang <luzhang@zilliz.com>
Co-authored-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
parent
8db708f67d
commit
6c55db44f1
@ -889,6 +889,7 @@ common:
|
|||||||
localRPCEnabled: false # enable local rpc for internal communication when mix or standalone mode.
|
localRPCEnabled: false # enable local rpc for internal communication when mix or standalone mode.
|
||||||
sync:
|
sync:
|
||||||
taskPoolReleaseTimeoutSeconds: 60 # The maximum time to wait for the task to finish and release resources in the pool
|
taskPoolReleaseTimeoutSeconds: 60 # The maximum time to wait for the task to finish and release resources in the pool
|
||||||
|
enabledOptimizeExpr: true # Indicates whether to enable optimize expr
|
||||||
|
|
||||||
# QuotaConfig, configurations of Milvus quota and limits.
|
# QuotaConfig, configurations of Milvus quota and limits.
|
||||||
# By default, we enable:
|
# By default, we enable:
|
||||||
|
|||||||
@ -28,6 +28,7 @@ int64_t LOW_PRIORITY_THREAD_CORE_COEFFICIENT =
|
|||||||
DEFAULT_LOW_PRIORITY_THREAD_CORE_COEFFICIENT;
|
DEFAULT_LOW_PRIORITY_THREAD_CORE_COEFFICIENT;
|
||||||
int CPU_NUM = DEFAULT_CPU_NUM;
|
int CPU_NUM = DEFAULT_CPU_NUM;
|
||||||
int64_t EXEC_EVAL_EXPR_BATCH_SIZE = DEFAULT_EXEC_EVAL_EXPR_BATCH_SIZE;
|
int64_t EXEC_EVAL_EXPR_BATCH_SIZE = DEFAULT_EXEC_EVAL_EXPR_BATCH_SIZE;
|
||||||
|
bool OPTIMIZE_EXPR_ENABLED = DEFAULT_OPTIMIZE_EXPR_ENABLED;
|
||||||
|
|
||||||
void
|
void
|
||||||
SetIndexSliceSize(const int64_t size) {
|
SetIndexSliceSize(const int64_t size) {
|
||||||
@ -67,4 +68,10 @@ SetCpuNum(const int num) {
|
|||||||
CPU_NUM = num;
|
CPU_NUM = num;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
SetDefaultOptimizeExprEnable(bool val) {
|
||||||
|
OPTIMIZE_EXPR_ENABLED = val;
|
||||||
|
LOG_INFO("set default optimize expr enabled: {}", OPTIMIZE_EXPR_ENABLED);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace milvus
|
} // namespace milvus
|
||||||
|
|||||||
@ -29,6 +29,7 @@ extern int64_t MIDDLE_PRIORITY_THREAD_CORE_COEFFICIENT;
|
|||||||
extern int64_t LOW_PRIORITY_THREAD_CORE_COEFFICIENT;
|
extern int64_t LOW_PRIORITY_THREAD_CORE_COEFFICIENT;
|
||||||
extern int CPU_NUM;
|
extern int CPU_NUM;
|
||||||
extern int64_t EXEC_EVAL_EXPR_BATCH_SIZE;
|
extern int64_t EXEC_EVAL_EXPR_BATCH_SIZE;
|
||||||
|
extern bool OPTIMIZE_EXPR_ENABLED;
|
||||||
|
|
||||||
void
|
void
|
||||||
SetIndexSliceSize(const int64_t size);
|
SetIndexSliceSize(const int64_t size);
|
||||||
@ -48,6 +49,9 @@ SetCpuNum(const int core);
|
|||||||
void
|
void
|
||||||
SetDefaultExecEvalExprBatchSize(int64_t val);
|
SetDefaultExecEvalExprBatchSize(int64_t val);
|
||||||
|
|
||||||
|
void
|
||||||
|
SetDefaultOptimizeExprEnable(bool val);
|
||||||
|
|
||||||
struct BufferView {
|
struct BufferView {
|
||||||
struct Element {
|
struct Element {
|
||||||
const char* data_;
|
const char* data_;
|
||||||
|
|||||||
@ -80,3 +80,4 @@ const size_t MARISA_NULL_KEY_ID = -1;
|
|||||||
|
|
||||||
const std::string JSON_CAST_TYPE = "json_cast_type";
|
const std::string JSON_CAST_TYPE = "json_cast_type";
|
||||||
const std::string JSON_PATH = "json_path";
|
const std::string JSON_PATH = "json_path";
|
||||||
|
const bool DEFAULT_OPTIMIZE_EXPR_ENABLED = true;
|
||||||
|
|||||||
@ -254,6 +254,11 @@ IsFloatDataType(DataType data_type) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline bool
|
||||||
|
IsNumericDataType(DataType data_type) {
|
||||||
|
return IsIntegerDataType(data_type) || IsFloatDataType(data_type);
|
||||||
|
}
|
||||||
|
|
||||||
inline bool
|
inline bool
|
||||||
IsStringDataType(DataType data_type) {
|
IsStringDataType(DataType data_type) {
|
||||||
switch (data_type) {
|
switch (data_type) {
|
||||||
|
|||||||
@ -25,7 +25,7 @@
|
|||||||
#include "common/Tracer.h"
|
#include "common/Tracer.h"
|
||||||
#include "log/Log.h"
|
#include "log/Log.h"
|
||||||
|
|
||||||
std::once_flag flag1, flag2, flag3, flag4, flag5, flag6;
|
std::once_flag flag1, flag2, flag3, flag4, flag5, flag6, flag7;
|
||||||
std::once_flag traceFlag;
|
std::once_flag traceFlag;
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -78,6 +78,14 @@ InitDefaultExprEvalBatchSize(int64_t val) {
|
|||||||
val);
|
val);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
InitDefaultOptimizeExprEnable(bool val) {
|
||||||
|
std::call_once(
|
||||||
|
flag7,
|
||||||
|
[](bool val) { milvus::SetDefaultOptimizeExprEnable(val); },
|
||||||
|
val);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
InitTrace(CTraceConfig* config) {
|
InitTrace(CTraceConfig* config) {
|
||||||
auto traceConfig = milvus::tracer::TraceConfig{config->exporter,
|
auto traceConfig = milvus::tracer::TraceConfig{config->exporter,
|
||||||
|
|||||||
@ -48,6 +48,9 @@ InitTrace(CTraceConfig* config);
|
|||||||
void
|
void
|
||||||
SetTrace(CTraceConfig* config);
|
SetTrace(CTraceConfig* config);
|
||||||
|
|
||||||
|
void
|
||||||
|
InitDefaultOptimizeExprEnable(bool val);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -35,8 +35,9 @@ PhyAlwaysTrueExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
|
||||||
|
|||||||
@ -57,6 +57,21 @@ class PhyAlwaysTrueExpr : public Expr {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
ToString() const override {
|
||||||
|
return "[AlwaysTrue]";
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IsSource() const override {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<milvus::expr::ColumnInfo>
|
||||||
|
GetColumnInfo() const override {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<const milvus::expr::AlwaysTrueExpr> expr_;
|
std::shared_ptr<const milvus::expr::AlwaysTrueExpr> expr_;
|
||||||
int64_t active_count_;
|
int64_t active_count_;
|
||||||
|
|||||||
@ -117,11 +117,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson(
|
|||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
if (!arg_inited_) {
|
if (!arg_inited_) {
|
||||||
value_arg_.SetValue<ValueType>(expr_->value_);
|
value_arg_.SetValue<ValueType>(expr_->value_);
|
||||||
@ -535,11 +535,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray(
|
|||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
int index = -1;
|
int index = -1;
|
||||||
if (expr_->column_.nested_path_.size() > 0) {
|
if (expr_->column_.nested_path_.size() > 0) {
|
||||||
@ -1435,11 +1435,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData(
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
if (!arg_inited_) {
|
if (!arg_inited_) {
|
||||||
value_arg_.SetValue<HighPrecisionType>(expr_->value_);
|
value_arg_.SetValue<HighPrecisionType>(expr_->value_);
|
||||||
|
|||||||
@ -464,6 +464,21 @@ class PhyBinaryArithOpEvalRangeExpr : public SegmentExpr {
|
|||||||
void
|
void
|
||||||
Eval(EvalCtx& context, VectorPtr& result) override;
|
Eval(EvalCtx& context, VectorPtr& result) override;
|
||||||
|
|
||||||
|
std::string
|
||||||
|
ToString() const override {
|
||||||
|
return fmt::format("{}", expr_->ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IsSource() const override {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<milvus::expr::ColumnInfo>
|
||||||
|
GetColumnInfo() const override {
|
||||||
|
return expr_->column_;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
|
|||||||
@ -28,31 +28,31 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
SetHasOffsetInput((input != nullptr));
|
SetHasOffsetInput((input != nullptr));
|
||||||
switch (expr_->column_.data_type_) {
|
switch (expr_->column_.data_type_) {
|
||||||
case DataType::BOOL: {
|
case DataType::BOOL: {
|
||||||
result = ExecRangeVisitorImpl<bool>(input);
|
result = ExecRangeVisitorImpl<bool>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT8: {
|
case DataType::INT8: {
|
||||||
result = ExecRangeVisitorImpl<int8_t>(input);
|
result = ExecRangeVisitorImpl<int8_t>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT16: {
|
case DataType::INT16: {
|
||||||
result = ExecRangeVisitorImpl<int16_t>(input);
|
result = ExecRangeVisitorImpl<int16_t>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT32: {
|
case DataType::INT32: {
|
||||||
result = ExecRangeVisitorImpl<int32_t>(input);
|
result = ExecRangeVisitorImpl<int32_t>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT64: {
|
case DataType::INT64: {
|
||||||
result = ExecRangeVisitorImpl<int64_t>(input);
|
result = ExecRangeVisitorImpl<int64_t>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::FLOAT: {
|
case DataType::FLOAT: {
|
||||||
result = ExecRangeVisitorImpl<float>(input);
|
result = ExecRangeVisitorImpl<float>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::DOUBLE: {
|
case DataType::DOUBLE: {
|
||||||
result = ExecRangeVisitorImpl<double>(input);
|
result = ExecRangeVisitorImpl<double>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::VARCHAR: {
|
case DataType::VARCHAR: {
|
||||||
@ -60,9 +60,9 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
!storage::MmapManager::GetInstance()
|
!storage::MmapManager::GetInstance()
|
||||||
.GetMmapConfig()
|
.GetMmapConfig()
|
||||||
.growing_enable_mmap) {
|
.growing_enable_mmap) {
|
||||||
result = ExecRangeVisitorImpl<std::string>(input);
|
result = ExecRangeVisitorImpl<std::string>(context);
|
||||||
} else {
|
} else {
|
||||||
result = ExecRangeVisitorImpl<std::string_view>(input);
|
result = ExecRangeVisitorImpl<std::string_view>(context);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -70,15 +70,15 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
auto value_type = expr_->lower_val_.val_case();
|
auto value_type = expr_->lower_val_.val_case();
|
||||||
switch (value_type) {
|
switch (value_type) {
|
||||||
case proto::plan::GenericValue::ValCase::kInt64Val: {
|
case proto::plan::GenericValue::ValCase::kInt64Val: {
|
||||||
result = ExecRangeVisitorImplForJson<int64_t>(input);
|
result = ExecRangeVisitorImplForJson<int64_t>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::ValCase::kFloatVal: {
|
case proto::plan::GenericValue::ValCase::kFloatVal: {
|
||||||
result = ExecRangeVisitorImplForJson<double>(input);
|
result = ExecRangeVisitorImplForJson<double>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::ValCase::kStringVal: {
|
case proto::plan::GenericValue::ValCase::kStringVal: {
|
||||||
result = ExecRangeVisitorImplForJson<std::string>(input);
|
result = ExecRangeVisitorImplForJson<std::string>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default: {
|
default: {
|
||||||
@ -95,17 +95,17 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
switch (value_type) {
|
switch (value_type) {
|
||||||
case proto::plan::GenericValue::ValCase::kInt64Val: {
|
case proto::plan::GenericValue::ValCase::kInt64Val: {
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecRangeVisitorImplForArray<int64_t>(input);
|
result = ExecRangeVisitorImplForArray<int64_t>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::ValCase::kFloatVal: {
|
case proto::plan::GenericValue::ValCase::kFloatVal: {
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecRangeVisitorImplForArray<double>(input);
|
result = ExecRangeVisitorImplForArray<double>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::ValCase::kStringVal: {
|
case proto::plan::GenericValue::ValCase::kStringVal: {
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecRangeVisitorImplForArray<std::string>(input);
|
result = ExecRangeVisitorImplForArray<std::string>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default: {
|
default: {
|
||||||
@ -126,11 +126,11 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImpl(OffsetVector* input) {
|
PhyBinaryRangeFilterExpr::ExecRangeVisitorImpl(EvalCtx& context) {
|
||||||
if (is_index_mode_ && !has_offset_input_) {
|
if (is_index_mode_ && !has_offset_input_) {
|
||||||
return ExecRangeVisitorImplForIndex<T>();
|
return ExecRangeVisitorImplForIndex<T>();
|
||||||
} else {
|
} else {
|
||||||
return ExecRangeVisitorImplForData<T>(input);
|
return ExecRangeVisitorImplForData<T>(context);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -235,7 +235,7 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForIndex() {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(EvalCtx& context) {
|
||||||
typedef std::
|
typedef std::
|
||||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||||
IndexInnerType;
|
IndexInnerType;
|
||||||
@ -246,6 +246,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
|||||||
IndexInnerType>
|
IndexInnerType>
|
||||||
HighPrecisionType;
|
HighPrecisionType;
|
||||||
|
|
||||||
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
|
auto* input = context.get_offset_input();
|
||||||
HighPrecisionType val1;
|
HighPrecisionType val1;
|
||||||
HighPrecisionType val2;
|
HighPrecisionType val2;
|
||||||
bool lower_inclusive = false;
|
bool lower_inclusive = false;
|
||||||
@ -260,15 +262,16 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
|||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
|
size_t processed_cursor = 0;
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[ lower_inclusive,
|
[ lower_inclusive, upper_inclusive, &processed_cursor, &
|
||||||
upper_inclusive ]<FilterType filter_type = FilterType::sequential>(
|
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const T* data,
|
const T* data,
|
||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -279,16 +282,44 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
|||||||
HighPrecisionType val2) {
|
HighPrecisionType val2) {
|
||||||
if (lower_inclusive && upper_inclusive) {
|
if (lower_inclusive && upper_inclusive) {
|
||||||
BinaryRangeElementFunc<T, true, true, filter_type> func;
|
BinaryRangeElementFunc<T, true, true, filter_type> func;
|
||||||
func(val1, val2, data, size, res, offsets);
|
func(val1,
|
||||||
|
val2,
|
||||||
|
data,
|
||||||
|
size,
|
||||||
|
res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
|
offsets);
|
||||||
} else if (lower_inclusive && !upper_inclusive) {
|
} else if (lower_inclusive && !upper_inclusive) {
|
||||||
BinaryRangeElementFunc<T, true, false, filter_type> func;
|
BinaryRangeElementFunc<T, true, false, filter_type> func;
|
||||||
func(val1, val2, data, size, res, offsets);
|
func(val1,
|
||||||
|
val2,
|
||||||
|
data,
|
||||||
|
size,
|
||||||
|
res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
|
offsets);
|
||||||
} else if (!lower_inclusive && upper_inclusive) {
|
} else if (!lower_inclusive && upper_inclusive) {
|
||||||
BinaryRangeElementFunc<T, false, true, filter_type> func;
|
BinaryRangeElementFunc<T, false, true, filter_type> func;
|
||||||
func(val1, val2, data, size, res, offsets);
|
func(val1,
|
||||||
|
val2,
|
||||||
|
data,
|
||||||
|
size,
|
||||||
|
res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
|
offsets);
|
||||||
} else {
|
} else {
|
||||||
BinaryRangeElementFunc<T, false, false, filter_type> func;
|
BinaryRangeElementFunc<T, false, false, filter_type> func;
|
||||||
func(val1, val2, data, size, res, offsets);
|
func(val1,
|
||||||
|
val2,
|
||||||
|
data,
|
||||||
|
size,
|
||||||
|
res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
|
offsets);
|
||||||
}
|
}
|
||||||
// there is a batch operation in BinaryRangeElementFunc,
|
// there is a batch operation in BinaryRangeElementFunc,
|
||||||
// so not divide data again for the reason that it may reduce performance if the null distribution is scattered
|
// so not divide data again for the reason that it may reduce performance if the null distribution is scattered
|
||||||
@ -304,6 +335,7 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
|
|
||||||
auto skip_index_func =
|
auto skip_index_func =
|
||||||
@ -346,20 +378,23 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
|||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
|
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(EvalCtx& context) {
|
||||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
ValueType>;
|
ValueType>;
|
||||||
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
|
auto* input = context.get_offset_input();
|
||||||
|
FieldId field_id = expr_->column_.field_id_;
|
||||||
auto real_batch_size =
|
auto real_batch_size =
|
||||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
bool lower_inclusive = expr_->lower_inclusive_;
|
bool lower_inclusive = expr_->lower_inclusive_;
|
||||||
bool upper_inclusive = expr_->upper_inclusive_;
|
bool upper_inclusive = expr_->upper_inclusive_;
|
||||||
@ -372,9 +407,15 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
|
|||||||
ValueType val2 = upper_arg_.GetValue<ValueType>();
|
ValueType val2 = upper_arg_.GetValue<ValueType>();
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
|
|
||||||
|
size_t processed_cursor = 0;
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[ lower_inclusive, upper_inclusive,
|
[
|
||||||
pointer ]<FilterType filter_type = FilterType::sequential>(
|
lower_inclusive,
|
||||||
|
upper_inclusive,
|
||||||
|
pointer,
|
||||||
|
&bitmap_input,
|
||||||
|
&processed_cursor
|
||||||
|
]<FilterType filter_type = FilterType::sequential>(
|
||||||
const milvus::Json* data,
|
const milvus::Json* data,
|
||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -394,6 +435,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
|
|||||||
size,
|
size,
|
||||||
res,
|
res,
|
||||||
valid_res,
|
valid_res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
offsets);
|
offsets);
|
||||||
} else if (lower_inclusive && !upper_inclusive) {
|
} else if (lower_inclusive && !upper_inclusive) {
|
||||||
BinaryRangeElementFuncForJson<ValueType, true, false, filter_type>
|
BinaryRangeElementFuncForJson<ValueType, true, false, filter_type>
|
||||||
@ -406,6 +449,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
|
|||||||
size,
|
size,
|
||||||
res,
|
res,
|
||||||
valid_res,
|
valid_res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
offsets);
|
offsets);
|
||||||
|
|
||||||
} else if (!lower_inclusive && upper_inclusive) {
|
} else if (!lower_inclusive && upper_inclusive) {
|
||||||
@ -419,6 +464,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
|
|||||||
size,
|
size,
|
||||||
res,
|
res,
|
||||||
valid_res,
|
valid_res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
offsets);
|
offsets);
|
||||||
} else {
|
} else {
|
||||||
BinaryRangeElementFuncForJson<ValueType, false, false, filter_type>
|
BinaryRangeElementFuncForJson<ValueType, false, false, filter_type>
|
||||||
@ -431,8 +478,11 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
|
|||||||
size,
|
size,
|
||||||
res,
|
res,
|
||||||
valid_res,
|
valid_res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
offsets);
|
offsets);
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
int64_t processed_size;
|
int64_t processed_size;
|
||||||
if (has_offset_input_) {
|
if (has_offset_input_) {
|
||||||
@ -457,20 +507,22 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
|
|||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
|
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(EvalCtx& context) {
|
||||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
ValueType>;
|
ValueType>;
|
||||||
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
|
auto* input = context.get_offset_input();
|
||||||
auto real_batch_size =
|
auto real_batch_size =
|
||||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
bool lower_inclusive = expr_->lower_inclusive_;
|
bool lower_inclusive = expr_->lower_inclusive_;
|
||||||
bool upper_inclusive = expr_->upper_inclusive_;
|
bool upper_inclusive = expr_->upper_inclusive_;
|
||||||
@ -488,9 +540,10 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
|
|||||||
index = std::stoi(expr_->column_.nested_path_[0]);
|
index = std::stoi(expr_->column_.nested_path_[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t processed_cursor = 0;
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[ lower_inclusive,
|
[ lower_inclusive, upper_inclusive, &processed_cursor, &
|
||||||
upper_inclusive ]<FilterType filter_type = FilterType::sequential>(
|
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const milvus::ArrayView* data,
|
const milvus::ArrayView* data,
|
||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -511,6 +564,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
|
|||||||
size,
|
size,
|
||||||
res,
|
res,
|
||||||
valid_res,
|
valid_res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
offsets);
|
offsets);
|
||||||
} else if (lower_inclusive && !upper_inclusive) {
|
} else if (lower_inclusive && !upper_inclusive) {
|
||||||
BinaryRangeElementFuncForArray<ValueType, true, false, filter_type>
|
BinaryRangeElementFuncForArray<ValueType, true, false, filter_type>
|
||||||
@ -523,6 +578,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
|
|||||||
size,
|
size,
|
||||||
res,
|
res,
|
||||||
valid_res,
|
valid_res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
offsets);
|
offsets);
|
||||||
|
|
||||||
} else if (!lower_inclusive && upper_inclusive) {
|
} else if (!lower_inclusive && upper_inclusive) {
|
||||||
@ -536,6 +593,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
|
|||||||
size,
|
size,
|
||||||
res,
|
res,
|
||||||
valid_res,
|
valid_res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
offsets);
|
offsets);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
@ -549,9 +608,13 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
|
|||||||
size,
|
size,
|
||||||
res,
|
res,
|
||||||
valid_res,
|
valid_res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
offsets);
|
offsets);
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size;
|
int64_t processed_size;
|
||||||
if (has_offset_input_) {
|
if (has_offset_input_) {
|
||||||
processed_size =
|
processed_size =
|
||||||
|
|||||||
@ -44,9 +44,17 @@ struct BinaryRangeElementFunc {
|
|||||||
const T* src,
|
const T* src,
|
||||||
size_t n,
|
size_t n,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
const TargetBitmap& bitmap_input,
|
||||||
|
size_t start_cursor,
|
||||||
const int32_t* offsets = nullptr) {
|
const int32_t* offsets = nullptr) {
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random ||
|
||||||
|
std::is_same_v<T, std::string> ||
|
||||||
|
std::is_same_v<T, std::string_view>) {
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
for (size_t i = 0; i < n; ++i) {
|
for (size_t i = 0; i < n; ++i) {
|
||||||
|
if (has_bitmap_input && !bitmap_input[i + start_cursor]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
auto offset = (offsets) ? offsets[i] : i;
|
auto offset = (offsets) ? offsets[i] : i;
|
||||||
if constexpr (lower_inclusive && upper_inclusive) {
|
if constexpr (lower_inclusive && upper_inclusive) {
|
||||||
res[i] = val1 <= src[offset] && src[offset] <= val2;
|
res[i] = val1 <= src[offset] && src[offset] <= val2;
|
||||||
@ -83,6 +91,9 @@ struct BinaryRangeElementFunc {
|
|||||||
res[i] = valid_res[i] = false; \
|
res[i] = valid_res[i] = false; \
|
||||||
break; \
|
break; \
|
||||||
} \
|
} \
|
||||||
|
if (has_bitmap_input && !bitmap_input[i + start_cursor]) { \
|
||||||
|
break; \
|
||||||
|
} \
|
||||||
auto x = src[offset].template at<GetType>(pointer); \
|
auto x = src[offset].template at<GetType>(pointer); \
|
||||||
if (x.error()) { \
|
if (x.error()) { \
|
||||||
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
||||||
@ -117,7 +128,10 @@ struct BinaryRangeElementFuncForJson {
|
|||||||
size_t n,
|
size_t n,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
TargetBitmapView valid_res,
|
TargetBitmapView valid_res,
|
||||||
|
const TargetBitmap& bitmap_input,
|
||||||
|
size_t start_cursor,
|
||||||
const int32_t* offsets = nullptr) {
|
const int32_t* offsets = nullptr) {
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
for (size_t i = 0; i < n; ++i) {
|
for (size_t i = 0; i < n; ++i) {
|
||||||
auto offset = i;
|
auto offset = i;
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
@ -153,8 +167,14 @@ struct BinaryRangeElementFuncForArray {
|
|||||||
size_t n,
|
size_t n,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
TargetBitmapView valid_res,
|
TargetBitmapView valid_res,
|
||||||
|
const TargetBitmap& bitmap_input,
|
||||||
|
size_t start_cursor,
|
||||||
const int32_t* offsets = nullptr) {
|
const int32_t* offsets = nullptr) {
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
for (size_t i = 0; i < n; ++i) {
|
for (size_t i = 0; i < n; ++i) {
|
||||||
|
if (has_bitmap_input && !bitmap_input[i + start_cursor]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
size_t offset = i;
|
size_t offset = i;
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
offset = (offsets) ? offsets[i] : i;
|
offset = (offsets) ? offsets[i] : i;
|
||||||
@ -240,6 +260,21 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
|
|||||||
void
|
void
|
||||||
Eval(EvalCtx& context, VectorPtr& result) override;
|
Eval(EvalCtx& context, VectorPtr& result) override;
|
||||||
|
|
||||||
|
std::string
|
||||||
|
ToString() const {
|
||||||
|
return fmt::format("{}", expr_->ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IsSource() const override {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<milvus::expr::ColumnInfo>
|
||||||
|
GetColumnInfo() const override {
|
||||||
|
return expr_->column_;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Check overflow and cache result for performance
|
// Check overflow and cache result for performance
|
||||||
template <
|
template <
|
||||||
@ -259,7 +294,7 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImpl(OffsetVector* input = nullptr);
|
ExecRangeVisitorImpl(EvalCtx& context);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
@ -267,15 +302,15 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImplForData(OffsetVector* input = nullptr);
|
ExecRangeVisitorImplForData(EvalCtx& context);
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImplForJson(OffsetVector* input = nullptr);
|
ExecRangeVisitorImplForJson(EvalCtx& context);
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImplForArray(OffsetVector* input = nullptr);
|
ExecRangeVisitorImplForArray(EvalCtx& context);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<const milvus::expr::BinaryRangeFilterExpr> expr_;
|
std::shared_ptr<const milvus::expr::BinaryRangeFilterExpr> expr_;
|
||||||
|
|||||||
@ -68,6 +68,21 @@ class PhyCallExpr : public Expr {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
ToString() const {
|
||||||
|
return fmt::format("{}", expr_->ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IsSource() const override {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<milvus::expr::ColumnInfo>
|
||||||
|
GetColumnInfo() const override {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<const milvus::expr::CallExpr> expr_;
|
std::shared_ptr<const milvus::expr::CallExpr> expr_;
|
||||||
|
|
||||||
|
|||||||
@ -114,6 +114,21 @@ class PhyColumnExpr : public Expr {
|
|||||||
VectorPtr
|
VectorPtr
|
||||||
DoEval(OffsetVector* input = nullptr);
|
DoEval(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
|
std::string
|
||||||
|
ToString() const {
|
||||||
|
return fmt::format("{}", expr_->ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IsSource() const override {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<milvus::expr::ColumnInfo>
|
||||||
|
GetColumnInfo() const override {
|
||||||
|
return expr_->GetColumn();
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool is_indexed_;
|
bool is_indexed_;
|
||||||
|
|
||||||
|
|||||||
@ -38,20 +38,20 @@ PhyCompareFilterExpr::GetNextBatchSize() {
|
|||||||
|
|
||||||
template <typename OpType>
|
template <typename OpType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op,
|
PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op, EvalCtx& context) {
|
||||||
OffsetVector* input) {
|
|
||||||
// take offsets as input
|
// take offsets as input
|
||||||
|
auto input = context.get_offset_input();
|
||||||
if (has_offset_input_) {
|
if (has_offset_input_) {
|
||||||
auto real_batch_size = input->size();
|
auto real_batch_size = input->size();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
auto left_data_barrier = segment_chunk_reader_.segment_->num_chunk_data(
|
auto left_data_barrier = segment_chunk_reader_.segment_->num_chunk_data(
|
||||||
expr_->left_field_id_);
|
expr_->left_field_id_);
|
||||||
@ -215,37 +215,37 @@ PhyCompareFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
// For segment both fields has no index, can use SIMD to speed up.
|
// For segment both fields has no index, can use SIMD to speed up.
|
||||||
// Avoiding too much call stack that blocks SIMD.
|
// Avoiding too much call stack that blocks SIMD.
|
||||||
if (!is_left_indexed_ && !is_right_indexed_ && !IsStringExpr()) {
|
if (!is_left_indexed_ && !is_right_indexed_ && !IsStringExpr()) {
|
||||||
result = ExecCompareExprDispatcherForBothDataSegment(input);
|
result = ExecCompareExprDispatcherForBothDataSegment(context);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
result = ExecCompareExprDispatcherForHybridSegment(input);
|
result = ExecCompareExprDispatcherForHybridSegment(context);
|
||||||
}
|
}
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyCompareFilterExpr::ExecCompareExprDispatcherForHybridSegment(
|
PhyCompareFilterExpr::ExecCompareExprDispatcherForHybridSegment(
|
||||||
OffsetVector* input) {
|
EvalCtx& context) {
|
||||||
switch (expr_->op_type_) {
|
switch (expr_->op_type_) {
|
||||||
case OpType::Equal: {
|
case OpType::Equal: {
|
||||||
return ExecCompareExprDispatcher(std::equal_to<>{}, input);
|
return ExecCompareExprDispatcher(std::equal_to<>{}, context);
|
||||||
}
|
}
|
||||||
case OpType::NotEqual: {
|
case OpType::NotEqual: {
|
||||||
return ExecCompareExprDispatcher(std::not_equal_to<>{}, input);
|
return ExecCompareExprDispatcher(std::not_equal_to<>{}, context);
|
||||||
}
|
}
|
||||||
case OpType::GreaterEqual: {
|
case OpType::GreaterEqual: {
|
||||||
return ExecCompareExprDispatcher(std::greater_equal<>{}, input);
|
return ExecCompareExprDispatcher(std::greater_equal<>{}, context);
|
||||||
}
|
}
|
||||||
case OpType::GreaterThan: {
|
case OpType::GreaterThan: {
|
||||||
return ExecCompareExprDispatcher(std::greater<>{}, input);
|
return ExecCompareExprDispatcher(std::greater<>{}, context);
|
||||||
}
|
}
|
||||||
case OpType::LessEqual: {
|
case OpType::LessEqual: {
|
||||||
return ExecCompareExprDispatcher(std::less_equal<>{}, input);
|
return ExecCompareExprDispatcher(std::less_equal<>{}, context);
|
||||||
}
|
}
|
||||||
case OpType::LessThan: {
|
case OpType::LessThan: {
|
||||||
return ExecCompareExprDispatcher(std::less<>{}, input);
|
return ExecCompareExprDispatcher(std::less<>{}, context);
|
||||||
}
|
}
|
||||||
case OpType::PrefixMatch: {
|
case OpType::PrefixMatch: {
|
||||||
return ExecCompareExprDispatcher(
|
return ExecCompareExprDispatcher(
|
||||||
milvus::query::MatchOp<OpType::PrefixMatch>{}, input);
|
milvus::query::MatchOp<OpType::PrefixMatch>{}, context);
|
||||||
}
|
}
|
||||||
// case OpType::PostfixMatch: {
|
// case OpType::PostfixMatch: {
|
||||||
// }
|
// }
|
||||||
@ -257,22 +257,22 @@ PhyCompareFilterExpr::ExecCompareExprDispatcherForHybridSegment(
|
|||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyCompareFilterExpr::ExecCompareExprDispatcherForBothDataSegment(
|
PhyCompareFilterExpr::ExecCompareExprDispatcherForBothDataSegment(
|
||||||
OffsetVector* input) {
|
EvalCtx& context) {
|
||||||
switch (expr_->left_data_type_) {
|
switch (expr_->left_data_type_) {
|
||||||
case DataType::BOOL:
|
case DataType::BOOL:
|
||||||
return ExecCompareLeftType<bool>(input);
|
return ExecCompareLeftType<bool>(context);
|
||||||
case DataType::INT8:
|
case DataType::INT8:
|
||||||
return ExecCompareLeftType<int8_t>(input);
|
return ExecCompareLeftType<int8_t>(context);
|
||||||
case DataType::INT16:
|
case DataType::INT16:
|
||||||
return ExecCompareLeftType<int16_t>(input);
|
return ExecCompareLeftType<int16_t>(context);
|
||||||
case DataType::INT32:
|
case DataType::INT32:
|
||||||
return ExecCompareLeftType<int32_t>(input);
|
return ExecCompareLeftType<int32_t>(context);
|
||||||
case DataType::INT64:
|
case DataType::INT64:
|
||||||
return ExecCompareLeftType<int64_t>(input);
|
return ExecCompareLeftType<int64_t>(context);
|
||||||
case DataType::FLOAT:
|
case DataType::FLOAT:
|
||||||
return ExecCompareLeftType<float>(input);
|
return ExecCompareLeftType<float>(context);
|
||||||
case DataType::DOUBLE:
|
case DataType::DOUBLE:
|
||||||
return ExecCompareLeftType<double>(input);
|
return ExecCompareLeftType<double>(context);
|
||||||
default:
|
default:
|
||||||
PanicInfo(
|
PanicInfo(
|
||||||
DataTypeInvalid,
|
DataTypeInvalid,
|
||||||
@ -283,22 +283,22 @@ PhyCompareFilterExpr::ExecCompareExprDispatcherForBothDataSegment(
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyCompareFilterExpr::ExecCompareLeftType(OffsetVector* input) {
|
PhyCompareFilterExpr::ExecCompareLeftType(EvalCtx& context) {
|
||||||
switch (expr_->right_data_type_) {
|
switch (expr_->right_data_type_) {
|
||||||
case DataType::BOOL:
|
case DataType::BOOL:
|
||||||
return ExecCompareRightType<T, bool>(input);
|
return ExecCompareRightType<T, bool>(context);
|
||||||
case DataType::INT8:
|
case DataType::INT8:
|
||||||
return ExecCompareRightType<T, int8_t>(input);
|
return ExecCompareRightType<T, int8_t>(context);
|
||||||
case DataType::INT16:
|
case DataType::INT16:
|
||||||
return ExecCompareRightType<T, int16_t>(input);
|
return ExecCompareRightType<T, int16_t>(context);
|
||||||
case DataType::INT32:
|
case DataType::INT32:
|
||||||
return ExecCompareRightType<T, int32_t>(input);
|
return ExecCompareRightType<T, int32_t>(context);
|
||||||
case DataType::INT64:
|
case DataType::INT64:
|
||||||
return ExecCompareRightType<T, int64_t>(input);
|
return ExecCompareRightType<T, int64_t>(context);
|
||||||
case DataType::FLOAT:
|
case DataType::FLOAT:
|
||||||
return ExecCompareRightType<T, float>(input);
|
return ExecCompareRightType<T, float>(context);
|
||||||
case DataType::DOUBLE:
|
case DataType::DOUBLE:
|
||||||
return ExecCompareRightType<T, double>(input);
|
return ExecCompareRightType<T, double>(context);
|
||||||
default:
|
default:
|
||||||
PanicInfo(
|
PanicInfo(
|
||||||
DataTypeInvalid,
|
DataTypeInvalid,
|
||||||
@ -309,22 +309,26 @@ PhyCompareFilterExpr::ExecCompareLeftType(OffsetVector* input) {
|
|||||||
|
|
||||||
template <typename T, typename U>
|
template <typename T, typename U>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyCompareFilterExpr::ExecCompareRightType(OffsetVector* input) {
|
PhyCompareFilterExpr::ExecCompareRightType(EvalCtx& context) {
|
||||||
|
auto input = context.get_offset_input();
|
||||||
auto real_batch_size =
|
auto real_batch_size =
|
||||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
auto res_vec =
|
||||||
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
auto expr_type = expr_->op_type_;
|
auto expr_type = expr_->op_type_;
|
||||||
auto execute_sub_batch = [expr_type]<FilterType filter_type =
|
size_t processed_cursor = 0;
|
||||||
FilterType::sequential>(
|
auto execute_sub_batch =
|
||||||
|
[ expr_type, &bitmap_input, &
|
||||||
|
processed_cursor ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const T* left,
|
const T* left,
|
||||||
const U* right,
|
const U* right,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -334,36 +338,72 @@ PhyCompareFilterExpr::ExecCompareRightType(OffsetVector* input) {
|
|||||||
case proto::plan::GreaterThan: {
|
case proto::plan::GreaterThan: {
|
||||||
CompareElementFunc<T, U, proto::plan::GreaterThan, filter_type>
|
CompareElementFunc<T, U, proto::plan::GreaterThan, filter_type>
|
||||||
func;
|
func;
|
||||||
func(left, right, size, res, offsets);
|
func(left,
|
||||||
|
right,
|
||||||
|
size,
|
||||||
|
res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::GreaterEqual: {
|
case proto::plan::GreaterEqual: {
|
||||||
CompareElementFunc<T, U, proto::plan::GreaterEqual, filter_type>
|
CompareElementFunc<T, U, proto::plan::GreaterEqual, filter_type>
|
||||||
func;
|
func;
|
||||||
func(left, right, size, res, offsets);
|
func(left,
|
||||||
|
right,
|
||||||
|
size,
|
||||||
|
res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::LessThan: {
|
case proto::plan::LessThan: {
|
||||||
CompareElementFunc<T, U, proto::plan::LessThan, filter_type>
|
CompareElementFunc<T, U, proto::plan::LessThan, filter_type>
|
||||||
func;
|
func;
|
||||||
func(left, right, size, res, offsets);
|
func(left,
|
||||||
|
right,
|
||||||
|
size,
|
||||||
|
res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::LessEqual: {
|
case proto::plan::LessEqual: {
|
||||||
CompareElementFunc<T, U, proto::plan::LessEqual, filter_type>
|
CompareElementFunc<T, U, proto::plan::LessEqual, filter_type>
|
||||||
func;
|
func;
|
||||||
func(left, right, size, res, offsets);
|
func(left,
|
||||||
|
right,
|
||||||
|
size,
|
||||||
|
res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::Equal: {
|
case proto::plan::Equal: {
|
||||||
CompareElementFunc<T, U, proto::plan::Equal, filter_type> func;
|
CompareElementFunc<T, U, proto::plan::Equal, filter_type> func;
|
||||||
func(left, right, size, res, offsets);
|
func(left,
|
||||||
|
right,
|
||||||
|
size,
|
||||||
|
res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::NotEqual: {
|
case proto::plan::NotEqual: {
|
||||||
CompareElementFunc<T, U, proto::plan::NotEqual, filter_type>
|
CompareElementFunc<T, U, proto::plan::NotEqual, filter_type>
|
||||||
func;
|
func;
|
||||||
func(left, right, size, res, offsets);
|
func(left,
|
||||||
|
right,
|
||||||
|
size,
|
||||||
|
res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@ -372,6 +412,7 @@ PhyCompareFilterExpr::ExecCompareRightType(OffsetVector* input) {
|
|||||||
"compare column expr: {}",
|
"compare column expr: {}",
|
||||||
expr_type));
|
expr_type));
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
int64_t processed_size;
|
int64_t processed_size;
|
||||||
if (has_offset_input_) {
|
if (has_offset_input_) {
|
||||||
|
|||||||
@ -40,6 +40,8 @@ struct CompareElementFunc {
|
|||||||
const U* right,
|
const U* right,
|
||||||
size_t size,
|
size_t size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
const TargetBitmap& bitmap_input,
|
||||||
|
size_t start_cursor,
|
||||||
const int32_t* offsets = nullptr) {
|
const int32_t* offsets = nullptr) {
|
||||||
// This is the original code, kept here for the documentation purposes
|
// This is the original code, kept here for the documentation purposes
|
||||||
// also, used for iterative filter
|
// also, used for iterative filter
|
||||||
@ -69,6 +71,34 @@ struct CompareElementFunc {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!bitmap_input.empty()) {
|
||||||
|
for (int i = 0; i < size; ++i) {
|
||||||
|
if (!bitmap_input[start_cursor + i]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if constexpr (op == proto::plan::OpType::Equal) {
|
||||||
|
res[i] = left[i] == right[i];
|
||||||
|
} else if constexpr (op == proto::plan::OpType::NotEqual) {
|
||||||
|
res[i] = left[i] != right[i];
|
||||||
|
} else if constexpr (op == proto::plan::OpType::GreaterThan) {
|
||||||
|
res[i] = left[i] > right[i];
|
||||||
|
} else if constexpr (op == proto::plan::OpType::LessThan) {
|
||||||
|
res[i] = left[i] < right[i];
|
||||||
|
} else if constexpr (op == proto::plan::OpType::GreaterEqual) {
|
||||||
|
res[i] = left[i] >= right[i];
|
||||||
|
} else if constexpr (op == proto::plan::OpType::LessEqual) {
|
||||||
|
res[i] = left[i] <= right[i];
|
||||||
|
} else {
|
||||||
|
PanicInfo(
|
||||||
|
OpTypeInvalid,
|
||||||
|
fmt::format(
|
||||||
|
"unsupported op_type:{} for CompareElementFunc",
|
||||||
|
op));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if constexpr (op == proto::plan::OpType::Equal) {
|
if constexpr (op == proto::plan::OpType::Equal) {
|
||||||
res.inplace_compare_column<T, U, milvus::bitset::CompareOpType::EQ>(
|
res.inplace_compare_column<T, U, milvus::bitset::CompareOpType::EQ>(
|
||||||
left, right, size);
|
left, right, size);
|
||||||
@ -170,6 +200,21 @@ class PhyCompareFilterExpr : public Expr {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
ToString() const {
|
||||||
|
return fmt::format("{}", expr_->ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IsSource() const override {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<milvus::expr::ColumnInfo>
|
||||||
|
GetColumnInfo() const override {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int64_t
|
int64_t
|
||||||
GetCurrentRows() {
|
GetCurrentRows() {
|
||||||
@ -451,21 +496,21 @@ class PhyCompareFilterExpr : public Expr {
|
|||||||
|
|
||||||
template <typename OpType>
|
template <typename OpType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecCompareExprDispatcher(OpType op, OffsetVector* input = nullptr);
|
ExecCompareExprDispatcher(OpType op, EvalCtx& context);
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecCompareExprDispatcherForHybridSegment(OffsetVector* input = nullptr);
|
ExecCompareExprDispatcherForHybridSegment(EvalCtx& context);
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecCompareExprDispatcherForBothDataSegment(OffsetVector* input = nullptr);
|
ExecCompareExprDispatcherForBothDataSegment(EvalCtx& context);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecCompareLeftType(OffsetVector* input = nullptr);
|
ExecCompareLeftType(EvalCtx& context);
|
||||||
|
|
||||||
template <typename T, typename U>
|
template <typename T, typename U>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecCompareRightType(OffsetVector* input = nullptr);
|
ExecCompareRightType(EvalCtx& context);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const FieldId left_field_;
|
const FieldId left_field_;
|
||||||
|
|||||||
@ -83,16 +83,22 @@ PhyConjunctFilterExpr::CanSkipFollowingExprs(ColumnVectorPtr& vec) {
|
|||||||
|
|
||||||
void
|
void
|
||||||
PhyConjunctFilterExpr::SkipFollowingExprs(int start) {
|
PhyConjunctFilterExpr::SkipFollowingExprs(int start) {
|
||||||
for (int i = start; i < inputs_.size(); ++i) {
|
for (int i = start; i < input_order_.size(); ++i) {
|
||||||
inputs_[i]->MoveCursor();
|
inputs_[input_order_[i]]->MoveCursor();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
PhyConjunctFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
PhyConjunctFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||||
for (int i = 0; i < inputs_.size(); ++i) {
|
if (input_order_.empty()) {
|
||||||
|
input_order_.resize(inputs_.size());
|
||||||
|
for (size_t i = 0; i < inputs_.size(); i++) {
|
||||||
|
input_order_[i] = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (int i = 0; i < input_order_.size(); ++i) {
|
||||||
VectorPtr input_result;
|
VectorPtr input_result;
|
||||||
inputs_[i]->Eval(context, input_result);
|
inputs_[input_order_[i]]->Eval(context, input_result);
|
||||||
if (i == 0) {
|
if (i == 0) {
|
||||||
result = input_result;
|
result = input_result;
|
||||||
auto all_flat_result = GetColumnVector(result);
|
auto all_flat_result = GetColumnVector(result);
|
||||||
@ -100,6 +106,7 @@ PhyConjunctFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
SkipFollowingExprs(i + 1);
|
SkipFollowingExprs(i + 1);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
SetNextExprBitmapInput(all_flat_result, context);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto input_flat_result = GetColumnVector(input_result);
|
auto input_flat_result = GetColumnVector(input_result);
|
||||||
@ -110,7 +117,9 @@ PhyConjunctFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
SkipFollowingExprs(i + 1);
|
SkipFollowingExprs(i + 1);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
SetNextExprBitmapInput(all_flat_result, context);
|
||||||
}
|
}
|
||||||
|
ClearBitmapInput(context);
|
||||||
}
|
}
|
||||||
|
|
||||||
} //namespace exec
|
} //namespace exec
|
||||||
|
|||||||
@ -66,7 +66,7 @@ struct ConjunctElementFunc {
|
|||||||
class PhyConjunctFilterExpr : public Expr {
|
class PhyConjunctFilterExpr : public Expr {
|
||||||
public:
|
public:
|
||||||
PhyConjunctFilterExpr(std::vector<ExprPtr>&& inputs, bool is_and)
|
PhyConjunctFilterExpr(std::vector<ExprPtr>&& inputs, bool is_and)
|
||||||
: Expr(DataType::BOOL, std::move(inputs), is_and ? "and" : "or"),
|
: Expr(DataType::BOOL, std::move(inputs), "PhyConjunctFilterExpr"),
|
||||||
is_and_(is_and) {
|
is_and_(is_and) {
|
||||||
std::vector<DataType> input_types;
|
std::vector<DataType> input_types;
|
||||||
input_types.reserve(inputs_.size());
|
input_types.reserve(inputs_.size());
|
||||||
@ -101,6 +101,63 @@ class PhyConjunctFilterExpr : public Expr {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
ToString() const {
|
||||||
|
if (!input_order_.empty()) {
|
||||||
|
std::vector<std::string> inputs;
|
||||||
|
for (auto& i : input_order_) {
|
||||||
|
inputs.push_back(inputs_[i]->ToString());
|
||||||
|
}
|
||||||
|
std::string input_str =
|
||||||
|
is_and_ ? Join(inputs, " && ") : Join(inputs, " || ");
|
||||||
|
return fmt::format("[ConjuctExpr:{}]", input_str);
|
||||||
|
}
|
||||||
|
std::vector<std::string> inputs;
|
||||||
|
for (auto& in : inputs_) {
|
||||||
|
inputs.push_back(in->ToString());
|
||||||
|
}
|
||||||
|
std::string input_str =
|
||||||
|
is_and_ ? Join(inputs, " && ") : Join(inputs, "||");
|
||||||
|
return fmt::format("[ConjuctExpr:{}]", input_str);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IsSource() const override {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<milvus::expr::ColumnInfo>
|
||||||
|
GetColumnInfo() const override {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Reorder(const std::vector<size_t>& exprs_order) {
|
||||||
|
input_order_ = exprs_order;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<size_t>
|
||||||
|
GetReorder() {
|
||||||
|
return input_order_;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
SetNextExprBitmapInput(const ColumnVectorPtr& vec, EvalCtx& context) {
|
||||||
|
TargetBitmapView last_res_bitmap(vec->GetRawData(), vec->size());
|
||||||
|
TargetBitmap next_input_bitmap(last_res_bitmap);
|
||||||
|
if (is_and_) {
|
||||||
|
context.set_bitmap_input(std::move(next_input_bitmap));
|
||||||
|
} else {
|
||||||
|
next_input_bitmap.flip();
|
||||||
|
context.set_bitmap_input(std::move(next_input_bitmap));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ClearBitmapInput(EvalCtx& context) {
|
||||||
|
context.clear_bitmap_input();
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int64_t
|
int64_t
|
||||||
UpdateResult(ColumnVectorPtr& input_result,
|
UpdateResult(ColumnVectorPtr& input_result,
|
||||||
@ -117,7 +174,7 @@ class PhyConjunctFilterExpr : public Expr {
|
|||||||
SkipFollowingExprs(int start);
|
SkipFollowingExprs(int start);
|
||||||
// true if conjunction (and), false if disjunction (or).
|
// true if conjunction (and), false if disjunction (or).
|
||||||
bool is_and_;
|
bool is_and_;
|
||||||
std::vector<int32_t> input_order_;
|
std::vector<size_t> input_order_;
|
||||||
};
|
};
|
||||||
} //namespace exec
|
} //namespace exec
|
||||||
} // namespace milvus
|
} // namespace milvus
|
||||||
|
|||||||
@ -69,12 +69,30 @@ class EvalCtx {
|
|||||||
offset_input_ = offset_input;
|
offset_input_ = offset_input;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void
|
||||||
|
set_bitmap_input(TargetBitmap&& bitmap_input) {
|
||||||
|
bitmap_input_ = std::move(bitmap_input);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a read-only reference to the stored bitmap input. The reference
// points at a member of this EvalCtx, so it reflects later calls to
// set_bitmap_input() / clear_bitmap_input().
inline const TargetBitmap&
get_bitmap_input() const {
    return bitmap_input_;
}
|
||||||
|
|
||||||
|
void
|
||||||
|
clear_bitmap_input() {
|
||||||
|
bitmap_input_.clear();
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ExecContext* exec_ctx_ = nullptr;
|
ExecContext* exec_ctx_ = nullptr;
|
||||||
ExprSet* expr_set_ = nullptr;
|
ExprSet* expr_set_ = nullptr;
|
||||||
// we may accept offsets array as input and do expr filtering on these data
|
// we may accept offsets array as input and do expr filtering on these data
|
||||||
OffsetVector* offset_input_ = nullptr;
|
OffsetVector* offset_input_ = nullptr;
|
||||||
bool input_no_nulls_ = false;
|
bool input_no_nulls_ = false;
|
||||||
|
|
||||||
|
// Used for expression pre-filtering: lets an expression avoid unnecessary execution on rows that were already filtered out.
|
||||||
|
TargetBitmap bitmap_input_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace exec
|
} // namespace exec
|
||||||
|
|||||||
@ -30,7 +30,7 @@ PhyExistsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
PanicInfo(ExprInvalid,
|
PanicInfo(ExprInvalid,
|
||||||
"exists expr for json index mode not supported");
|
"exists expr for json index mode not supported");
|
||||||
}
|
}
|
||||||
result = EvalJsonExistsForDataSegment(input);
|
result = EvalJsonExistsForDataSegment(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@ -41,21 +41,26 @@ PhyExistsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyExistsFilterExpr::EvalJsonExistsForDataSegment(OffsetVector* input) {
|
PhyExistsFilterExpr::EvalJsonExistsForDataSegment(EvalCtx& context) {
|
||||||
|
auto* input = context.get_offset_input();
|
||||||
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
|
FieldId field_id = expr_->column_.field_id_;
|
||||||
auto real_batch_size =
|
auto real_batch_size =
|
||||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
|
int processed_cursor = 0;
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[]<FilterType filter_type = FilterType::sequential>(
|
[&bitmap_input, &
|
||||||
|
processed_cursor ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const milvus::Json* data,
|
const milvus::Json* data,
|
||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -63,6 +68,7 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment(OffsetVector* input) {
|
|||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
TargetBitmapView valid_res,
|
TargetBitmapView valid_res,
|
||||||
const std::string& pointer) {
|
const std::string& pointer) {
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
auto offset = i;
|
auto offset = i;
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
@ -72,8 +78,12 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment(OffsetVector* input) {
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = data[offset].exist(pointer);
|
res[i] = data[offset].exist(pointer);
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size;
|
int64_t processed_size;
|
||||||
|
|||||||
@ -57,9 +57,24 @@ class PhyExistsFilterExpr : public SegmentExpr {
|
|||||||
void
|
void
|
||||||
Eval(EvalCtx& context, VectorPtr& result) override;
|
Eval(EvalCtx& context, VectorPtr& result) override;
|
||||||
|
|
||||||
|
std::string
|
||||||
|
ToString() const {
|
||||||
|
return fmt::format("{}", expr_->ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IsSource() const override {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<milvus::expr::ColumnInfo>
|
||||||
|
GetColumnInfo() const override {
|
||||||
|
return expr_->column_;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
VectorPtr
|
VectorPtr
|
||||||
EvalJsonExistsForDataSegment(OffsetVector* input = nullptr);
|
EvalJsonExistsForDataSegment(EvalCtx& context);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<const milvus::expr::ExistsExpr> expr_;
|
std::shared_ptr<const milvus::expr::ExistsExpr> expr_;
|
||||||
|
|||||||
@ -67,7 +67,9 @@ CompileExpressions(const std::vector<expr::TypedExprPtr>& sources,
|
|||||||
enable_constant_folding));
|
enable_constant_folding));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (OPTIMIZE_EXPR_ENABLED) {
|
||||||
OptimizeCompiledExprs(context, exprs);
|
OptimizeCompiledExprs(context, exprs);
|
||||||
|
}
|
||||||
|
|
||||||
return exprs;
|
return exprs;
|
||||||
}
|
}
|
||||||
@ -303,9 +305,174 @@ CompileExpression(const expr::TypedExprPtr& expr,
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IsLikeExpr(std::shared_ptr<Expr> input) {
|
||||||
|
if (input->name() == "PhyUnaryRangeFilterExpr") {
|
||||||
|
auto optype = std::static_pointer_cast<PhyUnaryRangeFilterExpr>(input)
|
||||||
|
->GetLogicalExpr()
|
||||||
|
->op_type_;
|
||||||
|
switch (optype) {
|
||||||
|
case proto::plan::PrefixMatch:
|
||||||
|
case proto::plan::PostfixMatch:
|
||||||
|
case proto::plan::Match:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void
|
||||||
|
ReorderConjunctExpr(std::shared_ptr<milvus::exec::PhyConjunctFilterExpr>& expr,
|
||||||
|
ExecContext* context,
|
||||||
|
bool& has_heavy_operation) {
|
||||||
|
auto* segment = context->get_query_context()->get_segment();
|
||||||
|
if (!segment || !expr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
std::vector<size_t> reorder;
|
||||||
|
std::vector<size_t> numeric_expr;
|
||||||
|
std::vector<size_t> indexed_expr;
|
||||||
|
std::vector<size_t> string_expr;
|
||||||
|
std::vector<size_t> str_like_expr;
|
||||||
|
std::vector<size_t> json_expr;
|
||||||
|
std::vector<size_t> json_like_expr;
|
||||||
|
std::vector<size_t> array_expr;
|
||||||
|
std::vector<size_t> array_like_expr;
|
||||||
|
std::vector<size_t> compare_expr;
|
||||||
|
std::vector<size_t> other_expr;
|
||||||
|
std::vector<size_t> heavy_conjunct_expr;
|
||||||
|
std::vector<size_t> light_conjunct_expr;
|
||||||
|
|
||||||
|
const auto& inputs = expr->GetInputsRef();
|
||||||
|
for (int i = 0; i < inputs.size(); i++) {
|
||||||
|
auto input = inputs[i];
|
||||||
|
|
||||||
|
if (input->IsSource() && input->GetColumnInfo().has_value()) {
|
||||||
|
auto column = input->GetColumnInfo().value();
|
||||||
|
if (IsNumericDataType(column.data_type_)) {
|
||||||
|
numeric_expr.push_back(i);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (segment->HasIndex(column.field_id_)) {
|
||||||
|
indexed_expr.push_back(i);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (IsStringDataType(column.data_type_)) {
|
||||||
|
auto is_like_expr = IsLikeExpr(input);
|
||||||
|
if (is_like_expr) {
|
||||||
|
str_like_expr.push_back(i);
|
||||||
|
has_heavy_operation = true;
|
||||||
|
} else {
|
||||||
|
string_expr.push_back(i);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (IsArrayDataType(column.data_type_)) {
|
||||||
|
auto is_like_expr = IsLikeExpr(input);
|
||||||
|
if (is_like_expr) {
|
||||||
|
array_like_expr.push_back(i);
|
||||||
|
has_heavy_operation = true;
|
||||||
|
} else {
|
||||||
|
array_expr.push_back(i);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (IsJsonDataType(column.data_type_)) {
|
||||||
|
auto is_like_expr = IsLikeExpr(input);
|
||||||
|
if (is_like_expr) {
|
||||||
|
json_like_expr.push_back(i);
|
||||||
|
} else {
|
||||||
|
json_expr.push_back(i);
|
||||||
|
}
|
||||||
|
has_heavy_operation = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (input->name() == "PhyConjunctFilterExpr") {
|
||||||
|
bool sub_expr_heavy = false;
|
||||||
|
auto expr = std::static_pointer_cast<PhyConjunctFilterExpr>(input);
|
||||||
|
ReorderConjunctExpr(expr, context, sub_expr_heavy);
|
||||||
|
has_heavy_operation |= sub_expr_heavy;
|
||||||
|
if (sub_expr_heavy) {
|
||||||
|
heavy_conjunct_expr.push_back(i);
|
||||||
|
} else {
|
||||||
|
light_conjunct_expr.push_back(i);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (input->name() == "PhyCompareFilterExpr") {
|
||||||
|
compare_expr.push_back(i);
|
||||||
|
has_heavy_operation = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
other_expr.push_back(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
reorder.reserve(inputs.size());
|
||||||
|
// Final reorder sequence:
|
||||||
|
// 1. Numeric column expressions (fastest to evaluate)
|
||||||
|
// 2. Indexed column expressions (can use index for efficient filtering)
|
||||||
|
// 3. String column expressions
|
||||||
|
// 4. Light conjunct expressions (conjunctions without heavy operations)
|
||||||
|
// 5. Other expressions
|
||||||
|
// 6. Array column expression
|
||||||
|
// 7. String like expression
|
||||||
|
// 8. Array like expression
|
||||||
|
// 9. JSON column expressions (expensive to evaluate)
|
||||||
|
// 10. JSON like expression (more expensive than common json compare)
|
||||||
|
// 11. Heavy conjunct expressions (conjunctions with heavy operations)
|
||||||
|
// 12. Compare filter expressions (most expensive, comparing two columns)
|
||||||
|
reorder.insert(reorder.end(), numeric_expr.begin(), numeric_expr.end());
|
||||||
|
reorder.insert(reorder.end(), indexed_expr.begin(), indexed_expr.end());
|
||||||
|
reorder.insert(reorder.end(), string_expr.begin(), string_expr.end());
|
||||||
|
reorder.insert(
|
||||||
|
reorder.end(), light_conjunct_expr.begin(), light_conjunct_expr.end());
|
||||||
|
reorder.insert(reorder.end(), other_expr.begin(), other_expr.end());
|
||||||
|
reorder.insert(reorder.end(), array_expr.begin(), array_expr.end());
|
||||||
|
reorder.insert(reorder.end(), str_like_expr.begin(), str_like_expr.end());
|
||||||
|
reorder.insert(
|
||||||
|
reorder.end(), array_like_expr.begin(), array_like_expr.end());
|
||||||
|
reorder.insert(reorder.end(), json_expr.begin(), json_expr.end());
|
||||||
|
reorder.insert(reorder.end(), json_like_expr.begin(), json_like_expr.end());
|
||||||
|
reorder.insert(
|
||||||
|
reorder.end(), heavy_conjunct_expr.begin(), heavy_conjunct_expr.end());
|
||||||
|
reorder.insert(reorder.end(), compare_expr.begin(), compare_expr.end());
|
||||||
|
|
||||||
|
AssertInfo(reorder.size() == inputs.size(),
|
||||||
|
"reorder size:{} but input size:{}",
|
||||||
|
reorder.size(),
|
||||||
|
inputs.size());
|
||||||
|
|
||||||
|
expr->Reorder(reorder);
|
||||||
|
}
|
||||||
|
|
||||||
inline void
|
inline void
|
||||||
OptimizeCompiledExprs(ExecContext* context, const std::vector<ExprPtr>& exprs) {
|
OptimizeCompiledExprs(ExecContext* context, const std::vector<ExprPtr>& exprs) {
|
||||||
//TODO: add optimization pattern
|
std::chrono::high_resolution_clock::time_point start =
|
||||||
|
std::chrono::high_resolution_clock::now();
|
||||||
|
for (const auto& expr : exprs) {
|
||||||
|
if (expr->name() == "PhyConjunctFilterExpr") {
|
||||||
|
LOG_DEBUG("before reoder filter expression: {}", expr->ToString());
|
||||||
|
auto conjunct_expr =
|
||||||
|
std::static_pointer_cast<PhyConjunctFilterExpr>(expr);
|
||||||
|
bool has_heavy_operation = false;
|
||||||
|
ReorderConjunctExpr(conjunct_expr, context, has_heavy_operation);
|
||||||
|
LOG_DEBUG("after reorder filter expression: {}", expr->ToString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::chrono::high_resolution_clock::time_point end =
|
||||||
|
std::chrono::high_resolution_clock::now();
|
||||||
|
double cost =
|
||||||
|
std::chrono::duration<double, std::micro>(end - start).count();
|
||||||
|
monitor::internal_core_optimize_expr_latency.Observe(cost / 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace exec
|
} // namespace exec
|
||||||
|
|||||||
@ -64,7 +64,7 @@ class Expr {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::string
|
std::string
|
||||||
get_name() {
|
name() {
|
||||||
return name_;
|
return name_;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -88,9 +88,29 @@ class Expr {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual std::string
|
||||||
|
ToString() const {
|
||||||
|
PanicInfo(ErrorCode::NotImplemented, "not implemented");
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool
|
||||||
|
IsSource() const {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual std::optional<milvus::expr::ColumnInfo>
|
||||||
|
GetColumnInfo() const {
|
||||||
|
PanicInfo(ErrorCode::NotImplemented, "not implemented");
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::vector<std::shared_ptr<Expr>>&
|
||||||
|
GetInputsRef() {
|
||||||
|
return inputs_;
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DataType type_;
|
DataType type_;
|
||||||
const std::vector<std::shared_ptr<Expr>> inputs_;
|
std::vector<std::shared_ptr<Expr>> inputs_;
|
||||||
std::string name_;
|
std::string name_;
|
||||||
// NOTE: unused
|
// NOTE: unused
|
||||||
std::shared_ptr<VectorFunction> vector_func_;
|
std::shared_ptr<VectorFunction> vector_func_;
|
||||||
@ -167,6 +187,11 @@ class SegmentExpr : public Expr {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual bool
|
||||||
|
IsSource() const override {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
MoveCursorForDataMultipleChunk() {
|
MoveCursorForDataMultipleChunk() {
|
||||||
int64_t processed_size = 0;
|
int64_t processed_size = 0;
|
||||||
@ -1142,6 +1167,9 @@ class SegmentExpr : public Expr {
|
|||||||
std::shared_ptr<TargetBitmap> cached_match_res_{nullptr};
|
std::shared_ptr<TargetBitmap> cached_match_res_{nullptr};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
bool
|
||||||
|
IsLikeExpr(std::shared_ptr<Expr> expr);
|
||||||
|
|
||||||
void
|
void
|
||||||
OptimizeCompiledExprs(ExecContext* context, const std::vector<ExprPtr>& exprs);
|
OptimizeCompiledExprs(ExecContext* context, const std::vector<ExprPtr>& exprs);
|
||||||
|
|
||||||
|
|||||||
@ -30,17 +30,17 @@ PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
if (is_index_mode_ && !has_offset_input_) {
|
if (is_index_mode_ && !has_offset_input_) {
|
||||||
result = EvalArrayContainsForIndexSegment();
|
result = EvalArrayContainsForIndexSegment();
|
||||||
} else {
|
} else {
|
||||||
result = EvalJsonContainsForDataSegment(input);
|
result = EvalJsonContainsForDataSegment(context);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::JSON: {
|
case DataType::JSON: {
|
||||||
if (is_index_mode_ && !has_offset_input_) {
|
if (is_index_mode_ && !context.get_offset_input()) {
|
||||||
PanicInfo(
|
PanicInfo(ExprInvalid,
|
||||||
ExprInvalid,
|
"exists expr for json or array index mode not "
|
||||||
"exists expr for json or array index mode not supported");
|
"supported");
|
||||||
}
|
}
|
||||||
result = EvalJsonContainsForDataSegment(input);
|
result = EvalJsonContainsForDataSegment(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@ -51,7 +51,7 @@ PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
|
PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(EvalCtx& context) {
|
||||||
auto data_type = expr_->column_.data_type_;
|
auto data_type = expr_->column_.data_type_;
|
||||||
switch (expr_->op_) {
|
switch (expr_->op_) {
|
||||||
case proto::plan::JSONContainsExpr_JSONOp_Contains:
|
case proto::plan::JSONContainsExpr_JSONOp_Contains:
|
||||||
@ -60,16 +60,16 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
|
|||||||
auto val_type = expr_->vals_[0].val_case();
|
auto val_type = expr_->vals_[0].val_case();
|
||||||
switch (val_type) {
|
switch (val_type) {
|
||||||
case proto::plan::GenericValue::kBoolVal: {
|
case proto::plan::GenericValue::kBoolVal: {
|
||||||
return ExecArrayContains<bool>(input);
|
return ExecArrayContains<bool>(context);
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::kInt64Val: {
|
case proto::plan::GenericValue::kInt64Val: {
|
||||||
return ExecArrayContains<int64_t>(input);
|
return ExecArrayContains<int64_t>(context);
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::kFloatVal: {
|
case proto::plan::GenericValue::kFloatVal: {
|
||||||
return ExecArrayContains<double>(input);
|
return ExecArrayContains<double>(context);
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::kStringVal: {
|
case proto::plan::GenericValue::kStringVal: {
|
||||||
return ExecArrayContains<std::string>(input);
|
return ExecArrayContains<std::string>(context);
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
PanicInfo(
|
PanicInfo(
|
||||||
@ -81,19 +81,19 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
|
|||||||
auto val_type = expr_->vals_[0].val_case();
|
auto val_type = expr_->vals_[0].val_case();
|
||||||
switch (val_type) {
|
switch (val_type) {
|
||||||
case proto::plan::GenericValue::kBoolVal: {
|
case proto::plan::GenericValue::kBoolVal: {
|
||||||
return ExecJsonContains<bool>(input);
|
return ExecJsonContains<bool>(context);
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::kInt64Val: {
|
case proto::plan::GenericValue::kInt64Val: {
|
||||||
return ExecJsonContains<int64_t>(input);
|
return ExecJsonContains<int64_t>(context);
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::kFloatVal: {
|
case proto::plan::GenericValue::kFloatVal: {
|
||||||
return ExecJsonContains<double>(input);
|
return ExecJsonContains<double>(context);
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::kStringVal: {
|
case proto::plan::GenericValue::kStringVal: {
|
||||||
return ExecJsonContains<std::string>(input);
|
return ExecJsonContains<std::string>(context);
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::kArrayVal: {
|
case proto::plan::GenericValue::kArrayVal: {
|
||||||
return ExecJsonContainsArray(input);
|
return ExecJsonContainsArray(context);
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
PanicInfo(DataTypeInvalid,
|
PanicInfo(DataTypeInvalid,
|
||||||
@ -101,7 +101,7 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
|
|||||||
val_type);
|
val_type);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return ExecJsonContainsWithDiffType(input);
|
return ExecJsonContainsWithDiffType(context);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -110,16 +110,16 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
|
|||||||
auto val_type = expr_->vals_[0].val_case();
|
auto val_type = expr_->vals_[0].val_case();
|
||||||
switch (val_type) {
|
switch (val_type) {
|
||||||
case proto::plan::GenericValue::kBoolVal: {
|
case proto::plan::GenericValue::kBoolVal: {
|
||||||
return ExecArrayContainsAll<bool>(input);
|
return ExecArrayContainsAll<bool>(context);
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::kInt64Val: {
|
case proto::plan::GenericValue::kInt64Val: {
|
||||||
return ExecArrayContainsAll<int64_t>(input);
|
return ExecArrayContainsAll<int64_t>(context);
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::kFloatVal: {
|
case proto::plan::GenericValue::kFloatVal: {
|
||||||
return ExecArrayContainsAll<double>(input);
|
return ExecArrayContainsAll<double>(context);
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::kStringVal: {
|
case proto::plan::GenericValue::kStringVal: {
|
||||||
return ExecArrayContainsAll<std::string>(input);
|
return ExecArrayContainsAll<std::string>(context);
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
PanicInfo(
|
PanicInfo(
|
||||||
@ -131,19 +131,19 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
|
|||||||
auto val_type = expr_->vals_[0].val_case();
|
auto val_type = expr_->vals_[0].val_case();
|
||||||
switch (val_type) {
|
switch (val_type) {
|
||||||
case proto::plan::GenericValue::kBoolVal: {
|
case proto::plan::GenericValue::kBoolVal: {
|
||||||
return ExecJsonContainsAll<bool>(input);
|
return ExecJsonContainsAll<bool>(context);
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::kInt64Val: {
|
case proto::plan::GenericValue::kInt64Val: {
|
||||||
return ExecJsonContainsAll<int64_t>(input);
|
return ExecJsonContainsAll<int64_t>(context);
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::kFloatVal: {
|
case proto::plan::GenericValue::kFloatVal: {
|
||||||
return ExecJsonContainsAll<double>(input);
|
return ExecJsonContainsAll<double>(context);
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::kStringVal: {
|
case proto::plan::GenericValue::kStringVal: {
|
||||||
return ExecJsonContainsAll<std::string>(input);
|
return ExecJsonContainsAll<std::string>(context);
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::kArrayVal: {
|
case proto::plan::GenericValue::kArrayVal: {
|
||||||
return ExecJsonContainsAllArray(input);
|
return ExecJsonContainsAllArray(context);
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
PanicInfo(DataTypeInvalid,
|
PanicInfo(DataTypeInvalid,
|
||||||
@ -151,7 +151,7 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
|
|||||||
val_type);
|
val_type);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return ExecJsonContainsAllWithDiffType(input);
|
return ExecJsonContainsAllWithDiffType(context);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -164,11 +164,13 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) {
|
|||||||
|
|
||||||
template <typename ExprValueType>
|
template <typename ExprValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) {
|
PhyJsonContainsFilterExpr::ExecArrayContains(EvalCtx& context) {
|
||||||
using GetType =
|
using GetType =
|
||||||
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
ExprValueType>;
|
ExprValueType>;
|
||||||
|
auto* input = context.get_offset_input();
|
||||||
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
auto real_batch_size =
|
auto real_batch_size =
|
||||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
@ -177,18 +179,21 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) {
|
|||||||
AssertInfo(expr_->column_.nested_path_.size() == 0,
|
AssertInfo(expr_->column_.nested_path_.size() == 0,
|
||||||
"[ExecArrayContains]nested path must be null");
|
"[ExecArrayContains]nested path must be null");
|
||||||
|
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
if (!arg_inited_) {
|
if (!arg_inited_) {
|
||||||
arg_set_ = std::make_shared<SortVectorElement<GetType>>(expr_->vals_);
|
arg_set_ = std::make_shared<SortVectorElement<GetType>>(expr_->vals_);
|
||||||
arg_inited_ = true;
|
arg_inited_ = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int processed_cursor = 0;
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[]<FilterType filter_type = FilterType::sequential>(
|
[&processed_cursor, &
|
||||||
|
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const milvus::ArrayView* data,
|
const milvus::ArrayView* data,
|
||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -205,6 +210,7 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) {
|
|||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
auto offset = i;
|
auto offset = i;
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
@ -214,8 +220,12 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) {
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = executor(offset);
|
res[i] = executor(offset);
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size;
|
int64_t processed_size;
|
||||||
@ -241,30 +251,36 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) {
|
|||||||
|
|
||||||
template <typename ExprValueType>
|
template <typename ExprValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) {
|
PhyJsonContainsFilterExpr::ExecJsonContains(EvalCtx& context) {
|
||||||
using GetType =
|
using GetType =
|
||||||
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
ExprValueType>;
|
ExprValueType>;
|
||||||
|
auto* input = context.get_offset_input();
|
||||||
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
|
|
||||||
auto real_batch_size =
|
auto real_batch_size =
|
||||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
if (!arg_inited_) {
|
if (!arg_inited_) {
|
||||||
arg_set_ = std::make_shared<SortVectorElement<GetType>>(expr_->vals_);
|
arg_set_ = std::make_shared<SortVectorElement<GetType>>(expr_->vals_);
|
||||||
arg_inited_ = true;
|
arg_inited_ = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t processed_cursor = 0;
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[]<FilterType filter_type = FilterType::sequential>(
|
[&processed_cursor, &
|
||||||
|
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const milvus::Json* data,
|
const milvus::Json* data,
|
||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -290,6 +306,7 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) {
|
|||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
auto offset = i;
|
auto offset = i;
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
@ -299,8 +316,12 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) {
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = executor(offset);
|
res[i] = executor(offset);
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size;
|
int64_t processed_size;
|
||||||
@ -329,26 +350,31 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyJsonContainsFilterExpr::ExecJsonContainsArray(OffsetVector* input) {
|
PhyJsonContainsFilterExpr::ExecJsonContainsArray(EvalCtx& context) {
|
||||||
|
auto* input = context.get_offset_input();
|
||||||
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
auto real_batch_size =
|
auto real_batch_size =
|
||||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
std::vector<proto::plan::Array> elements;
|
std::vector<proto::plan::Array> elements;
|
||||||
for (auto const& element : expr_->vals_) {
|
for (auto const& element : expr_->vals_) {
|
||||||
elements.emplace_back(GetValueFromProto<proto::plan::Array>(element));
|
elements.emplace_back(GetValueFromProto<proto::plan::Array>(element));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t processed_cursor = 0;
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[]<FilterType filter_type = FilterType::sequential>(
|
[&processed_cursor, &
|
||||||
|
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const milvus::Json* data,
|
const milvus::Json* data,
|
||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -383,6 +409,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray(OffsetVector* input) {
|
|||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
auto offset = i;
|
auto offset = i;
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
@ -392,8 +419,12 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray(OffsetVector* input) {
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = executor(offset);
|
res[i] = executor(offset);
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size;
|
int64_t processed_size;
|
||||||
@ -423,11 +454,13 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray(OffsetVector* input) {
|
|||||||
|
|
||||||
template <typename ExprValueType>
|
template <typename ExprValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) {
|
PhyJsonContainsFilterExpr::ExecArrayContainsAll(EvalCtx& context) {
|
||||||
using GetType =
|
using GetType =
|
||||||
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
ExprValueType>;
|
ExprValueType>;
|
||||||
|
auto* input = context.get_offset_input();
|
||||||
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
AssertInfo(expr_->column_.nested_path_.size() == 0,
|
AssertInfo(expr_->column_.nested_path_.size() == 0,
|
||||||
"[ExecArrayContainsAll]nested path must be null");
|
"[ExecArrayContainsAll]nested path must be null");
|
||||||
auto real_batch_size =
|
auto real_batch_size =
|
||||||
@ -436,19 +469,21 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
std::set<GetType> elements;
|
std::set<GetType> elements;
|
||||||
for (auto const& element : expr_->vals_) {
|
for (auto const& element : expr_->vals_) {
|
||||||
elements.insert(GetValueFromProto<GetType>(element));
|
elements.insert(GetValueFromProto<GetType>(element));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int processed_cursor = 0;
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[]<FilterType filter_type = FilterType::sequential>(
|
[&processed_cursor, &
|
||||||
|
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const milvus::ArrayView* data,
|
const milvus::ArrayView* data,
|
||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -467,6 +502,7 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) {
|
|||||||
}
|
}
|
||||||
return tmp_elements.size() == 0;
|
return tmp_elements.size() == 0;
|
||||||
};
|
};
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
auto offset = i;
|
auto offset = i;
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
@ -476,8 +512,12 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) {
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = executor(offset);
|
res[i] = executor(offset);
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size;
|
int64_t processed_size;
|
||||||
@ -503,22 +543,24 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) {
|
|||||||
|
|
||||||
template <typename ExprValueType>
|
template <typename ExprValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) {
|
PhyJsonContainsFilterExpr::ExecJsonContainsAll(EvalCtx& context) {
|
||||||
using GetType =
|
using GetType =
|
||||||
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
ExprValueType>;
|
ExprValueType>;
|
||||||
|
auto* input = context.get_offset_input();
|
||||||
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
auto real_batch_size =
|
auto real_batch_size =
|
||||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
std::set<GetType> elements;
|
std::set<GetType> elements;
|
||||||
@ -526,8 +568,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) {
|
|||||||
elements.insert(GetValueFromProto<GetType>(element));
|
elements.insert(GetValueFromProto<GetType>(element));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int processed_cursor = 0;
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[]<FilterType filter_type = FilterType::sequential>(
|
[&processed_cursor, &
|
||||||
|
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const milvus::Json* data,
|
const milvus::Json* data,
|
||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -556,6 +600,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) {
|
|||||||
}
|
}
|
||||||
return tmp_elements.size() == 0;
|
return tmp_elements.size() == 0;
|
||||||
};
|
};
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
auto offset = i;
|
auto offset = i;
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
@ -565,8 +610,12 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) {
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = executor(offset);
|
res[i] = executor(offset);
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size;
|
int64_t processed_size;
|
||||||
@ -595,18 +644,19 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(
|
PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(EvalCtx& context) {
|
||||||
OffsetVector* input) {
|
auto* input = context.get_offset_input();
|
||||||
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
auto real_batch_size =
|
auto real_batch_size =
|
||||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
|
|
||||||
@ -618,8 +668,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(
|
|||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int processed_cursor = 0;
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[]<FilterType filter_type = FilterType::sequential>(
|
[&processed_cursor, &
|
||||||
|
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const milvus::Json* data,
|
const milvus::Json* data,
|
||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -707,6 +759,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(
|
|||||||
}
|
}
|
||||||
return tmp_elements_index.size() == 0;
|
return tmp_elements_index.size() == 0;
|
||||||
};
|
};
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
auto offset = i;
|
auto offset = i;
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
@ -716,8 +769,13 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
res[i] = executor(offset);
|
res[i] = executor(offset);
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size;
|
int64_t processed_size;
|
||||||
@ -748,18 +806,20 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(
|
|||||||
}
|
}
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(OffsetVector* input) {
|
PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(EvalCtx& context) {
|
||||||
|
auto* input = context.get_offset_input();
|
||||||
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
auto real_batch_size =
|
auto real_batch_size =
|
||||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
|
|
||||||
@ -767,8 +827,11 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(OffsetVector* input) {
|
|||||||
for (auto const& element : expr_->vals_) {
|
for (auto const& element : expr_->vals_) {
|
||||||
elements.emplace_back(GetValueFromProto<proto::plan::Array>(element));
|
elements.emplace_back(GetValueFromProto<proto::plan::Array>(element));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t processed_cursor = 0;
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[]<FilterType filter_type = FilterType::sequential>(
|
[&processed_cursor, &
|
||||||
|
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const milvus::Json* data,
|
const milvus::Json* data,
|
||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -807,6 +870,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(OffsetVector* input) {
|
|||||||
}
|
}
|
||||||
return exist_elements_index.size() == elements.size();
|
return exist_elements_index.size() == elements.size();
|
||||||
};
|
};
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
auto offset = i;
|
auto offset = i;
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
@ -816,8 +880,13 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(OffsetVector* input) {
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
res[i] = executor(offset);
|
res[i] = executor(offset);
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size;
|
int64_t processed_size;
|
||||||
@ -846,18 +915,20 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(OffsetVector* input) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(OffsetVector* input) {
|
PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(EvalCtx& context) {
|
||||||
|
auto* input = context.get_offset_input();
|
||||||
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
auto real_batch_size =
|
auto real_batch_size =
|
||||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
|
|
||||||
@ -869,8 +940,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(OffsetVector* input) {
|
|||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t processed_cursor = 0;
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[]<FilterType filter_type = FilterType::sequential>(
|
[&processed_cursor, &
|
||||||
|
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const milvus::Json* data,
|
const milvus::Json* data,
|
||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -949,6 +1022,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(OffsetVector* input) {
|
|||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
auto offset = i;
|
auto offset = i;
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
@ -958,8 +1032,13 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(OffsetVector* input) {
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
res[i] = executor(offset);
|
res[i] = executor(offset);
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size;
|
int64_t processed_size;
|
||||||
|
|||||||
@ -51,37 +51,52 @@ class PhyJsonContainsFilterExpr : public SegmentExpr {
|
|||||||
void
|
void
|
||||||
Eval(EvalCtx& context, VectorPtr& result) override;
|
Eval(EvalCtx& context, VectorPtr& result) override;
|
||||||
|
|
||||||
|
std::string
|
||||||
|
ToString() const {
|
||||||
|
return fmt::format("{}", expr_->ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IsSource() const override {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<milvus::expr::ColumnInfo>
|
||||||
|
GetColumnInfo() const override {
|
||||||
|
return expr_->column_;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
VectorPtr
|
VectorPtr
|
||||||
EvalJsonContainsForDataSegment(OffsetVector* input = nullptr);
|
EvalJsonContainsForDataSegment(EvalCtx& context);
|
||||||
|
|
||||||
template <typename ExprValueType>
|
template <typename ExprValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecJsonContains(OffsetVector* input = nullptr);
|
ExecJsonContains(EvalCtx& context);
|
||||||
|
|
||||||
template <typename ExprValueType>
|
template <typename ExprValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecArrayContains(OffsetVector* input = nullptr);
|
ExecArrayContains(EvalCtx& context);
|
||||||
|
|
||||||
template <typename ExprValueType>
|
template <typename ExprValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecJsonContainsAll(OffsetVector* input = nullptr);
|
ExecJsonContainsAll(EvalCtx& context);
|
||||||
|
|
||||||
template <typename ExprValueType>
|
template <typename ExprValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecArrayContainsAll(OffsetVector* input = nullptr);
|
ExecArrayContainsAll(EvalCtx& context);
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecJsonContainsArray(OffsetVector* input = nullptr);
|
ExecJsonContainsArray(EvalCtx& context);
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecJsonContainsAllArray(OffsetVector* input = nullptr);
|
ExecJsonContainsAllArray(EvalCtx& context);
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecJsonContainsAllWithDiffType(OffsetVector* input = nullptr);
|
ExecJsonContainsAllWithDiffType(EvalCtx& context);
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecJsonContainsWithDiffType(OffsetVector* input = nullptr);
|
ExecJsonContainsWithDiffType(EvalCtx& context);
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
EvalArrayContainsForIndexSegment();
|
EvalArrayContainsForIndexSegment();
|
||||||
|
|||||||
@ -87,6 +87,21 @@ class PhyLogicalBinaryExpr : public Expr {
|
|||||||
inputs_[1]->SupportOffsetInput();
|
inputs_[1]->SupportOffsetInput();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
ToString() const {
|
||||||
|
return fmt::format("{}", expr_->ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IsSource() const override {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<milvus::expr::ColumnInfo>
|
||||||
|
GetColumnInfo() const override {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<const milvus::expr::LogicalBinaryExpr> expr_;
|
std::shared_ptr<const milvus::expr::LogicalBinaryExpr> expr_;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -51,6 +51,21 @@ class PhyLogicalUnaryExpr : public Expr {
|
|||||||
return inputs_[0]->SupportOffsetInput();
|
return inputs_[0]->SupportOffsetInput();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
ToString() const {
|
||||||
|
return fmt::format("{}", expr_->ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IsSource() const override {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<milvus::expr::ColumnInfo>
|
||||||
|
GetColumnInfo() const override {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<const milvus::expr::LogicalUnaryExpr> expr_;
|
std::shared_ptr<const milvus::expr::LogicalUnaryExpr> expr_;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -50,6 +50,21 @@ class PhyNullExpr : public SegmentExpr {
|
|||||||
void
|
void
|
||||||
Eval(EvalCtx& context, VectorPtr& result) override;
|
Eval(EvalCtx& context, VectorPtr& result) override;
|
||||||
|
|
||||||
|
std::string
|
||||||
|
ToString() const {
|
||||||
|
return fmt::format("{}", expr_->ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IsSource() const override {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<milvus::expr::ColumnInfo>
|
||||||
|
GetColumnInfo() const override {
|
||||||
|
return expr_->column_;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ColumnVectorPtr
|
ColumnVectorPtr
|
||||||
PreCheckNullable(OffsetVector* input);
|
PreCheckNullable(OffsetVector* input);
|
||||||
|
|||||||
@ -32,31 +32,31 @@ PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
}
|
}
|
||||||
switch (expr_->column_.data_type_) {
|
switch (expr_->column_.data_type_) {
|
||||||
case DataType::BOOL: {
|
case DataType::BOOL: {
|
||||||
result = ExecVisitorImpl<bool>(input);
|
result = ExecVisitorImpl<bool>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT8: {
|
case DataType::INT8: {
|
||||||
result = ExecVisitorImpl<int8_t>(input);
|
result = ExecVisitorImpl<int8_t>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT16: {
|
case DataType::INT16: {
|
||||||
result = ExecVisitorImpl<int16_t>(input);
|
result = ExecVisitorImpl<int16_t>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT32: {
|
case DataType::INT32: {
|
||||||
result = ExecVisitorImpl<int32_t>(input);
|
result = ExecVisitorImpl<int32_t>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT64: {
|
case DataType::INT64: {
|
||||||
result = ExecVisitorImpl<int64_t>(input);
|
result = ExecVisitorImpl<int64_t>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::FLOAT: {
|
case DataType::FLOAT: {
|
||||||
result = ExecVisitorImpl<float>(input);
|
result = ExecVisitorImpl<float>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::DOUBLE: {
|
case DataType::DOUBLE: {
|
||||||
result = ExecVisitorImpl<double>(input);
|
result = ExecVisitorImpl<double>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::VARCHAR: {
|
case DataType::VARCHAR: {
|
||||||
@ -64,30 +64,30 @@ PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
!storage::MmapManager::GetInstance()
|
!storage::MmapManager::GetInstance()
|
||||||
.GetMmapConfig()
|
.GetMmapConfig()
|
||||||
.growing_enable_mmap) {
|
.growing_enable_mmap) {
|
||||||
result = ExecVisitorImpl<std::string>(input);
|
result = ExecVisitorImpl<std::string>(context);
|
||||||
} else {
|
} else {
|
||||||
result = ExecVisitorImpl<std::string_view>(input);
|
result = ExecVisitorImpl<std::string_view>(context);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::JSON: {
|
case DataType::JSON: {
|
||||||
if (expr_->vals_.size() == 0) {
|
if (expr_->vals_.size() == 0) {
|
||||||
result = ExecVisitorImplTemplateJson<bool>(input);
|
result = ExecVisitorImplTemplateJson<bool>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
auto type = expr_->vals_[0].val_case();
|
auto type = expr_->vals_[0].val_case();
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case proto::plan::GenericValue::ValCase::kBoolVal:
|
case proto::plan::GenericValue::ValCase::kBoolVal:
|
||||||
result = ExecVisitorImplTemplateJson<bool>(input);
|
result = ExecVisitorImplTemplateJson<bool>(context);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kInt64Val:
|
case proto::plan::GenericValue::ValCase::kInt64Val:
|
||||||
result = ExecVisitorImplTemplateJson<int64_t>(input);
|
result = ExecVisitorImplTemplateJson<int64_t>(context);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kFloatVal:
|
case proto::plan::GenericValue::ValCase::kFloatVal:
|
||||||
result = ExecVisitorImplTemplateJson<double>(input);
|
result = ExecVisitorImplTemplateJson<double>(context);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kStringVal:
|
case proto::plan::GenericValue::ValCase::kStringVal:
|
||||||
result = ExecVisitorImplTemplateJson<std::string>(input);
|
result = ExecVisitorImplTemplateJson<std::string>(context);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
PanicInfo(DataTypeInvalid, "unknown data type: {}", type);
|
PanicInfo(DataTypeInvalid, "unknown data type: {}", type);
|
||||||
@ -97,26 +97,26 @@ PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
case DataType::ARRAY: {
|
case DataType::ARRAY: {
|
||||||
if (expr_->vals_.size() == 0) {
|
if (expr_->vals_.size() == 0) {
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecVisitorImplTemplateArray<bool>(input);
|
result = ExecVisitorImplTemplateArray<bool>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
auto type = expr_->vals_[0].val_case();
|
auto type = expr_->vals_[0].val_case();
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case proto::plan::GenericValue::ValCase::kBoolVal:
|
case proto::plan::GenericValue::ValCase::kBoolVal:
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecVisitorImplTemplateArray<bool>(input);
|
result = ExecVisitorImplTemplateArray<bool>(context);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kInt64Val:
|
case proto::plan::GenericValue::ValCase::kInt64Val:
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecVisitorImplTemplateArray<int64_t>(input);
|
result = ExecVisitorImplTemplateArray<int64_t>(context);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kFloatVal:
|
case proto::plan::GenericValue::ValCase::kFloatVal:
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecVisitorImplTemplateArray<double>(input);
|
result = ExecVisitorImplTemplateArray<double>(context);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kStringVal:
|
case proto::plan::GenericValue::ValCase::kStringVal:
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecVisitorImplTemplateArray<std::string>(input);
|
result = ExecVisitorImplTemplateArray<std::string>(context);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
PanicInfo(DataTypeInvalid, "unknown data type: {}", type);
|
PanicInfo(DataTypeInvalid, "unknown data type: {}", type);
|
||||||
@ -216,12 +216,11 @@ PhyTermFilterExpr::ExecPkTermImpl() {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
// pk valid_bitmap is always all true
|
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
auto current_chunk_view =
|
auto current_chunk_view =
|
||||||
cached_bits_.view(current_data_chunk_pos_, real_batch_size);
|
cached_bits_.view(current_data_chunk_pos_, real_batch_size);
|
||||||
@ -233,9 +232,9 @@ PhyTermFilterExpr::ExecPkTermImpl() {
|
|||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyTermFilterExpr::ExecVisitorImplTemplateJson(OffsetVector* input) {
|
PhyTermFilterExpr::ExecVisitorImplTemplateJson(EvalCtx& context) {
|
||||||
if (expr_->is_in_field_) {
|
if (expr_->is_in_field_) {
|
||||||
return ExecTermJsonVariableInField<ValueType>(input);
|
return ExecTermJsonVariableInField<ValueType>(context);
|
||||||
} else {
|
} else {
|
||||||
if (is_index_mode_) {
|
if (is_index_mode_) {
|
||||||
// we create double index for json int64 field for now
|
// we create double index for json int64 field for now
|
||||||
@ -243,40 +242,42 @@ PhyTermFilterExpr::ExecVisitorImplTemplateJson(OffsetVector* input) {
|
|||||||
std::conditional_t<std::is_same_v<ValueType, int64_t>,
|
std::conditional_t<std::is_same_v<ValueType, int64_t>,
|
||||||
double,
|
double,
|
||||||
ValueType>;
|
ValueType>;
|
||||||
return ExecVisitorImplForIndex<GetType>(input);
|
return ExecVisitorImplForIndex<GetType>();
|
||||||
} else {
|
} else {
|
||||||
return ExecTermJsonFieldInVariable<ValueType>(input);
|
return ExecTermJsonFieldInVariable<ValueType>(context);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyTermFilterExpr::ExecVisitorImplTemplateArray(OffsetVector* input) {
|
PhyTermFilterExpr::ExecVisitorImplTemplateArray(EvalCtx& context) {
|
||||||
if (expr_->is_in_field_) {
|
if (expr_->is_in_field_) {
|
||||||
return ExecTermArrayVariableInField<ValueType>(input);
|
return ExecTermArrayVariableInField<ValueType>(context);
|
||||||
} else {
|
} else {
|
||||||
return ExecTermArrayFieldInVariable<ValueType>(input);
|
return ExecTermArrayFieldInVariable<ValueType>(context);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) {
|
PhyTermFilterExpr::ExecTermArrayVariableInField(EvalCtx& context) {
|
||||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
ValueType>;
|
ValueType>;
|
||||||
|
auto* input = context.get_offset_input();
|
||||||
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
auto real_batch_size =
|
auto real_batch_size =
|
||||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
AssertInfo(expr_->vals_.size() == 1,
|
AssertInfo(expr_->vals_.size() == 1,
|
||||||
"element length in json array must be one");
|
"element length in json array must be one");
|
||||||
@ -286,8 +287,10 @@ PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) {
|
|||||||
}
|
}
|
||||||
auto target_val = arg_val_.GetValue<ValueType>();
|
auto target_val = arg_val_.GetValue<ValueType>();
|
||||||
|
|
||||||
|
int processed_cursor = 0;
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[]<FilterType filter_type = FilterType::sequential>(
|
[&processed_cursor, &
|
||||||
|
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const ArrayView* data,
|
const ArrayView* data,
|
||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -304,6 +307,7 @@ PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) {
|
|||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
auto offset = i;
|
auto offset = i;
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
@ -313,8 +317,12 @@ PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) {
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = executor(offset);
|
res[i] = executor(offset);
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size;
|
int64_t processed_size;
|
||||||
@ -340,22 +348,24 @@ PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) {
|
|||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
|
PhyTermFilterExpr::ExecTermArrayFieldInVariable(EvalCtx& context) {
|
||||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
ValueType>;
|
ValueType>;
|
||||||
|
|
||||||
|
auto* input = context.get_offset_input();
|
||||||
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
auto real_batch_size =
|
auto real_batch_size =
|
||||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
int index = -1;
|
int index = -1;
|
||||||
if (expr_->column_.nested_path_.size() > 0) {
|
if (expr_->column_.nested_path_.size() > 0) {
|
||||||
@ -372,8 +382,10 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
|
|||||||
return res_vec;
|
return res_vec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int processed_cursor = 0;
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[]<FilterType filter_type = FilterType::sequential>(
|
[&processed_cursor, &
|
||||||
|
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const ArrayView* data,
|
const ArrayView* data,
|
||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -382,6 +394,7 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
|
|||||||
TargetBitmapView valid_res,
|
TargetBitmapView valid_res,
|
||||||
int index,
|
int index,
|
||||||
const std::shared_ptr<MultiElement>& term_set) {
|
const std::shared_ptr<MultiElement>& term_set) {
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
auto offset = i;
|
auto offset = i;
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
@ -395,9 +408,13 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
|
|||||||
res[i] = false;
|
res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
auto value = data[offset].get_data<GetType>(index);
|
auto value = data[offset].get_data<GetType>(index);
|
||||||
res[i] = term_set->In(ValueType(value));
|
res[i] = term_set->In(ValueType(value));
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size;
|
int64_t processed_size;
|
||||||
@ -428,21 +445,23 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
|
|||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) {
|
PhyTermFilterExpr::ExecTermJsonVariableInField(EvalCtx& context) {
|
||||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
ValueType>;
|
ValueType>;
|
||||||
|
auto* input = context.get_offset_input();
|
||||||
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
auto real_batch_size =
|
auto real_batch_size =
|
||||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
AssertInfo(expr_->vals_.size() == 1,
|
AssertInfo(expr_->vals_.size() == 1,
|
||||||
"element length in json array must be one");
|
"element length in json array must be one");
|
||||||
@ -454,8 +473,10 @@ PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) {
|
|||||||
|
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
|
|
||||||
|
int processed_cursor = 0;
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[]<FilterType filter_type = FilterType::sequential>(
|
[&processed_cursor, &
|
||||||
|
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const Json* data,
|
const Json* data,
|
||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -480,6 +501,7 @@ PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) {
|
|||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
auto offset = i;
|
auto offset = i;
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
@ -489,8 +511,12 @@ PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) {
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = executor(offset);
|
res[i] = executor(offset);
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
int64_t processed_size;
|
int64_t processed_size;
|
||||||
if (has_offset_input_) {
|
if (has_offset_input_) {
|
||||||
@ -515,21 +541,25 @@ PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) {
|
|||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
|
PhyTermFilterExpr::ExecTermJsonFieldInVariable(EvalCtx& context) {
|
||||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
ValueType>;
|
ValueType>;
|
||||||
|
auto* input = context.get_offset_input();
|
||||||
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
|
FieldId field_id = expr_->column_.field_id_;
|
||||||
|
|
||||||
auto real_batch_size =
|
auto real_batch_size =
|
||||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
if (!arg_inited_) {
|
if (!arg_inited_) {
|
||||||
@ -543,8 +573,10 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
|
|||||||
return res_vec;
|
return res_vec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int processed_cursor = 0;
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[]<FilterType filter_type = FilterType::sequential>(
|
[&processed_cursor, &
|
||||||
|
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const Json* data,
|
const Json* data,
|
||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -571,6 +603,7 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
|
|||||||
}
|
}
|
||||||
return terms->In(ValueType(x.value()));
|
return terms->In(ValueType(x.value()));
|
||||||
};
|
};
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
auto offset = i;
|
auto offset = i;
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
@ -584,8 +617,13 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
|
|||||||
res[i] = false;
|
res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (has_bitmap_input && !bitmap_input[processed_cursor + i]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = executor(offset);
|
res[i] = executor(offset);
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
int64_t processed_size;
|
int64_t processed_size;
|
||||||
if (has_offset_input_) {
|
if (has_offset_input_) {
|
||||||
@ -614,17 +652,17 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyTermFilterExpr::ExecVisitorImpl(OffsetVector* input) {
|
PhyTermFilterExpr::ExecVisitorImpl(EvalCtx& context) {
|
||||||
if (is_index_mode_ && !has_offset_input_) {
|
if (is_index_mode_ && !has_offset_input_) {
|
||||||
return ExecVisitorImplForIndex<T>(input);
|
return ExecVisitorImplForIndex<T>();
|
||||||
} else {
|
} else {
|
||||||
return ExecVisitorImplForData<T>(input);
|
return ExecVisitorImplForData<T>(context);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyTermFilterExpr::ExecVisitorImplForIndex(OffsetVector* input) {
|
PhyTermFilterExpr::ExecVisitorImplForIndex() {
|
||||||
typedef std::
|
typedef std::
|
||||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||||
IndexInnerType;
|
IndexInnerType;
|
||||||
@ -667,7 +705,7 @@ PhyTermFilterExpr::ExecVisitorImplForIndex(OffsetVector* input) {
|
|||||||
|
|
||||||
template <>
|
template <>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyTermFilterExpr::ExecVisitorImplForIndex<bool>(OffsetVector* input) {
|
PhyTermFilterExpr::ExecVisitorImplForIndex<bool>() {
|
||||||
using Index = index::ScalarIndex<bool>;
|
using Index = index::ScalarIndex<bool>;
|
||||||
auto real_batch_size = GetNextBatchSize();
|
auto real_batch_size = GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
@ -689,18 +727,21 @@ PhyTermFilterExpr::ExecVisitorImplForIndex<bool>(OffsetVector* input) {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
|
PhyTermFilterExpr::ExecVisitorImplForData(EvalCtx& context) {
|
||||||
|
auto* input = context.get_offset_input();
|
||||||
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
|
|
||||||
auto real_batch_size =
|
auto real_batch_size =
|
||||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
if (!arg_inited_) {
|
if (!arg_inited_) {
|
||||||
std::vector<T> vals;
|
std::vector<T> vals;
|
||||||
@ -717,8 +758,10 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
|
|||||||
arg_inited_ = true;
|
arg_inited_ = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int processed_cursor = 0;
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[]<FilterType filter_type = FilterType::sequential>(
|
[&processed_cursor, &
|
||||||
|
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const T* data,
|
const T* data,
|
||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -726,6 +769,7 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
|
|||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
TargetBitmapView valid_res,
|
TargetBitmapView valid_res,
|
||||||
const std::shared_ptr<MultiElement>& vals) {
|
const std::shared_ptr<MultiElement>& vals) {
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
auto offset = i;
|
auto offset = i;
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
@ -735,8 +779,12 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input && !bitmap_input[i + processed_cursor]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = vals->In(data[offset]);
|
res[i] = vals->In(data[offset]);
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
int64_t processed_size;
|
int64_t processed_size;
|
||||||
if (has_offset_input_) {
|
if (has_offset_input_) {
|
||||||
|
|||||||
@ -75,6 +75,21 @@ class PhyTermFilterExpr : public SegmentExpr {
|
|||||||
void
|
void
|
||||||
Eval(EvalCtx& context, VectorPtr& result) override;
|
Eval(EvalCtx& context, VectorPtr& result) override;
|
||||||
|
|
||||||
|
bool
|
||||||
|
IsSource() const override {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
ToString() const {
|
||||||
|
return fmt::format("{}", expr_->ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<milvus::expr::ColumnInfo>
|
||||||
|
GetColumnInfo() const override {
|
||||||
|
return expr_->column_;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void
|
void
|
||||||
InitPkCacheOffset();
|
InitPkCacheOffset();
|
||||||
@ -88,39 +103,39 @@ class PhyTermFilterExpr : public SegmentExpr {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecVisitorImpl(OffsetVector* input = nullptr);
|
ExecVisitorImpl(EvalCtx& context);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecVisitorImplForIndex(OffsetVector* input = nullptr);
|
ExecVisitorImplForIndex();
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecVisitorImplForData(OffsetVector* input = nullptr);
|
ExecVisitorImplForData(EvalCtx& context);
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecVisitorImplTemplateJson(OffsetVector* input = nullptr);
|
ExecVisitorImplTemplateJson(EvalCtx& context);
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecTermJsonVariableInField(OffsetVector* input = nullptr);
|
ExecTermJsonVariableInField(EvalCtx& context);
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecTermJsonFieldInVariable(OffsetVector* input = nullptr);
|
ExecTermJsonFieldInVariable(EvalCtx& context);
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecVisitorImplTemplateArray(OffsetVector* input = nullptr);
|
ExecVisitorImplTemplateArray(EvalCtx& context);
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecTermArrayVariableInField(OffsetVector* input = nullptr);
|
ExecTermArrayVariableInField(EvalCtx& context);
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecTermArrayFieldInVariable(OffsetVector* input = nullptr);
|
ExecTermArrayFieldInVariable(EvalCtx& context);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<const milvus::expr::TermFilterExpr> expr_;
|
std::shared_ptr<const milvus::expr::TermFilterExpr> expr_;
|
||||||
|
|||||||
@ -89,51 +89,51 @@ PhyUnaryRangeFilterExpr::CanUseIndexForArray<milvus::Array>() {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex() {
|
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex(EvalCtx& context) {
|
||||||
return ExecRangeVisitorImplArray<T>();
|
return ExecRangeVisitorImplArray<T>(context);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex<
|
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex<proto::plan::Array>(
|
||||||
proto::plan::Array>() {
|
EvalCtx& context) {
|
||||||
switch (expr_->op_type_) {
|
switch (expr_->op_type_) {
|
||||||
case proto::plan::Equal:
|
case proto::plan::Equal:
|
||||||
case proto::plan::NotEqual: {
|
case proto::plan::NotEqual: {
|
||||||
switch (expr_->column_.element_type_) {
|
switch (expr_->column_.element_type_) {
|
||||||
case DataType::BOOL: {
|
case DataType::BOOL: {
|
||||||
return ExecArrayEqualForIndex<bool>(expr_->op_type_ ==
|
return ExecArrayEqualForIndex<bool>(
|
||||||
proto::plan::NotEqual);
|
context, expr_->op_type_ == proto::plan::NotEqual);
|
||||||
}
|
}
|
||||||
case DataType::INT8: {
|
case DataType::INT8: {
|
||||||
return ExecArrayEqualForIndex<int8_t>(
|
return ExecArrayEqualForIndex<int8_t>(
|
||||||
expr_->op_type_ == proto::plan::NotEqual);
|
context, expr_->op_type_ == proto::plan::NotEqual);
|
||||||
}
|
}
|
||||||
case DataType::INT16: {
|
case DataType::INT16: {
|
||||||
return ExecArrayEqualForIndex<int16_t>(
|
return ExecArrayEqualForIndex<int16_t>(
|
||||||
expr_->op_type_ == proto::plan::NotEqual);
|
context, expr_->op_type_ == proto::plan::NotEqual);
|
||||||
}
|
}
|
||||||
case DataType::INT32: {
|
case DataType::INT32: {
|
||||||
return ExecArrayEqualForIndex<int32_t>(
|
return ExecArrayEqualForIndex<int32_t>(
|
||||||
expr_->op_type_ == proto::plan::NotEqual);
|
context, expr_->op_type_ == proto::plan::NotEqual);
|
||||||
}
|
}
|
||||||
case DataType::INT64: {
|
case DataType::INT64: {
|
||||||
return ExecArrayEqualForIndex<int64_t>(
|
return ExecArrayEqualForIndex<int64_t>(
|
||||||
expr_->op_type_ == proto::plan::NotEqual);
|
context, expr_->op_type_ == proto::plan::NotEqual);
|
||||||
}
|
}
|
||||||
case DataType::FLOAT:
|
case DataType::FLOAT:
|
||||||
case DataType::DOUBLE: {
|
case DataType::DOUBLE: {
|
||||||
// not accurate on floating point number, rollback to bruteforce.
|
// not accurate on floating point number, rollback to bruteforce.
|
||||||
return ExecRangeVisitorImplArray<proto::plan::Array>(
|
return ExecRangeVisitorImplArray<proto::plan::Array>(
|
||||||
nullptr);
|
context);
|
||||||
}
|
}
|
||||||
case DataType::VARCHAR: {
|
case DataType::VARCHAR: {
|
||||||
if (segment_->type() == SegmentType::Growing) {
|
if (segment_->type() == SegmentType::Growing) {
|
||||||
return ExecArrayEqualForIndex<std::string>(
|
return ExecArrayEqualForIndex<std::string>(
|
||||||
expr_->op_type_ == proto::plan::NotEqual);
|
context, expr_->op_type_ == proto::plan::NotEqual);
|
||||||
} else {
|
} else {
|
||||||
return ExecArrayEqualForIndex<std::string_view>(
|
return ExecArrayEqualForIndex<std::string_view>(
|
||||||
expr_->op_type_ == proto::plan::NotEqual);
|
context, expr_->op_type_ == proto::plan::NotEqual);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@ -144,7 +144,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex<
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
return ExecRangeVisitorImplArray<proto::plan::Array>();
|
return ExecRangeVisitorImplArray<proto::plan::Array>(context);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -154,31 +154,31 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
SetHasOffsetInput((input != nullptr));
|
SetHasOffsetInput((input != nullptr));
|
||||||
switch (expr_->column_.data_type_) {
|
switch (expr_->column_.data_type_) {
|
||||||
case DataType::BOOL: {
|
case DataType::BOOL: {
|
||||||
result = ExecRangeVisitorImpl<bool>(input);
|
result = ExecRangeVisitorImpl<bool>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT8: {
|
case DataType::INT8: {
|
||||||
result = ExecRangeVisitorImpl<int8_t>(input);
|
result = ExecRangeVisitorImpl<int8_t>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT16: {
|
case DataType::INT16: {
|
||||||
result = ExecRangeVisitorImpl<int16_t>(input);
|
result = ExecRangeVisitorImpl<int16_t>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT32: {
|
case DataType::INT32: {
|
||||||
result = ExecRangeVisitorImpl<int32_t>(input);
|
result = ExecRangeVisitorImpl<int32_t>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT64: {
|
case DataType::INT64: {
|
||||||
result = ExecRangeVisitorImpl<int64_t>(input);
|
result = ExecRangeVisitorImpl<int64_t>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::FLOAT: {
|
case DataType::FLOAT: {
|
||||||
result = ExecRangeVisitorImpl<float>(input);
|
result = ExecRangeVisitorImpl<float>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::DOUBLE: {
|
case DataType::DOUBLE: {
|
||||||
result = ExecRangeVisitorImpl<double>(input);
|
result = ExecRangeVisitorImpl<double>(context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::VARCHAR: {
|
case DataType::VARCHAR: {
|
||||||
@ -186,9 +186,9 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
!storage::MmapManager::GetInstance()
|
!storage::MmapManager::GetInstance()
|
||||||
.GetMmapConfig()
|
.GetMmapConfig()
|
||||||
.growing_enable_mmap) {
|
.growing_enable_mmap) {
|
||||||
result = ExecRangeVisitorImpl<std::string>(input);
|
result = ExecRangeVisitorImpl<std::string>(context);
|
||||||
} else {
|
} else {
|
||||||
result = ExecRangeVisitorImpl<std::string_view>(input);
|
result = ExecRangeVisitorImpl<std::string_view>(context);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -227,20 +227,20 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
} else {
|
} else {
|
||||||
switch (val_type) {
|
switch (val_type) {
|
||||||
case proto::plan::GenericValue::ValCase::kBoolVal:
|
case proto::plan::GenericValue::ValCase::kBoolVal:
|
||||||
result = ExecRangeVisitorImplJson<bool>(input);
|
result = ExecRangeVisitorImplJson<bool>(context);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kInt64Val:
|
case proto::plan::GenericValue::ValCase::kInt64Val:
|
||||||
result = ExecRangeVisitorImplJson<int64_t>(input);
|
result = ExecRangeVisitorImplJson<int64_t>(context);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kFloatVal:
|
case proto::plan::GenericValue::ValCase::kFloatVal:
|
||||||
result = ExecRangeVisitorImplJson<double>(input);
|
result = ExecRangeVisitorImplJson<double>(context);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kStringVal:
|
case proto::plan::GenericValue::ValCase::kStringVal:
|
||||||
result = ExecRangeVisitorImplJson<std::string>(input);
|
result = ExecRangeVisitorImplJson<std::string>(context);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kArrayVal:
|
case proto::plan::GenericValue::ValCase::kArrayVal:
|
||||||
result =
|
result = ExecRangeVisitorImplJson<proto::plan::Array>(
|
||||||
ExecRangeVisitorImplJson<proto::plan::Array>(input);
|
context);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
PanicInfo(
|
PanicInfo(
|
||||||
@ -254,28 +254,28 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
switch (val_type) {
|
switch (val_type) {
|
||||||
case proto::plan::GenericValue::ValCase::kBoolVal:
|
case proto::plan::GenericValue::ValCase::kBoolVal:
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecRangeVisitorImplArray<bool>(input);
|
result = ExecRangeVisitorImplArray<bool>(context);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kInt64Val:
|
case proto::plan::GenericValue::ValCase::kInt64Val:
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecRangeVisitorImplArray<int64_t>(input);
|
result = ExecRangeVisitorImplArray<int64_t>(context);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kFloatVal:
|
case proto::plan::GenericValue::ValCase::kFloatVal:
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecRangeVisitorImplArray<double>(input);
|
result = ExecRangeVisitorImplArray<double>(context);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kStringVal:
|
case proto::plan::GenericValue::ValCase::kStringVal:
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecRangeVisitorImplArray<std::string>(input);
|
result = ExecRangeVisitorImplArray<std::string>(context);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kArrayVal:
|
case proto::plan::GenericValue::ValCase::kArrayVal:
|
||||||
if (!has_offset_input_ &&
|
if (!has_offset_input_ &&
|
||||||
CanUseIndexForArray<milvus::Array>()) {
|
CanUseIndexForArray<milvus::Array>()) {
|
||||||
result = ExecRangeVisitorImplArrayForIndex<
|
result = ExecRangeVisitorImplArrayForIndex<
|
||||||
proto::plan::Array>();
|
proto::plan::Array>(context);
|
||||||
} else {
|
} else {
|
||||||
result = ExecRangeVisitorImplArray<proto::plan::Array>(
|
result = ExecRangeVisitorImplArray<proto::plan::Array>(
|
||||||
input);
|
context);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -293,17 +293,19 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(EvalCtx& context) {
|
||||||
|
auto* input = context.get_offset_input();
|
||||||
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
auto real_batch_size =
|
auto real_batch_size =
|
||||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
|
||||||
if (!arg_inited_) {
|
if (!arg_inited_) {
|
||||||
value_arg_.SetValue<ValueType>(expr_->val_);
|
value_arg_.SetValue<ValueType>(expr_->val_);
|
||||||
@ -315,8 +317,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
|||||||
if (expr_->column_.nested_path_.size() > 0) {
|
if (expr_->column_.nested_path_.size() > 0) {
|
||||||
index = std::stoi(expr_->column_.nested_path_[0]);
|
index = std::stoi(expr_->column_.nested_path_[0]);
|
||||||
}
|
}
|
||||||
auto execute_sub_batch = [op_type]<FilterType filter_type =
|
int processed_cursor = 0;
|
||||||
FilterType::sequential>(
|
auto execute_sub_batch =
|
||||||
|
[ op_type, &processed_cursor, &
|
||||||
|
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const milvus::ArrayView* data,
|
const milvus::ArrayView* data,
|
||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -338,6 +342,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
|||||||
index,
|
index,
|
||||||
res,
|
res,
|
||||||
valid_res,
|
valid_res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
offsets);
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -353,6 +359,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
|||||||
index,
|
index,
|
||||||
res,
|
res,
|
||||||
valid_res,
|
valid_res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
offsets);
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -368,6 +376,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
|||||||
index,
|
index,
|
||||||
res,
|
res,
|
||||||
valid_res,
|
valid_res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
offsets);
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -383,6 +393,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
|||||||
index,
|
index,
|
||||||
res,
|
res,
|
||||||
valid_res,
|
valid_res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
offsets);
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -398,6 +410,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
|||||||
index,
|
index,
|
||||||
res,
|
res,
|
||||||
valid_res,
|
valid_res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
offsets);
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -413,6 +427,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
|||||||
index,
|
index,
|
||||||
res,
|
res,
|
||||||
valid_res,
|
valid_res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
offsets);
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -428,6 +444,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
|||||||
index,
|
index,
|
||||||
res,
|
res,
|
||||||
valid_res,
|
valid_res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
offsets);
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -443,6 +461,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
|||||||
index,
|
index,
|
||||||
res,
|
res,
|
||||||
valid_res,
|
valid_res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
offsets);
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -452,6 +472,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
|||||||
fmt::format("unsupported operator type for unary expr: {}",
|
fmt::format("unsupported operator type for unary expr: {}",
|
||||||
op_type));
|
op_type));
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
int64_t processed_size;
|
int64_t processed_size;
|
||||||
if (has_offset_input_) {
|
if (has_offset_input_) {
|
||||||
@ -477,7 +498,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) {
|
PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(EvalCtx& context,
|
||||||
|
bool reverse) {
|
||||||
typedef std::
|
typedef std::
|
||||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||||
IndexInnerType;
|
IndexInnerType;
|
||||||
@ -491,7 +513,7 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) {
|
|||||||
auto val = GetValueFromProto<proto::plan::Array>(expr_->val_);
|
auto val = GetValueFromProto<proto::plan::Array>(expr_->val_);
|
||||||
if (val.array_size() == 0) {
|
if (val.array_size() == 0) {
|
||||||
// rollback to bruteforce. no candidates will be filtered out via index.
|
// rollback to bruteforce. no candidates will be filtered out via index.
|
||||||
return ExecRangeVisitorImplArray<proto::plan::Array>();
|
return ExecRangeVisitorImplArray<proto::plan::Array>(context);
|
||||||
}
|
}
|
||||||
|
|
||||||
// cache the result to suit the framework.
|
// cache the result to suit the framework.
|
||||||
@ -587,11 +609,14 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) {
|
|||||||
|
|
||||||
template <typename ExprValueType>
|
template <typename ExprValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(EvalCtx& context) {
|
||||||
using GetType =
|
using GetType =
|
||||||
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
ExprValueType>;
|
ExprValueType>;
|
||||||
|
auto* input = context.get_offset_input();
|
||||||
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
|
FieldId field_id = expr_->column_.field_id_;
|
||||||
auto real_batch_size =
|
auto real_batch_size =
|
||||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
@ -602,13 +627,13 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
|||||||
value_arg_.SetValue<ExprValueType>(expr_->val_);
|
value_arg_.SetValue<ExprValueType>(expr_->val_);
|
||||||
arg_inited_ = true;
|
arg_inited_ = true;
|
||||||
}
|
}
|
||||||
|
auto res_vec =
|
||||||
ExprValueType val = value_arg_.GetValue<ExprValueType>();
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
|
ExprValueType val = value_arg_.GetValue<ExprValueType>();
|
||||||
auto op_type = expr_->op_type_;
|
auto op_type = expr_->op_type_;
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
|
|
||||||
@ -642,8 +667,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
|||||||
res[i] = (cmp); \
|
res[i] = (cmp); \
|
||||||
} while (false)
|
} while (false)
|
||||||
|
|
||||||
|
int processed_cursor = 0;
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[ op_type, pointer ]<FilterType filter_type = FilterType::sequential>(
|
[ op_type, pointer, &processed_cursor, &
|
||||||
|
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const milvus::Json* data,
|
const milvus::Json* data,
|
||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -651,6 +678,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
|||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
TargetBitmapView valid_res,
|
TargetBitmapView valid_res,
|
||||||
ExprValueType val) {
|
ExprValueType val) {
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
switch (op_type) {
|
switch (op_type) {
|
||||||
case proto::plan::GreaterThan: {
|
case proto::plan::GreaterThan: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
@ -662,6 +690,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input &&
|
||||||
|
!bitmap_input[i + processed_cursor]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
} else {
|
} else {
|
||||||
@ -680,6 +712,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input &&
|
||||||
|
!bitmap_input[i + processed_cursor]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
} else {
|
} else {
|
||||||
@ -698,6 +734,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input &&
|
||||||
|
!bitmap_input[i + processed_cursor]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
} else {
|
} else {
|
||||||
@ -716,6 +756,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input &&
|
||||||
|
!bitmap_input[i + processed_cursor]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
} else {
|
} else {
|
||||||
@ -734,6 +778,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input &&
|
||||||
|
!bitmap_input[i + processed_cursor]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
auto doc = data[i].doc();
|
auto doc = data[i].doc();
|
||||||
auto array = doc.at_pointer(pointer).get_array();
|
auto array = doc.at_pointer(pointer).get_array();
|
||||||
@ -758,6 +806,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input &&
|
||||||
|
!bitmap_input[i + processed_cursor]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
auto doc = data[i].doc();
|
auto doc = data[i].doc();
|
||||||
auto array = doc.at_pointer(pointer).get_array();
|
auto array = doc.at_pointer(pointer).get_array();
|
||||||
@ -782,6 +834,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input &&
|
||||||
|
!bitmap_input[i + processed_cursor]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
} else {
|
} else {
|
||||||
@ -804,6 +860,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input &&
|
||||||
|
!bitmap_input[i + processed_cursor]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
} else {
|
} else {
|
||||||
@ -819,6 +879,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
|||||||
fmt::format("unsupported operator type for unary expr: {}",
|
fmt::format("unsupported operator type for unary expr: {}",
|
||||||
op_type));
|
op_type));
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
int64_t processed_size;
|
int64_t processed_size;
|
||||||
if (has_offset_input_) {
|
if (has_offset_input_) {
|
||||||
@ -839,7 +900,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImpl(OffsetVector* input) {
|
PhyUnaryRangeFilterExpr::ExecRangeVisitorImpl(EvalCtx& context) {
|
||||||
if (expr_->op_type_ == proto::plan::OpType::TextMatch ||
|
if (expr_->op_type_ == proto::plan::OpType::TextMatch ||
|
||||||
expr_->op_type_ == proto::plan::OpType::PhraseMatch) {
|
expr_->op_type_ == proto::plan::OpType::PhraseMatch) {
|
||||||
if (has_offset_input_) {
|
if (has_offset_input_) {
|
||||||
@ -853,7 +914,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImpl(OffsetVector* input) {
|
|||||||
if (CanUseIndex<T>() && !has_offset_input_) {
|
if (CanUseIndex<T>() && !has_offset_input_) {
|
||||||
return ExecRangeVisitorImplForIndex<T>();
|
return ExecRangeVisitorImplForIndex<T>();
|
||||||
} else {
|
} else {
|
||||||
return ExecRangeVisitorImplForData<T>(input);
|
return ExecRangeVisitorImplForData<T>(context);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1003,10 +1064,13 @@ PhyUnaryRangeFilterExpr::PreCheckOverflow(OffsetVector* input) {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(EvalCtx& context) {
|
||||||
typedef std::
|
typedef std::
|
||||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||||
IndexInnerType;
|
IndexInnerType;
|
||||||
|
auto* input = context.get_offset_input();
|
||||||
|
const auto& bitmap_input = context.get_bitmap_input();
|
||||||
|
|
||||||
if (auto res = PreCheckOverflow<T>(input)) {
|
if (auto res = PreCheckOverflow<T>(input)) {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
@ -1022,15 +1086,17 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
|||||||
arg_inited_ = true;
|
arg_inited_ = true;
|
||||||
}
|
}
|
||||||
IndexInnerType val = GetValueFromProto<IndexInnerType>(expr_->val_);
|
IndexInnerType val = GetValueFromProto<IndexInnerType>(expr_->val_);
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec =
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
|
||||||
auto expr_type = expr_->op_type_;
|
auto expr_type = expr_->op_type_;
|
||||||
|
|
||||||
auto execute_sub_batch = [expr_type]<FilterType filter_type =
|
size_t processed_cursor = 0;
|
||||||
FilterType::sequential>(
|
auto execute_sub_batch =
|
||||||
|
[ expr_type, &processed_cursor, &
|
||||||
|
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const T* data,
|
const T* data,
|
||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
const int32_t* offsets,
|
const int32_t* offsets,
|
||||||
@ -1041,43 +1107,91 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
|||||||
switch (expr_type) {
|
switch (expr_type) {
|
||||||
case proto::plan::GreaterThan: {
|
case proto::plan::GreaterThan: {
|
||||||
UnaryElementFunc<T, proto::plan::GreaterThan, filter_type> func;
|
UnaryElementFunc<T, proto::plan::GreaterThan, filter_type> func;
|
||||||
func(data, size, val, res, offsets);
|
func(data,
|
||||||
|
size,
|
||||||
|
val,
|
||||||
|
res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::GreaterEqual: {
|
case proto::plan::GreaterEqual: {
|
||||||
UnaryElementFunc<T, proto::plan::GreaterEqual, filter_type>
|
UnaryElementFunc<T, proto::plan::GreaterEqual, filter_type>
|
||||||
func;
|
func;
|
||||||
func(data, size, val, res, offsets);
|
func(data,
|
||||||
|
size,
|
||||||
|
val,
|
||||||
|
res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::LessThan: {
|
case proto::plan::LessThan: {
|
||||||
UnaryElementFunc<T, proto::plan::LessThan, filter_type> func;
|
UnaryElementFunc<T, proto::plan::LessThan, filter_type> func;
|
||||||
func(data, size, val, res, offsets);
|
func(data,
|
||||||
|
size,
|
||||||
|
val,
|
||||||
|
res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::LessEqual: {
|
case proto::plan::LessEqual: {
|
||||||
UnaryElementFunc<T, proto::plan::LessEqual, filter_type> func;
|
UnaryElementFunc<T, proto::plan::LessEqual, filter_type> func;
|
||||||
func(data, size, val, res, offsets);
|
func(data,
|
||||||
|
size,
|
||||||
|
val,
|
||||||
|
res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::Equal: {
|
case proto::plan::Equal: {
|
||||||
UnaryElementFunc<T, proto::plan::Equal, filter_type> func;
|
UnaryElementFunc<T, proto::plan::Equal, filter_type> func;
|
||||||
func(data, size, val, res, offsets);
|
func(data,
|
||||||
|
size,
|
||||||
|
val,
|
||||||
|
res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::NotEqual: {
|
case proto::plan::NotEqual: {
|
||||||
UnaryElementFunc<T, proto::plan::NotEqual, filter_type> func;
|
UnaryElementFunc<T, proto::plan::NotEqual, filter_type> func;
|
||||||
func(data, size, val, res, offsets);
|
func(data,
|
||||||
|
size,
|
||||||
|
val,
|
||||||
|
res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::PrefixMatch: {
|
case proto::plan::PrefixMatch: {
|
||||||
UnaryElementFunc<T, proto::plan::PrefixMatch, filter_type> func;
|
UnaryElementFunc<T, proto::plan::PrefixMatch, filter_type> func;
|
||||||
func(data, size, val, res, offsets);
|
func(data,
|
||||||
|
size,
|
||||||
|
val,
|
||||||
|
res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::Match: {
|
case proto::plan::Match: {
|
||||||
UnaryElementFunc<T, proto::plan::Match, filter_type> func;
|
UnaryElementFunc<T, proto::plan::Match, filter_type> func;
|
||||||
func(data, size, val, res, offsets);
|
func(data,
|
||||||
|
size,
|
||||||
|
val,
|
||||||
|
res,
|
||||||
|
bitmap_input,
|
||||||
|
processed_cursor,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@ -1090,7 +1204,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
|||||||
// so not divide data again for the reason that it may reduce performance if the null distribution is scattered
|
// so not divide data again for the reason that it may reduce performance if the null distribution is scattered
|
||||||
// but to mask res with valid_data after the batch operation.
|
// but to mask res with valid_data after the batch operation.
|
||||||
if (valid_data != nullptr) {
|
if (valid_data != nullptr) {
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
for (int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
|
if (has_bitmap_input && !bitmap_input[i + processed_cursor]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
auto offset = i;
|
auto offset = i;
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
offset = (offsets) ? offsets[i] : i;
|
offset = (offsets) ? offsets[i] : i;
|
||||||
@ -1100,6 +1218,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
processed_cursor += size;
|
||||||
};
|
};
|
||||||
|
|
||||||
auto skip_index_func = [expr_type, val](const SkipIndex& skip_index,
|
auto skip_index_func = [expr_type, val](const SkipIndex& skip_index,
|
||||||
|
|||||||
@ -41,15 +41,20 @@ struct UnaryElementFuncForMatch {
|
|||||||
|
|
||||||
void
|
void
|
||||||
operator()(const T* src,
|
operator()(const T* src,
|
||||||
|
|
||||||
size_t size,
|
size_t size,
|
||||||
IndexInnerType val,
|
IndexInnerType val,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
int64_t* offsets = nullptr) {
|
const TargetBitmap& bitmap_input,
|
||||||
|
int start_cursor,
|
||||||
|
const int32_t* offsets = nullptr) {
|
||||||
PatternMatchTranslator translator;
|
PatternMatchTranslator translator;
|
||||||
auto regex_pattern = translator(val);
|
auto regex_pattern = translator(val);
|
||||||
RegexMatcher matcher(regex_pattern);
|
RegexMatcher matcher(regex_pattern);
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
|
if (has_bitmap_input && !bitmap_input[i + start_cursor]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
res[i] = matcher(src[offsets ? offsets[i] : i]);
|
res[i] = matcher(src[offsets ? offsets[i] : i]);
|
||||||
} else {
|
} else {
|
||||||
@ -69,17 +74,25 @@ struct UnaryElementFunc {
|
|||||||
size_t size,
|
size_t size,
|
||||||
IndexInnerType val,
|
IndexInnerType val,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
const TargetBitmap& bitmap_input,
|
||||||
|
size_t start_cursor,
|
||||||
const int32_t* offsets = nullptr) {
|
const int32_t* offsets = nullptr) {
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
if constexpr (op == proto::plan::OpType::Match) {
|
if constexpr (op == proto::plan::OpType::Match) {
|
||||||
UnaryElementFuncForMatch<T, filter_type> func;
|
UnaryElementFuncForMatch<T, filter_type> func;
|
||||||
func(src, size, val, res);
|
func(src, size, val, res, bitmap_input, start_cursor, offsets);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// This is the original code, which is kept for the documentation purposes
|
// This is the original code, which is kept for the documentation purposes
|
||||||
// also, for iterative filter
|
// also, for iterative filter
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random ||
|
||||||
|
std::is_same_v<T, std::string_view> ||
|
||||||
|
std::is_same_v<T, std::string>) {
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
|
if (has_bitmap_input && !bitmap_input[i + start_cursor]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
auto offset = (offsets != nullptr) ? offsets[i] : i;
|
auto offset = (offsets != nullptr) ? offsets[i] : i;
|
||||||
if constexpr (op == proto::plan::OpType::Equal) {
|
if constexpr (op == proto::plan::OpType::Equal) {
|
||||||
res[i] = src[offset] == val;
|
res[i] = src[offset] == val;
|
||||||
@ -164,7 +177,10 @@ struct UnaryElementFuncForArray {
|
|||||||
int index,
|
int index,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
TargetBitmapView valid_res,
|
TargetBitmapView valid_res,
|
||||||
|
const TargetBitmap& bitmap_input,
|
||||||
|
size_t start_cursor,
|
||||||
const int32_t* offsets = nullptr) {
|
const int32_t* offsets = nullptr) {
|
||||||
|
bool has_bitmap_input = !bitmap_input.empty();
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
auto offset = i;
|
auto offset = i;
|
||||||
if constexpr (filter_type == FilterType::random) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
@ -174,6 +190,9 @@ struct UnaryElementFuncForArray {
|
|||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (has_bitmap_input && !bitmap_input[i + start_cursor]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (op == proto::plan::OpType::Equal) {
|
if constexpr (op == proto::plan::OpType::Equal) {
|
||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
res[i] = src[offset].is_same_array(val);
|
res[i] = src[offset].is_same_array(val);
|
||||||
@ -340,10 +359,30 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
ToString() const {
|
||||||
|
return fmt::format("{}", expr_->ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<milvus::expr::ColumnInfo>
|
||||||
|
GetColumnInfo() const override {
|
||||||
|
return expr_->column_;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IsSource() const override {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::shared_ptr<const milvus::expr::UnaryRangeFilterExpr>
|
||||||
|
GetLogicalExpr() {
|
||||||
|
return expr_;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImpl(OffsetVector* input = nullptr);
|
ExecRangeVisitorImpl(EvalCtx& context);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
@ -351,23 +390,23 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImplForData(OffsetVector* input = nullptr);
|
ExecRangeVisitorImplForData(EvalCtx& context);
|
||||||
|
|
||||||
template <typename ExprValueType>
|
template <typename ExprValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImplJson(OffsetVector* input = nullptr);
|
ExecRangeVisitorImplJson(EvalCtx& context);
|
||||||
|
|
||||||
template <typename ExprValueType>
|
template <typename ExprValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImplArray(OffsetVector* input = nullptr);
|
ExecRangeVisitorImplArray(EvalCtx& context);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImplArrayForIndex();
|
ExecRangeVisitorImplArrayForIndex(EvalCtx& context);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecArrayEqualForIndex(bool reverse);
|
ExecArrayEqualForIndex(EvalCtx& context, bool reverse);
|
||||||
|
|
||||||
// Check overflow and cache result for performace
|
// Check overflow and cache result for performace
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
|||||||
@ -59,6 +59,21 @@ class PhyValueExpr : public Expr {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
ToString() const {
|
||||||
|
return fmt::format("{}", expr_->ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IsSource() const override {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<milvus::expr::ColumnInfo>
|
||||||
|
GetColumnInfo() const override {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<const milvus::expr::ValueExpr> expr_;
|
std::shared_ptr<const milvus::expr::ValueExpr> expr_;
|
||||||
const int64_t active_count_;
|
const int64_t active_count_;
|
||||||
|
|||||||
@ -210,6 +210,8 @@ std::map<std::string, std::string> searchGetTargetEntryLatencyLabels{
|
|||||||
{"type", "search_get_target_entry_latency"}};
|
{"type", "search_get_target_entry_latency"}};
|
||||||
std::map<std::string, std::string> randomSampleLatencyLabels{
|
std::map<std::string, std::string> randomSampleLatencyLabels{
|
||||||
{"type", "random_sample_latency"}};
|
{"type", "random_sample_latency"}};
|
||||||
|
std::map<std::string, std::string> optimizeExprLatencyLabels{
|
||||||
|
{"type", "optimize_expr_latency"}};
|
||||||
|
|
||||||
DEFINE_PROMETHEUS_HISTOGRAM_FAMILY(internal_core_search_latency,
|
DEFINE_PROMETHEUS_HISTOGRAM_FAMILY(internal_core_search_latency,
|
||||||
"[cpp]latency(us) of search on segment")
|
"[cpp]latency(us) of search on segment")
|
||||||
@ -242,7 +244,9 @@ DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_get_target_entry_latency,
|
|||||||
DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_random_sample,
|
DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_random_sample,
|
||||||
internal_core_search_latency,
|
internal_core_search_latency,
|
||||||
randomSampleLatencyLabels)
|
randomSampleLatencyLabels)
|
||||||
|
DEFINE_PROMETHEUS_HISTOGRAM(internal_core_optimize_expr_latency,
|
||||||
|
internal_core_search_latency,
|
||||||
|
optimizeExprLatencyLabels)
|
||||||
// mmap metrics
|
// mmap metrics
|
||||||
std::map<std::string, std::string> mmapAllocatedSpaceAnonLabel = {
|
std::map<std::string, std::string> mmapAllocatedSpaceAnonLabel = {
|
||||||
{"type", "anon"}};
|
{"type", "anon"}};
|
||||||
|
|||||||
@ -142,6 +142,7 @@ DECLARE_PROMETHEUS_HISTOGRAM(internal_core_get_vector_latency);
|
|||||||
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_retrieve_get_target_entry_latency);
|
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_retrieve_get_target_entry_latency);
|
||||||
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_get_target_entry_latency);
|
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_get_target_entry_latency);
|
||||||
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_random_sample);
|
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_random_sample);
|
||||||
|
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_optimize_expr_latency);
|
||||||
|
|
||||||
// async cgo metrics
|
// async cgo metrics
|
||||||
DECLARE_PROMETHEUS_HISTOGRAM_FAMILY(internal_cgo_queue_duration_seconds);
|
DECLARE_PROMETHEUS_HISTOGRAM_FAMILY(internal_cgo_queue_duration_seconds);
|
||||||
|
|||||||
@ -27,6 +27,7 @@
|
|||||||
#include "exec/QueryContext.h"
|
#include "exec/QueryContext.h"
|
||||||
#include "expr/ITypeExpr.h"
|
#include "expr/ITypeExpr.h"
|
||||||
#include "exec/expression/Expr.h"
|
#include "exec/expression/Expr.h"
|
||||||
|
#include "exec/expression/ConjunctExpr.h"
|
||||||
#include "exec/expression/function/FunctionFactory.h"
|
#include "exec/expression/function/FunctionFactory.h"
|
||||||
|
|
||||||
using namespace milvus;
|
using namespace milvus;
|
||||||
@ -82,10 +83,12 @@ class TaskTest : public testing::TestWithParam<DataType> {
|
|||||||
field_map_.insert({"string2", str2_fid});
|
field_map_.insert({"string2", str2_fid});
|
||||||
auto str3_fid = schema->AddDebugField("string3", DataType::VARCHAR);
|
auto str3_fid = schema->AddDebugField("string3", DataType::VARCHAR);
|
||||||
field_map_.insert({"string3", str3_fid});
|
field_map_.insert({"string3", str3_fid});
|
||||||
|
auto json_fid = schema->AddDebugField("json", DataType::JSON);
|
||||||
|
field_map_.insert({"json", json_fid});
|
||||||
schema->set_primary_field_id(str1_fid);
|
schema->set_primary_field_id(str1_fid);
|
||||||
|
|
||||||
auto segment = CreateSealedSegment(schema);
|
auto segment = CreateSealedSegment(schema);
|
||||||
size_t N = 1000000;
|
size_t N = 100000;
|
||||||
num_rows_ = N;
|
num_rows_ = N;
|
||||||
auto raw_data = DataGen(schema, N);
|
auto raw_data = DataGen(schema, N);
|
||||||
auto fields = schema->get_fields();
|
auto fields = schema->get_fields();
|
||||||
@ -152,7 +155,7 @@ TEST_P(TaskTest, CallExprEmpty) {
|
|||||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||||
"test1",
|
"test1",
|
||||||
segment_.get(),
|
segment_.get(),
|
||||||
1000000,
|
100000,
|
||||||
MAX_TIMESTAMP,
|
MAX_TIMESTAMP,
|
||||||
std::make_shared<milvus::exec::QueryConfig>(
|
std::make_shared<milvus::exec::QueryConfig>(
|
||||||
std::unordered_map<std::string, std::string>{}));
|
std::unordered_map<std::string, std::string>{}));
|
||||||
@ -189,7 +192,7 @@ TEST_P(TaskTest, UnaryExpr) {
|
|||||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||||
"test1",
|
"test1",
|
||||||
segment_.get(),
|
segment_.get(),
|
||||||
1000000,
|
100000,
|
||||||
MAX_TIMESTAMP,
|
MAX_TIMESTAMP,
|
||||||
std::make_shared<milvus::exec::QueryConfig>(
|
std::make_shared<milvus::exec::QueryConfig>(
|
||||||
std::unordered_map<std::string, std::string>{}));
|
std::unordered_map<std::string, std::string>{}));
|
||||||
@ -235,7 +238,7 @@ TEST_P(TaskTest, LogicalExpr) {
|
|||||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||||
"test1",
|
"test1",
|
||||||
segment_.get(),
|
segment_.get(),
|
||||||
1000000,
|
100000,
|
||||||
MAX_TIMESTAMP,
|
MAX_TIMESTAMP,
|
||||||
std::make_shared<milvus::exec::QueryConfig>(
|
std::make_shared<milvus::exec::QueryConfig>(
|
||||||
std::unordered_map<std::string, std::string>{}));
|
std::unordered_map<std::string, std::string>{}));
|
||||||
@ -296,12 +299,12 @@ TEST_P(TaskTest, CompileInputs_and) {
|
|||||||
auto expr7 = std::make_shared<expr::LogicalBinaryExpr>(
|
auto expr7 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||||
expr::LogicalBinaryExpr::OpType::And, expr3, expr6);
|
expr::LogicalBinaryExpr::OpType::And, expr3, expr6);
|
||||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||||
DEAFULT_QUERY_ID, segment_.get(), 1000000, MAX_TIMESTAMP);
|
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||||
auto exprs = milvus::exec::CompileInputs(expr7, query_context.get(), {});
|
auto exprs = milvus::exec::CompileInputs(expr7, query_context.get(), {});
|
||||||
EXPECT_EQ(exprs.size(), 4);
|
EXPECT_EQ(exprs.size(), 4);
|
||||||
for (int i = 0; i < exprs.size(); ++i) {
|
for (int i = 0; i < exprs.size(); ++i) {
|
||||||
std::cout << exprs[i]->get_name() << std::endl;
|
std::cout << exprs[i]->name() << std::endl;
|
||||||
EXPECT_STREQ(exprs[i]->get_name().c_str(), "PhyUnaryRangeFilterExpr");
|
EXPECT_STREQ(exprs[i]->name().c_str(), "PhyUnaryRangeFilterExpr");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -316,7 +319,7 @@ TEST_P(TaskTest, CompileInputs_or_with_and) {
|
|||||||
proto::plan::GenericValue val;
|
proto::plan::GenericValue val;
|
||||||
val.set_int64_val(10);
|
val.set_int64_val(10);
|
||||||
{
|
{
|
||||||
// expr: (int64_fid < 10 and int64_fid < 10) or (int64_fid < 10 and int64_fid < 10)
|
// expr: (int64_fid > 10 and int64_fid > 10) or (int64_fid > 10 and int64_fid > 10)
|
||||||
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
expr::ColumnInfo(int64_fid, DataType::INT64),
|
expr::ColumnInfo(int64_fid, DataType::INT64),
|
||||||
proto::plan::OpType::GreaterThan,
|
proto::plan::OpType::GreaterThan,
|
||||||
@ -342,19 +345,19 @@ TEST_P(TaskTest, CompileInputs_or_with_and) {
|
|||||||
auto expr6 = std::make_shared<expr::LogicalBinaryExpr>(
|
auto expr6 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||||
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||||
DEAFULT_QUERY_ID, segment_.get(), 1000000, MAX_TIMESTAMP);
|
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||||
auto expr7 = std::make_shared<expr::LogicalBinaryExpr>(
|
auto expr7 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||||
expr::LogicalBinaryExpr::OpType::Or, expr3, expr6);
|
expr::LogicalBinaryExpr::OpType::Or, expr3, expr6);
|
||||||
auto exprs =
|
auto exprs =
|
||||||
milvus::exec::CompileInputs(expr7, query_context.get(), {});
|
milvus::exec::CompileInputs(expr7, query_context.get(), {});
|
||||||
EXPECT_EQ(exprs.size(), 2);
|
EXPECT_EQ(exprs.size(), 2);
|
||||||
for (int i = 0; i < exprs.size(); ++i) {
|
for (int i = 0; i < exprs.size(); ++i) {
|
||||||
std::cout << exprs[i]->get_name() << std::endl;
|
std::cout << exprs[i]->name() << std::endl;
|
||||||
EXPECT_STREQ(exprs[i]->get_name().c_str(), "and");
|
EXPECT_STREQ(exprs[i]->name().c_str(), "PhyConjunctFilterExpr");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
// expr: (int64_fid < 10 or int64_fid < 10) or (int64_fid < 10 and int64_fid < 10)
|
// expr: (int64_fid < 10 or int64_fid < 10) or (int64_fid > 10 and int64_fid > 10)
|
||||||
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
expr::ColumnInfo(int64_fid, DataType::INT64),
|
expr::ColumnInfo(int64_fid, DataType::INT64),
|
||||||
proto::plan::OpType::GreaterThan,
|
proto::plan::OpType::GreaterThan,
|
||||||
@ -380,7 +383,7 @@ TEST_P(TaskTest, CompileInputs_or_with_and) {
|
|||||||
auto expr6 = std::make_shared<expr::LogicalBinaryExpr>(
|
auto expr6 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||||
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||||
DEAFULT_QUERY_ID, segment_.get(), 1000000, MAX_TIMESTAMP);
|
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||||
auto expr7 = std::make_shared<expr::LogicalBinaryExpr>(
|
auto expr7 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||||
expr::LogicalBinaryExpr::OpType::Or, expr3, expr6);
|
expr::LogicalBinaryExpr::OpType::Or, expr3, expr6);
|
||||||
auto exprs =
|
auto exprs =
|
||||||
@ -388,14 +391,13 @@ TEST_P(TaskTest, CompileInputs_or_with_and) {
|
|||||||
std::cout << exprs.size() << std::endl;
|
std::cout << exprs.size() << std::endl;
|
||||||
EXPECT_EQ(exprs.size(), 3);
|
EXPECT_EQ(exprs.size(), 3);
|
||||||
for (int i = 0; i < exprs.size() - 1; ++i) {
|
for (int i = 0; i < exprs.size() - 1; ++i) {
|
||||||
std::cout << exprs[i]->get_name() << std::endl;
|
std::cout << exprs[i]->name() << std::endl;
|
||||||
EXPECT_STREQ(exprs[i]->get_name().c_str(),
|
EXPECT_STREQ(exprs[i]->name().c_str(), "PhyUnaryRangeFilterExpr");
|
||||||
"PhyUnaryRangeFilterExpr");
|
|
||||||
}
|
}
|
||||||
EXPECT_STREQ(exprs[2]->get_name().c_str(), "and");
|
EXPECT_STREQ(exprs[2]->name().c_str(), "PhyConjunctFilterExpr");
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
// expr: (int64_fid < 10 or int64_fid < 10) and (int64_fid < 10 and int64_fid < 10)
|
// expr: (int64_fid > 10 or int64_fid > 10) and (int64_fid > 10 and int64_fid > 10)
|
||||||
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
expr::ColumnInfo(int64_fid, DataType::INT64),
|
expr::ColumnInfo(int64_fid, DataType::INT64),
|
||||||
proto::plan::OpType::GreaterThan,
|
proto::plan::OpType::GreaterThan,
|
||||||
@ -421,18 +423,282 @@ TEST_P(TaskTest, CompileInputs_or_with_and) {
|
|||||||
auto expr6 = std::make_shared<expr::LogicalBinaryExpr>(
|
auto expr6 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||||
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||||
DEAFULT_QUERY_ID, segment_.get(), 1000000, MAX_TIMESTAMP);
|
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||||
auto expr7 = std::make_shared<expr::LogicalBinaryExpr>(
|
auto expr7 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||||
expr::LogicalBinaryExpr::OpType::And, expr3, expr6);
|
expr::LogicalBinaryExpr::OpType::And, expr3, expr6);
|
||||||
auto exprs =
|
auto exprs =
|
||||||
milvus::exec::CompileInputs(expr7, query_context.get(), {});
|
milvus::exec::CompileInputs(expr7, query_context.get(), {});
|
||||||
std::cout << exprs.size() << std::endl;
|
std::cout << exprs.size() << std::endl;
|
||||||
EXPECT_EQ(exprs.size(), 3);
|
EXPECT_EQ(exprs.size(), 3);
|
||||||
EXPECT_STREQ(exprs[0]->get_name().c_str(), "or");
|
EXPECT_STREQ(exprs[0]->name().c_str(), "PhyConjunctFilterExpr");
|
||||||
for (int i = 1; i < exprs.size(); ++i) {
|
for (int i = 1; i < exprs.size(); ++i) {
|
||||||
std::cout << exprs[i]->get_name() << std::endl;
|
std::cout << exprs[i]->name() << std::endl;
|
||||||
EXPECT_STREQ(exprs[i]->get_name().c_str(),
|
EXPECT_STREQ(exprs[i]->name().c_str(), "PhyUnaryRangeFilterExpr");
|
||||||
"PhyUnaryRangeFilterExpr");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_P(TaskTest, Test_reorder) {
|
||||||
|
using namespace milvus;
|
||||||
|
using namespace milvus::query;
|
||||||
|
using namespace milvus::segcore;
|
||||||
|
using namespace milvus::exec;
|
||||||
|
|
||||||
|
{
|
||||||
|
// expr: string2 like '%xxx' and string2 == 'xxx'
|
||||||
|
// reorder: string2 == "xxx" and string2 like '%xxx'
|
||||||
|
proto::plan::GenericValue val1;
|
||||||
|
val1.set_string_val("%xxx");
|
||||||
|
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
|
expr::ColumnInfo(field_map_["string2"], DataType::VARCHAR),
|
||||||
|
proto::plan::OpType::Match,
|
||||||
|
val1,
|
||||||
|
std::vector<proto::plan::GenericValue>{});
|
||||||
|
proto::plan::GenericValue val2;
|
||||||
|
val2.set_string_val("xxx");
|
||||||
|
auto expr2 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
|
expr::ColumnInfo(field_map_["string2"], DataType::VARCHAR),
|
||||||
|
proto::plan::OpType::Equal,
|
||||||
|
val2,
|
||||||
|
std::vector<proto::plan::GenericValue>{});
|
||||||
|
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||||
|
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||||
|
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||||
|
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||||
|
ExecContext context(query_context.get());
|
||||||
|
auto exprs =
|
||||||
|
milvus::exec::CompileExpressions({expr3}, &context, {}, false);
|
||||||
|
EXPECT_EQ(exprs.size(), 1);
|
||||||
|
EXPECT_STREQ(exprs[0]->name().c_str(), "PhyConjunctFilterExpr");
|
||||||
|
auto phy_expr =
|
||||||
|
std::static_pointer_cast<milvus::exec::PhyConjunctFilterExpr>(
|
||||||
|
exprs[0]);
|
||||||
|
std::cout << phy_expr->ToString() << std::endl;
|
||||||
|
auto reorder = phy_expr->GetReorder();
|
||||||
|
EXPECT_EQ(reorder.size(), 2);
|
||||||
|
EXPECT_EQ(reorder[0], 1);
|
||||||
|
EXPECT_EQ(reorder[1], 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// expr: string2 == 'xxx' and int64 < 100
|
||||||
|
// reorder: int64 < 100 and string2 == 'xxx'
|
||||||
|
proto::plan::GenericValue val1;
|
||||||
|
val1.set_string_val("xxx");
|
||||||
|
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
|
expr::ColumnInfo(field_map_["string2"], DataType::VARCHAR),
|
||||||
|
proto::plan::OpType::Equal,
|
||||||
|
val1,
|
||||||
|
std::vector<proto::plan::GenericValue>{});
|
||||||
|
proto::plan::GenericValue val2;
|
||||||
|
val2.set_int64_val(100);
|
||||||
|
auto expr2 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
|
expr::ColumnInfo(field_map_["int64"], DataType::INT64),
|
||||||
|
proto::plan::OpType::LessThan,
|
||||||
|
val2,
|
||||||
|
std::vector<proto::plan::GenericValue>{});
|
||||||
|
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||||
|
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||||
|
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||||
|
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||||
|
ExecContext context(query_context.get());
|
||||||
|
auto exprs =
|
||||||
|
milvus::exec::CompileExpressions({expr3}, &context, {}, false);
|
||||||
|
EXPECT_EQ(exprs.size(), 1);
|
||||||
|
EXPECT_STREQ(exprs[0]->name().c_str(), "PhyConjunctFilterExpr");
|
||||||
|
auto phy_expr =
|
||||||
|
std::static_pointer_cast<milvus::exec::PhyConjunctFilterExpr>(
|
||||||
|
exprs[0]);
|
||||||
|
std::cout << phy_expr->ToString() << std::endl;
|
||||||
|
auto reorder = phy_expr->GetReorder();
|
||||||
|
EXPECT_EQ(reorder.size(), 2);
|
||||||
|
EXPECT_EQ(reorder[0], 1);
|
||||||
|
EXPECT_EQ(reorder[1], 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// expr: json['b'] like '%xx' and json['a'] == 'xxx'
|
||||||
|
// reorder: json['a'] == 'xxx' and json['b'] like '%xx'
|
||||||
|
proto::plan::GenericValue val1;
|
||||||
|
val1.set_string_val("%xxx");
|
||||||
|
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
|
expr::ColumnInfo(field_map_["json"], DataType::JSON),
|
||||||
|
proto::plan::OpType::Match,
|
||||||
|
val1,
|
||||||
|
std::vector<proto::plan::GenericValue>{});
|
||||||
|
proto::plan::GenericValue val2;
|
||||||
|
val2.set_string_val("xxx");
|
||||||
|
auto expr2 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
|
expr::ColumnInfo(field_map_["json"], DataType::JSON),
|
||||||
|
proto::plan::OpType::Equal,
|
||||||
|
val2,
|
||||||
|
std::vector<proto::plan::GenericValue>{});
|
||||||
|
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||||
|
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||||
|
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||||
|
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||||
|
ExecContext context(query_context.get());
|
||||||
|
auto exprs =
|
||||||
|
milvus::exec::CompileExpressions({expr3}, &context, {}, false);
|
||||||
|
EXPECT_EQ(exprs.size(), 1);
|
||||||
|
EXPECT_STREQ(exprs[0]->name().c_str(), "PhyConjunctFilterExpr");
|
||||||
|
auto phy_expr =
|
||||||
|
std::static_pointer_cast<milvus::exec::PhyConjunctFilterExpr>(
|
||||||
|
exprs[0]);
|
||||||
|
std::cout << phy_expr->ToString() << std::endl;
|
||||||
|
auto reorder = phy_expr->GetReorder();
|
||||||
|
EXPECT_EQ(reorder.size(), 2);
|
||||||
|
EXPECT_EQ(reorder[0], 1);
|
||||||
|
EXPECT_EQ(reorder[1], 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// expr: json['a'] == 'xxx' and int64 == 100
|
||||||
|
// reorder: int64 == 100 and json['a'] == 'xxx'
|
||||||
|
proto::plan::GenericValue val1;
|
||||||
|
val1.set_string_val("xxx");
|
||||||
|
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
|
expr::ColumnInfo(field_map_["json"], DataType::JSON),
|
||||||
|
proto::plan::OpType::Equal,
|
||||||
|
val1,
|
||||||
|
std::vector<proto::plan::GenericValue>{});
|
||||||
|
proto::plan::GenericValue val2;
|
||||||
|
val2.set_int64_val(100);
|
||||||
|
auto expr2 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
|
expr::ColumnInfo(field_map_["int64"], DataType::INT64),
|
||||||
|
proto::plan::OpType::Equal,
|
||||||
|
val2,
|
||||||
|
std::vector<proto::plan::GenericValue>{});
|
||||||
|
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||||
|
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||||
|
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||||
|
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||||
|
ExecContext context(query_context.get());
|
||||||
|
auto exprs =
|
||||||
|
milvus::exec::CompileExpressions({expr3}, &context, {}, false);
|
||||||
|
EXPECT_EQ(exprs.size(), 1);
|
||||||
|
EXPECT_STREQ(exprs[0]->name().c_str(), "PhyConjunctFilterExpr");
|
||||||
|
auto phy_expr =
|
||||||
|
std::static_pointer_cast<milvus::exec::PhyConjunctFilterExpr>(
|
||||||
|
exprs[0]);
|
||||||
|
std::cout << phy_expr->ToString() << std::endl;
|
||||||
|
auto reorder = phy_expr->GetReorder();
|
||||||
|
EXPECT_EQ(reorder.size(), 2);
|
||||||
|
EXPECT_EQ(reorder[0], 1);
|
||||||
|
EXPECT_EQ(reorder[1], 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// expr: json['a'] == 'xxx' and 0 < int64 < 100
|
||||||
|
// reorder: 0 < int64 < 100 and json['a'] == 'xxx'
|
||||||
|
proto::plan::GenericValue val1;
|
||||||
|
val1.set_string_val("xxx");
|
||||||
|
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
|
expr::ColumnInfo(field_map_["json"], DataType::JSON),
|
||||||
|
proto::plan::OpType::Equal,
|
||||||
|
val1,
|
||||||
|
std::vector<proto::plan::GenericValue>{});
|
||||||
|
proto::plan::GenericValue low;
|
||||||
|
low.set_int64_val(0);
|
||||||
|
proto::plan::GenericValue upper;
|
||||||
|
upper.set_int64_val(100);
|
||||||
|
auto expr2 = std::make_shared<expr::BinaryRangeFilterExpr>(
|
||||||
|
expr::ColumnInfo(field_map_["int64"], DataType::INT64),
|
||||||
|
low,
|
||||||
|
upper,
|
||||||
|
false,
|
||||||
|
false);
|
||||||
|
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||||
|
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||||
|
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||||
|
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||||
|
ExecContext context(query_context.get());
|
||||||
|
auto exprs =
|
||||||
|
milvus::exec::CompileExpressions({expr3}, &context, {}, false);
|
||||||
|
EXPECT_EQ(exprs.size(), 1);
|
||||||
|
EXPECT_STREQ(exprs[0]->name().c_str(), "PhyConjunctFilterExpr");
|
||||||
|
auto phy_expr =
|
||||||
|
std::static_pointer_cast<milvus::exec::PhyConjunctFilterExpr>(
|
||||||
|
exprs[0]);
|
||||||
|
std::cout << phy_expr->ToString() << std::endl;
|
||||||
|
auto reorder = phy_expr->GetReorder();
|
||||||
|
EXPECT_EQ(reorder.size(), 2);
|
||||||
|
EXPECT_EQ(reorder[0], 1);
|
||||||
|
EXPECT_EQ(reorder[1], 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// expr: string1 < string2 and 0 < int64 < 100
|
||||||
|
// reorder: 0 < int64 < 100 and string1 < string2
|
||||||
|
proto::plan::GenericValue val1;
|
||||||
|
val1.set_string_val("xxx");
|
||||||
|
auto expr1 = std::make_shared<expr::CompareExpr>(field_map_["string1"],
|
||||||
|
field_map_["string2"],
|
||||||
|
DataType::VARCHAR,
|
||||||
|
DataType::VARCHAR,
|
||||||
|
OpType::LessThan);
|
||||||
|
proto::plan::GenericValue low;
|
||||||
|
low.set_int64_val(0);
|
||||||
|
proto::plan::GenericValue upper;
|
||||||
|
upper.set_int64_val(100);
|
||||||
|
auto expr2 = std::make_shared<expr::BinaryRangeFilterExpr>(
|
||||||
|
expr::ColumnInfo(field_map_["int64"], DataType::INT64),
|
||||||
|
low,
|
||||||
|
upper,
|
||||||
|
false,
|
||||||
|
false);
|
||||||
|
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||||
|
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||||
|
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||||
|
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||||
|
ExecContext context(query_context.get());
|
||||||
|
auto exprs =
|
||||||
|
milvus::exec::CompileExpressions({expr3}, &context, {}, false);
|
||||||
|
EXPECT_EQ(exprs.size(), 1);
|
||||||
|
EXPECT_STREQ(exprs[0]->name().c_str(), "PhyConjunctFilterExpr");
|
||||||
|
auto phy_expr =
|
||||||
|
std::static_pointer_cast<milvus::exec::PhyConjunctFilterExpr>(
|
||||||
|
exprs[0]);
|
||||||
|
std::cout << phy_expr->ToString() << std::endl;
|
||||||
|
auto reorder = phy_expr->GetReorder();
|
||||||
|
EXPECT_EQ(reorder.size(), 2);
|
||||||
|
EXPECT_EQ(reorder[0], 1);
|
||||||
|
EXPECT_EQ(reorder[1], 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// expr: string2 like '%xxx' and string2 == 'xxx'
|
||||||
|
// with expr optimization disabled, no reorder is recorded and the original sub-expr order is kept
|
||||||
|
proto::plan::GenericValue val1;
|
||||||
|
val1.set_string_val("%xxx");
|
||||||
|
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
|
expr::ColumnInfo(field_map_["string2"], DataType::VARCHAR),
|
||||||
|
proto::plan::OpType::Match,
|
||||||
|
val1,
|
||||||
|
std::vector<proto::plan::GenericValue>{});
|
||||||
|
proto::plan::GenericValue val2;
|
||||||
|
val2.set_string_val("xxx");
|
||||||
|
auto expr2 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
|
expr::ColumnInfo(field_map_["string2"], DataType::VARCHAR),
|
||||||
|
proto::plan::OpType::Equal,
|
||||||
|
val2,
|
||||||
|
std::vector<proto::plan::GenericValue>{});
|
||||||
|
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||||
|
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||||
|
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||||
|
DEAFULT_QUERY_ID, segment_.get(), 100000, MAX_TIMESTAMP);
|
||||||
|
ExecContext context(query_context.get());
|
||||||
|
OPTIMIZE_EXPR_ENABLED = false;
|
||||||
|
auto exprs =
|
||||||
|
milvus::exec::CompileExpressions({expr3}, &context, {}, false);
|
||||||
|
EXPECT_EQ(exprs.size(), 1);
|
||||||
|
EXPECT_STREQ(exprs[0]->name().c_str(), "PhyConjunctFilterExpr");
|
||||||
|
auto phy_expr =
|
||||||
|
std::static_pointer_cast<milvus::exec::PhyConjunctFilterExpr>(
|
||||||
|
exprs[0]);
|
||||||
|
std::cout << phy_expr->ToString() << std::endl;
|
||||||
|
auto reorder = phy_expr->GetReorder();
|
||||||
|
EXPECT_EQ(reorder.size(), 0);
|
||||||
|
OPTIMIZE_EXPR_ENABLED = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@ -3401,6 +3401,145 @@ TEST_P(ExprTest, TestSealedSegmentGetBatchSize) {
|
|||||||
std::cout << "end compare test" << std::endl;
|
std::cout << "end compare test" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_P(ExprTest, TestReorder) {
|
||||||
|
auto schema = std::make_shared<Schema>();
|
||||||
|
auto pk = schema->AddDebugField("id", DataType::INT64);
|
||||||
|
auto bool_fid = schema->AddDebugField("bool", DataType::BOOL);
|
||||||
|
auto bool_1_fid = schema->AddDebugField("bool1", DataType::BOOL);
|
||||||
|
auto int8_fid = schema->AddDebugField("int8", DataType::INT8);
|
||||||
|
auto int8_1_fid = schema->AddDebugField("int81", DataType::INT8);
|
||||||
|
auto int16_fid = schema->AddDebugField("int16", DataType::INT16);
|
||||||
|
auto int16_1_fid = schema->AddDebugField("int161", DataType::INT16);
|
||||||
|
auto int32_fid = schema->AddDebugField("int32", DataType::INT32);
|
||||||
|
auto int32_1_fid = schema->AddDebugField("int321", DataType::INT32);
|
||||||
|
auto int64_fid = schema->AddDebugField("int64", DataType::INT64);
|
||||||
|
auto int64_1_fid = schema->AddDebugField("int641", DataType::INT64);
|
||||||
|
auto float_fid = schema->AddDebugField("float", DataType::FLOAT);
|
||||||
|
auto float_1_fid = schema->AddDebugField("float1", DataType::FLOAT);
|
||||||
|
auto double_fid = schema->AddDebugField("double", DataType::DOUBLE);
|
||||||
|
auto double_1_fid = schema->AddDebugField("double1", DataType::DOUBLE);
|
||||||
|
auto str1_fid = schema->AddDebugField("string1", DataType::VARCHAR);
|
||||||
|
auto str2_fid = schema->AddDebugField("string2", DataType::VARCHAR);
|
||||||
|
auto json_fid = schema->AddDebugField("json", DataType::JSON, false);
|
||||||
|
auto str_array_fid =
|
||||||
|
schema->AddDebugField("str_array", DataType::ARRAY, DataType::VARCHAR);
|
||||||
|
schema->set_primary_field_id(pk);
|
||||||
|
|
||||||
|
auto seg = CreateSealedSegment(schema);
|
||||||
|
size_t N = 1000;
|
||||||
|
auto raw_data = DataGen(schema, N);
|
||||||
|
auto fields = schema->get_fields();
|
||||||
|
for (auto field_data : raw_data.raw_->fields_data()) {
|
||||||
|
int64_t field_id = field_data.field_id();
|
||||||
|
|
||||||
|
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||||
|
auto field_meta = fields.at(FieldId(field_id));
|
||||||
|
info.channel->push(
|
||||||
|
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||||
|
info.channel->close();
|
||||||
|
|
||||||
|
seg->LoadFieldData(FieldId(field_id), info);
|
||||||
|
}
|
||||||
|
|
||||||
|
query::ExecPlanNodeVisitor visitor(*seg, MAX_TIMESTAMP);
|
||||||
|
|
||||||
|
auto build_expr = [&](int index) -> expr::TypedExprPtr {
|
||||||
|
switch (index) {
|
||||||
|
case 0: {
|
||||||
|
proto::plan::GenericValue val1;
|
||||||
|
val1.set_string_val("xxx");
|
||||||
|
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
|
expr::ColumnInfo(str1_fid, DataType::VARCHAR),
|
||||||
|
proto::plan::OpType::Equal,
|
||||||
|
val1,
|
||||||
|
std::vector<proto::plan::GenericValue>{});
|
||||||
|
proto::plan::GenericValue val2;
|
||||||
|
val2.set_int64_val(100);
|
||||||
|
auto expr2 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
|
expr::ColumnInfo(int64_fid, DataType::INT64),
|
||||||
|
proto::plan::OpType::LessThan,
|
||||||
|
val2,
|
||||||
|
std::vector<proto::plan::GenericValue>{});
|
||||||
|
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||||
|
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||||
|
return expr3;
|
||||||
|
};
|
||||||
|
case 1: {
|
||||||
|
proto::plan::GenericValue val1;
|
||||||
|
val1.set_string_val("xxx");
|
||||||
|
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
|
expr::ColumnInfo(json_fid, DataType::JSON, {"int"}),
|
||||||
|
proto::plan::OpType::Equal,
|
||||||
|
val1,
|
||||||
|
std::vector<proto::plan::GenericValue>{});
|
||||||
|
proto::plan::GenericValue val2;
|
||||||
|
val2.set_int64_val(100);
|
||||||
|
auto expr2 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
|
expr::ColumnInfo(int64_fid, DataType::INT64),
|
||||||
|
proto::plan::OpType::LessThan,
|
||||||
|
val2,
|
||||||
|
std::vector<proto::plan::GenericValue>{});
|
||||||
|
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||||
|
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||||
|
return expr3;
|
||||||
|
};
|
||||||
|
case 2: {
|
||||||
|
proto::plan::GenericValue val1;
|
||||||
|
val1.set_string_val("12");
|
||||||
|
auto expr1 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
|
expr::ColumnInfo(str_array_fid, DataType::ARRAY, {"0"}),
|
||||||
|
proto::plan::OpType::Match,
|
||||||
|
val1,
|
||||||
|
std::vector<proto::plan::GenericValue>{});
|
||||||
|
proto::plan::GenericValue val2;
|
||||||
|
val2.set_int64_val(100);
|
||||||
|
auto expr2 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
|
expr::ColumnInfo(int64_fid, DataType::INT64),
|
||||||
|
proto::plan::OpType::LessThan,
|
||||||
|
val2,
|
||||||
|
std::vector<proto::plan::GenericValue>{});
|
||||||
|
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||||
|
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||||
|
return expr3;
|
||||||
|
};
|
||||||
|
case 3: {
|
||||||
|
auto expr1 =
|
||||||
|
std::make_shared<expr::CompareExpr>(int64_fid,
|
||||||
|
int64_1_fid,
|
||||||
|
DataType::INT64,
|
||||||
|
DataType::INT64,
|
||||||
|
OpType::LessThan);
|
||||||
|
proto::plan::GenericValue val2;
|
||||||
|
val2.set_int64_val(100);
|
||||||
|
auto expr2 = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||||
|
expr::ColumnInfo(int64_fid, DataType::INT64),
|
||||||
|
proto::plan::OpType::LessThan,
|
||||||
|
val2,
|
||||||
|
std::vector<proto::plan::GenericValue>{});
|
||||||
|
auto expr3 = std::make_shared<expr::LogicalBinaryExpr>(
|
||||||
|
expr::LogicalBinaryExpr::OpType::And, expr1, expr2);
|
||||||
|
return expr3;
|
||||||
|
};
|
||||||
|
default:
|
||||||
|
PanicInfo(ErrorCode::UnexpectedError, "not implement");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
BitsetType final;
|
||||||
|
auto expr = build_expr(0);
|
||||||
|
auto plan =
|
||||||
|
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, expr);
|
||||||
|
final = ExecuteQueryExpr(plan, seg.get(), N, MAX_TIMESTAMP);
|
||||||
|
expr = build_expr(1);
|
||||||
|
plan = std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, expr);
|
||||||
|
final = ExecuteQueryExpr(plan, seg.get(), N, MAX_TIMESTAMP);
|
||||||
|
expr = build_expr(2);
|
||||||
|
plan = std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, expr);
|
||||||
|
final = ExecuteQueryExpr(plan, seg.get(), N, MAX_TIMESTAMP);
|
||||||
|
expr = build_expr(3);
|
||||||
|
plan = std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, expr);
|
||||||
|
final = ExecuteQueryExpr(plan, seg.get(), N, MAX_TIMESTAMP);
|
||||||
|
}
|
||||||
|
|
||||||
TEST_P(ExprTest, TestCompareExprNullable) {
|
TEST_P(ExprTest, TestCompareExprNullable) {
|
||||||
auto schema = std::make_shared<Schema>();
|
auto schema = std::make_shared<Schema>();
|
||||||
auto vec_fid = schema->AddDebugField("fakevec", data_type, 16, metric_type);
|
auto vec_fid = schema->AddDebugField("fakevec", data_type, 16, metric_type);
|
||||||
|
|||||||
@ -242,6 +242,9 @@ func (node *QueryNode) InitSegcore() error {
|
|||||||
cExprBatchSize := C.int64_t(paramtable.Get().QueryNodeCfg.ExprEvalBatchSize.GetAsInt64())
|
cExprBatchSize := C.int64_t(paramtable.Get().QueryNodeCfg.ExprEvalBatchSize.GetAsInt64())
|
||||||
C.InitDefaultExprEvalBatchSize(cExprBatchSize)
|
C.InitDefaultExprEvalBatchSize(cExprBatchSize)
|
||||||
|
|
||||||
|
cOptimizeExprEnabled := C.bool(paramtable.Get().CommonCfg.EnabledOptimizeExpr.GetAsBool())
|
||||||
|
C.InitDefaultOptimizeExprEnable(cOptimizeExprEnabled)
|
||||||
|
|
||||||
cGpuMemoryPoolInitSize := C.uint32_t(paramtable.Get().GpuConfig.InitSize.GetAsUint32())
|
cGpuMemoryPoolInitSize := C.uint32_t(paramtable.Get().GpuConfig.InitSize.GetAsUint32())
|
||||||
cGpuMemoryPoolMaxSize := C.uint32_t(paramtable.Get().GpuConfig.MaxSize.GetAsUint32())
|
cGpuMemoryPoolMaxSize := C.uint32_t(paramtable.Get().GpuConfig.MaxSize.GetAsUint32())
|
||||||
C.SegcoreSetKnowhereGpuMemoryPoolSize(cGpuMemoryPoolInitSize, cGpuMemoryPoolMaxSize)
|
C.SegcoreSetKnowhereGpuMemoryPoolSize(cGpuMemoryPoolInitSize, cGpuMemoryPoolMaxSize)
|
||||||
|
|||||||
@ -292,6 +292,8 @@ type commonConfig struct {
|
|||||||
LocalRPCEnabled ParamItem `refreshable:"false"`
|
LocalRPCEnabled ParamItem `refreshable:"false"`
|
||||||
|
|
||||||
SyncTaskPoolReleaseTimeoutSeconds ParamItem `refreshable:"true"`
|
SyncTaskPoolReleaseTimeoutSeconds ParamItem `refreshable:"true"`
|
||||||
|
|
||||||
|
EnabledOptimizeExpr ParamItem `refreshable:"true"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *commonConfig) init(base *BaseTable) {
|
func (p *commonConfig) init(base *BaseTable) {
|
||||||
@ -994,6 +996,15 @@ This helps Milvus-CDC synchronize incremental data`,
|
|||||||
Export: true,
|
Export: true,
|
||||||
}
|
}
|
||||||
p.SyncTaskPoolReleaseTimeoutSeconds.Init(base.mgr)
|
p.SyncTaskPoolReleaseTimeoutSeconds.Init(base.mgr)
|
||||||
|
|
||||||
|
p.EnabledOptimizeExpr = ParamItem{
|
||||||
|
Key: "common.enabledOptimizeExpr",
|
||||||
|
Version: "2.5.6",
|
||||||
|
DefaultValue: "true",
|
||||||
|
Doc: "Indicates whether to enable optimize expr",
|
||||||
|
Export: true,
|
||||||
|
}
|
||||||
|
p.EnabledOptimizeExpr.Init(base.mgr)
|
||||||
}
|
}
|
||||||
|
|
||||||
type gpuConfig struct {
|
type gpuConfig struct {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user