mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
enhance: optimize expr performance for some points (#39695)
1. Skip fetching expr arguments (which deserializes the proto) on every batch execution. 2. Replace unordered_set with a sorted array, which has better performance for small sets. #39688 Co-authored-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
parent
d827dd8b2f
commit
8a9f02ef71
@ -123,14 +123,21 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson(
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
if (!arg_inited_) {
|
||||
value_arg_.SetValue<ValueType>(expr_->value_);
|
||||
if (expr_->arith_op_type_ == proto::plan::ArithOpType::ArrayLength) {
|
||||
right_operand_arg_.SetValue(ValueType());
|
||||
} else {
|
||||
right_operand_arg_.SetValue<ValueType>(expr_->right_operand_);
|
||||
}
|
||||
arg_inited_ = true;
|
||||
}
|
||||
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
auto op_type = expr_->op_type_;
|
||||
auto arith_type = expr_->arith_op_type_;
|
||||
auto value = GetValueFromProto<ValueType>(expr_->value_);
|
||||
auto right_operand =
|
||||
arith_type != proto::plan::ArithOpType::ArrayLength
|
||||
? GetValueFromProto<ValueType>(expr_->right_operand_)
|
||||
: ValueType();
|
||||
auto value = value_arg_.GetValue<ValueType>();
|
||||
auto right_operand = right_operand_arg_.GetValue<ValueType>();
|
||||
|
||||
#define BinaryArithRangeJSONCompare(cmp) \
|
||||
do { \
|
||||
@ -514,6 +521,17 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray(
|
||||
ValueType>;
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
|
||||
if (!arg_inited_) {
|
||||
value_arg_.SetValue<ValueType>(expr_->value_);
|
||||
if (expr_->arith_op_type_ == proto::plan::ArithOpType::ArrayLength) {
|
||||
right_operand_arg_.SetValue(ValueType());
|
||||
} else {
|
||||
right_operand_arg_.SetValue<ValueType>(expr_->right_operand_);
|
||||
}
|
||||
arg_inited_ = true;
|
||||
}
|
||||
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
@ -529,11 +547,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray(
|
||||
}
|
||||
auto op_type = expr_->op_type_;
|
||||
auto arith_type = expr_->arith_op_type_;
|
||||
auto value = GetValueFromProto<ValueType>(expr_->value_);
|
||||
auto right_operand =
|
||||
arith_type != proto::plan::ArithOpType::ArrayLength
|
||||
? GetValueFromProto<ValueType>(expr_->right_operand_)
|
||||
: ValueType();
|
||||
auto value = value_arg_.GetValue<ValueType>();
|
||||
auto right_operand = right_operand_arg_.GetValue<ValueType>();
|
||||
|
||||
#define BinaryArithRangeArrayCompare(cmp) \
|
||||
do { \
|
||||
@ -898,9 +913,14 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex(
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
auto value = GetValueFromProto<HighPrecisionType>(expr_->value_);
|
||||
auto right_operand =
|
||||
GetValueFromProto<HighPrecisionType>(expr_->right_operand_);
|
||||
if (!arg_inited_) {
|
||||
value_arg_.SetValue<HighPrecisionType>(expr_->value_);
|
||||
right_operand_arg_.SetValue<HighPrecisionType>(expr_->right_operand_);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
|
||||
auto value = value_arg_.GetValue<HighPrecisionType>();
|
||||
auto right_operand = right_operand_arg_.GetValue<HighPrecisionType>();
|
||||
auto op_type = expr_->op_type_;
|
||||
auto arith_type = expr_->arith_op_type_;
|
||||
auto sub_batch_size = has_offset_input_ ? input->size() : size_per_chunk_;
|
||||
@ -1415,15 +1435,20 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData(
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto value = GetValueFromProto<HighPrecisionType>(expr_->value_);
|
||||
auto right_operand =
|
||||
GetValueFromProto<HighPrecisionType>(expr_->right_operand_);
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
if (!arg_inited_) {
|
||||
value_arg_.SetValue<HighPrecisionType>(expr_->value_);
|
||||
right_operand_arg_.SetValue<HighPrecisionType>(expr_->right_operand_);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
|
||||
auto value = value_arg_.GetValue<HighPrecisionType>();
|
||||
auto right_operand = right_operand_arg_.GetValue<HighPrecisionType>();
|
||||
auto op_type = expr_->op_type_;
|
||||
auto arith_type = expr_->arith_op_type_;
|
||||
|
||||
|
||||
@ -24,6 +24,7 @@
|
||||
#include "common/Vector.h"
|
||||
#include "exec/expression/Expr.h"
|
||||
#include "segcore/SegmentInterface.h"
|
||||
#include "exec/expression/Element.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace exec {
|
||||
@ -485,6 +486,10 @@ class PhyBinaryArithOpEvalRangeExpr : public SegmentExpr {
|
||||
|
||||
private:
|
||||
std::shared_ptr<const milvus::expr::BinaryArithOpEvalRangeExpr> expr_;
|
||||
SingleElement right_operand_arg_;
|
||||
SingleElement value_arg_;
|
||||
bool arg_inited_{false};
|
||||
};
|
||||
|
||||
} //namespace exec
|
||||
} // namespace milvus
|
||||
|
||||
@ -143,9 +143,14 @@ PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1,
|
||||
OffsetVector* input) {
|
||||
lower_inclusive = expr_->lower_inclusive_;
|
||||
upper_inclusive = expr_->upper_inclusive_;
|
||||
val1 = GetValueFromProto<HighPrecisionType>(expr_->lower_val_);
|
||||
val2 = GetValueFromProto<HighPrecisionType>(expr_->upper_val_);
|
||||
|
||||
if (!arg_inited_) {
|
||||
lower_arg_.SetValue<HighPrecisionType>(expr_->lower_val_);
|
||||
upper_arg_.SetValue<HighPrecisionType>(expr_->upper_val_);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
val1 = lower_arg_.GetValue<HighPrecisionType>();
|
||||
val2 = upper_arg_.GetValue<HighPrecisionType>();
|
||||
auto get_next_overflow_batch =
|
||||
[this](OffsetVector* input) -> ColumnVectorPtr {
|
||||
int64_t batch_size;
|
||||
@ -358,8 +363,13 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
|
||||
|
||||
bool lower_inclusive = expr_->lower_inclusive_;
|
||||
bool upper_inclusive = expr_->upper_inclusive_;
|
||||
ValueType val1 = GetValueFromProto<ValueType>(expr_->lower_val_);
|
||||
ValueType val2 = GetValueFromProto<ValueType>(expr_->upper_val_);
|
||||
if (!arg_inited_) {
|
||||
lower_arg_.SetValue<ValueType>(expr_->lower_val_);
|
||||
upper_arg_.SetValue<ValueType>(expr_->upper_val_);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
ValueType val1 = lower_arg_.GetValue<ValueType>();
|
||||
ValueType val2 = upper_arg_.GetValue<ValueType>();
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
|
||||
auto execute_sub_batch =
|
||||
@ -464,8 +474,15 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
|
||||
|
||||
bool lower_inclusive = expr_->lower_inclusive_;
|
||||
bool upper_inclusive = expr_->upper_inclusive_;
|
||||
ValueType val1 = GetValueFromProto<ValueType>(expr_->lower_val_);
|
||||
ValueType val2 = GetValueFromProto<ValueType>(expr_->upper_val_);
|
||||
|
||||
if (!arg_inited_) {
|
||||
lower_arg_.SetValue<ValueType>(expr_->lower_val_);
|
||||
upper_arg_.SetValue<ValueType>(expr_->upper_val_);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
ValueType val1 = lower_arg_.GetValue<ValueType>();
|
||||
ValueType val2 = upper_arg_.GetValue<ValueType>();
|
||||
|
||||
int index = -1;
|
||||
if (expr_->column_.nested_path_.size() > 0) {
|
||||
index = std::stoi(expr_->column_.nested_path_[0]);
|
||||
|
||||
@ -22,6 +22,7 @@
|
||||
#include "common/Types.h"
|
||||
#include "common/Vector.h"
|
||||
#include "exec/expression/Expr.h"
|
||||
#include "exec/expression/Element.h"
|
||||
#include "segcore/SegmentInterface.h"
|
||||
|
||||
namespace milvus {
|
||||
@ -278,6 +279,9 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
|
||||
private:
|
||||
std::shared_ptr<const milvus::expr::BinaryRangeFilterExpr> expr_;
|
||||
int64_t overflow_check_pos_{0};
|
||||
SingleElement lower_arg_;
|
||||
SingleElement upper_arg_;
|
||||
bool arg_inited_{false};
|
||||
};
|
||||
} //namespace exec
|
||||
} // namespace milvus
|
||||
|
||||
263
internal/core/src/exec/expression/Element.h
Normal file
263
internal/core/src/exec/expression/Element.h
Normal file
@ -0,0 +1,263 @@
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <set>
#include <string>
#include <string_view>
#include <type_traits>
#include <variant>
#include <vector>

#include "common/Types.h"
#include "exec/expression/EvalCtx.h"
#include "exec/expression/VectorFunction.h"
#include "exec/expression/Utils.h"
#include "exec/QueryContext.h"
#include "expr/ITypeExpr.h"
#include "query/PlanProto.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace exec {
|
||||
|
||||
// Stateless polymorphic root of the argument holders declared in this header.
// The virtual destructor allows a derived holder to be destroyed through a
// BaseElement pointer.
class BaseElement {
 public:
    BaseElement() = default;

    virtual ~BaseElement() = default;
};
|
||||
|
||||
class SingleElement : public BaseElement {
|
||||
public:
|
||||
using ValueType = std::variant<std::monostate,
|
||||
bool,
|
||||
int8_t,
|
||||
int16_t,
|
||||
int32_t,
|
||||
int64_t,
|
||||
float,
|
||||
double,
|
||||
std::string,
|
||||
proto::plan::Array>;
|
||||
|
||||
SingleElement() = default;
|
||||
virtual ~SingleElement() = default;
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
SetValue(const proto::plan::GenericValue& value) {
|
||||
value_ = GetValueFromProto<T>(value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
SetValue(const T& value) {
|
||||
if constexpr (std::is_same_v<T, bool> || std::is_same_v<T, int8_t> ||
|
||||
std::is_same_v<T, int16_t> ||
|
||||
std::is_same_v<T, int32_t> ||
|
||||
std::is_same_v<T, int64_t> || std::is_same_v<T, float> ||
|
||||
std::is_same_v<T, double> ||
|
||||
std::is_same_v<T, std::string>) {
|
||||
value_ = value;
|
||||
} else {
|
||||
static_assert(sizeof(T) == 0,
|
||||
"Type not supported in SingleElement");
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T
|
||||
GetValue() const {
|
||||
try {
|
||||
return std::get<T>(value_);
|
||||
} catch (const std::bad_variant_access& e) {
|
||||
PanicInfo(ErrorCode::UnexpectedError,
|
||||
"SingleElement GetValue() failed: {}",
|
||||
e.what());
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
ValueType value_;
|
||||
};
|
||||
|
||||
class MultiElement : public BaseElement {
|
||||
public:
|
||||
using ValueType = std::variant<std::monostate,
|
||||
bool,
|
||||
int8_t,
|
||||
int16_t,
|
||||
int32_t,
|
||||
int64_t,
|
||||
float,
|
||||
double,
|
||||
std::string,
|
||||
std::string_view>;
|
||||
|
||||
MultiElement() = default;
|
||||
virtual ~MultiElement() = default;
|
||||
|
||||
virtual bool
|
||||
In(const ValueType& value) const = 0;
|
||||
|
||||
virtual bool
|
||||
Empty() const = 0;
|
||||
|
||||
virtual size_t
|
||||
Size() const = 0;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class SortVectorElement : public MultiElement {
|
||||
public:
|
||||
explicit SortVectorElement(
|
||||
const std::vector<proto::plan::GenericValue>& values) {
|
||||
for (auto& value : values) {
|
||||
values_.push_back(GetValueFromProto<T>(value));
|
||||
}
|
||||
std::sort(values_.begin(), values_.end());
|
||||
sorted_ = true;
|
||||
}
|
||||
|
||||
explicit SortVectorElement(const std::vector<T>& values) {
|
||||
for (const auto& value : values) {
|
||||
values_.push_back(value);
|
||||
}
|
||||
std::sort(values_.begin(), values_.end());
|
||||
sorted_ = true;
|
||||
}
|
||||
|
||||
bool
|
||||
Empty() const override {
|
||||
return values_.empty();
|
||||
}
|
||||
|
||||
size_t
|
||||
Size() const override {
|
||||
return values_.size();
|
||||
}
|
||||
|
||||
bool
|
||||
In(const ValueType& value) const override {
|
||||
AssertInfo(sorted_, "In() should be sorted before");
|
||||
if (std::holds_alternative<T>(value)) {
|
||||
return std::binary_search(
|
||||
values_.begin(), values_.end(), std::get<T>(value));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void
|
||||
Sort() {
|
||||
std::sort(values_.begin(), values_.end());
|
||||
sorted_ = true;
|
||||
}
|
||||
|
||||
void
|
||||
AddElement(const T& value) {
|
||||
values_.push_back(value);
|
||||
}
|
||||
|
||||
public:
|
||||
std::vector<T> values_;
|
||||
bool sorted_{false};
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class FlatVectorElement : public MultiElement {
|
||||
public:
|
||||
explicit FlatVectorElement(
|
||||
const std::vector<proto::plan::GenericValue>& values) {
|
||||
for (auto& value : values) {
|
||||
values_.push_back(GetValueFromProto<T>(value));
|
||||
}
|
||||
}
|
||||
|
||||
explicit FlatVectorElement(const std::vector<T>& values) {
|
||||
for (const auto& value : values) {
|
||||
values_.push_back(value);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
Empty() const override {
|
||||
return values_.empty();
|
||||
}
|
||||
|
||||
bool
|
||||
In(const ValueType& value) const override {
|
||||
if (std::holds_alternative<T>(value)) {
|
||||
for (const auto& v : values_) {
|
||||
if (v == value)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t
|
||||
Size() const override {
|
||||
return values_.size();
|
||||
}
|
||||
|
||||
void
|
||||
AddElement(const T& value) {
|
||||
values_.push_back(value);
|
||||
}
|
||||
|
||||
public:
|
||||
std::vector<T> values_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class SetElement : public MultiElement {
|
||||
public:
|
||||
explicit SetElement(const std::vector<proto::plan::GenericValue>& values) {
|
||||
for (auto& value : values) {
|
||||
values_.insert(GetValueFromProto<T>(value));
|
||||
}
|
||||
}
|
||||
|
||||
explicit SetElement(const std::vector<T>& values) {
|
||||
for (const auto& value : values) {
|
||||
values_.insert(value);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
Empty() const override {
|
||||
return values_.empty();
|
||||
}
|
||||
|
||||
bool
|
||||
In(const ValueType& value) const override {
|
||||
if (std::holds_alternative<T>(value)) {
|
||||
return values_.count(std::get<T>(value)) > 0;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
AddElement(const T& value) {
|
||||
values_.insert(value);
|
||||
}
|
||||
|
||||
size_t
|
||||
Size() const override {
|
||||
return values_.size();
|
||||
}
|
||||
|
||||
public:
|
||||
std::set<T> values_;
|
||||
};
|
||||
|
||||
} //namespace exec
|
||||
} // namespace milvus
|
||||
@ -183,9 +183,9 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) {
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
std::unordered_set<GetType> elements;
|
||||
for (auto const& element : expr_->vals_) {
|
||||
elements.insert(GetValueFromProto<GetType>(element));
|
||||
if (!arg_inited_) {
|
||||
arg_set_ = std::make_shared<SortVectorElement<GetType>>(expr_->vals_);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
auto execute_sub_batch =
|
||||
[]<FilterType filter_type = FilterType::sequential>(
|
||||
@ -195,11 +195,11 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) {
|
||||
const int size,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
const std::unordered_set<GetType>& elements) {
|
||||
const std::shared_ptr<MultiElement>& elements) {
|
||||
auto executor = [&](size_t i) {
|
||||
const auto& array = data[i];
|
||||
for (int j = 0; j < array.length(); ++j) {
|
||||
if (elements.count(array.template get_data<GetType>(j)) > 0) {
|
||||
if (elements->In(array.template get_data<GetType>(j))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -226,10 +226,10 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) {
|
||||
input,
|
||||
res,
|
||||
valid_res,
|
||||
elements);
|
||||
arg_set_);
|
||||
} else {
|
||||
processed_size = ProcessDataChunks<milvus::ArrayView>(
|
||||
execute_sub_batch, std::nullptr_t{}, res, valid_res, elements);
|
||||
execute_sub_batch, std::nullptr_t{}, res, valid_res, arg_set_);
|
||||
}
|
||||
AssertInfo(processed_size == real_batch_size,
|
||||
"internal error: expr processed rows {} not equal "
|
||||
@ -258,10 +258,10 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) {
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
std::unordered_set<GetType> elements;
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
for (auto const& element : expr_->vals_) {
|
||||
elements.insert(GetValueFromProto<GetType>(element));
|
||||
if (!arg_inited_) {
|
||||
arg_set_ = std::make_shared<SortVectorElement<GetType>>(expr_->vals_);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
auto execute_sub_batch =
|
||||
[]<FilterType filter_type = FilterType::sequential>(
|
||||
@ -272,7 +272,7 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) {
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
const std::string& pointer,
|
||||
const std::unordered_set<GetType>& elements) {
|
||||
const std::shared_ptr<MultiElement>& elements) {
|
||||
auto executor = [&](size_t i) {
|
||||
auto doc = data[i].doc();
|
||||
auto array = doc.at_pointer(pointer).get_array();
|
||||
@ -284,7 +284,7 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) {
|
||||
if (val.error()) {
|
||||
continue;
|
||||
}
|
||||
if (elements.count(val.value()) > 0) {
|
||||
if (elements->In(val.value()) > 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -311,14 +311,14 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) {
|
||||
res,
|
||||
valid_res,
|
||||
pointer,
|
||||
elements);
|
||||
arg_set_);
|
||||
} else {
|
||||
processed_size = ProcessDataChunks<Json>(execute_sub_batch,
|
||||
std::nullptr_t{},
|
||||
res,
|
||||
valid_res,
|
||||
pointer,
|
||||
elements);
|
||||
arg_set_);
|
||||
}
|
||||
AssertInfo(processed_size == real_batch_size,
|
||||
"internal error: expr processed rows {} not equal "
|
||||
@ -442,7 +442,7 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) {
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
std::unordered_set<GetType> elements;
|
||||
std::set<GetType> elements;
|
||||
for (auto const& element : expr_->vals_) {
|
||||
elements.insert(GetValueFromProto<GetType>(element));
|
||||
}
|
||||
@ -455,9 +455,9 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) {
|
||||
const int size,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
const std::unordered_set<GetType>& elements) {
|
||||
const std::set<GetType>& elements) {
|
||||
auto executor = [&](size_t i) {
|
||||
std::unordered_set<GetType> tmp_elements(elements);
|
||||
std::set<GetType> tmp_elements(elements);
|
||||
// Note: array can only be iterated once
|
||||
for (int j = 0; j < data[i].length(); ++j) {
|
||||
tmp_elements.erase(data[i].template get_data<GetType>(j));
|
||||
@ -521,7 +521,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) {
|
||||
valid_res.set();
|
||||
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
std::unordered_set<GetType> elements;
|
||||
std::set<GetType> elements;
|
||||
for (auto const& element : expr_->vals_) {
|
||||
elements.insert(GetValueFromProto<GetType>(element));
|
||||
}
|
||||
@ -535,14 +535,14 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) {
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
const std::string& pointer,
|
||||
const std::unordered_set<GetType>& elements) {
|
||||
const std::set<GetType>& elements) {
|
||||
auto executor = [&](const size_t i) -> bool {
|
||||
auto doc = data[i].doc();
|
||||
auto array = doc.at_pointer(pointer).get_array();
|
||||
if (array.error()) {
|
||||
return false;
|
||||
}
|
||||
std::unordered_set<GetType> tmp_elements(elements);
|
||||
std::set<GetType> tmp_elements(elements);
|
||||
// Note: array can only be iterated once
|
||||
for (auto&& it : array) {
|
||||
auto val = it.template get<GetType>();
|
||||
|
||||
@ -22,6 +22,7 @@
|
||||
#include "common/Types.h"
|
||||
#include "common/Vector.h"
|
||||
#include "exec/expression/Expr.h"
|
||||
#include "exec/expression/Element.h"
|
||||
#include "segcore/SegmentInterface.h"
|
||||
|
||||
namespace milvus {
|
||||
@ -90,6 +91,8 @@ class PhyJsonContainsFilterExpr : public SegmentExpr {
|
||||
|
||||
private:
|
||||
std::shared_ptr<const milvus::expr::JsonContainsExpr> expr_;
|
||||
bool arg_inited_{false};
|
||||
std::shared_ptr<MultiElement> arg_set_;
|
||||
};
|
||||
} //namespace exec
|
||||
} // namespace milvus
|
||||
|
||||
@ -271,7 +271,11 @@ PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) {
|
||||
|
||||
AssertInfo(expr_->vals_.size() == 1,
|
||||
"element length in json array must be one");
|
||||
ValueType target_val = GetValueFromProto<ValueType>(expr_->vals_[0]);
|
||||
if (!arg_inited_) {
|
||||
arg_val_.SetValue<ValueType>(expr_->vals_[0]);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
auto target_val = arg_val_.GetValue<ValueType>();
|
||||
|
||||
auto execute_sub_batch =
|
||||
[]<FilterType filter_type = FilterType::sequential>(
|
||||
@ -348,12 +352,12 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
|
||||
if (expr_->column_.nested_path_.size() > 0) {
|
||||
index = std::stoi(expr_->column_.nested_path_[0]);
|
||||
}
|
||||
std::unordered_set<ValueType> term_set;
|
||||
for (const auto& element : expr_->vals_) {
|
||||
term_set.insert(GetValueFromProto<ValueType>(element));
|
||||
if (!arg_inited_) {
|
||||
arg_set_ = std::make_shared<SortVectorElement<ValueType>>(expr_->vals_);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
|
||||
if (term_set.empty()) {
|
||||
if (arg_set_->Empty()) {
|
||||
res.reset();
|
||||
MoveCursor();
|
||||
return res_vec;
|
||||
@ -368,7 +372,7 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
int index,
|
||||
const std::unordered_set<ValueType>& term_set) {
|
||||
const std::shared_ptr<MultiElement>& term_set) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
@ -378,12 +382,12 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (term_set.empty() || index >= data[offset].length()) {
|
||||
if (term_set->Empty() || index >= data[offset].length()) {
|
||||
res[i] = false;
|
||||
continue;
|
||||
}
|
||||
auto value = data[offset].get_data<GetType>(index);
|
||||
res[i] = term_set.find(ValueType(value)) != term_set.end();
|
||||
res[i] = term_set->In(ValueType(value));
|
||||
}
|
||||
};
|
||||
|
||||
@ -396,14 +400,14 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
|
||||
res,
|
||||
valid_res,
|
||||
index,
|
||||
term_set);
|
||||
arg_set_);
|
||||
} else {
|
||||
processed_size = ProcessDataChunks<milvus::ArrayView>(execute_sub_batch,
|
||||
std::nullptr_t{},
|
||||
res,
|
||||
valid_res,
|
||||
index,
|
||||
term_set);
|
||||
arg_set_);
|
||||
}
|
||||
AssertInfo(processed_size == real_batch_size,
|
||||
"internal error: expr processed rows {} not equal "
|
||||
@ -433,7 +437,12 @@ PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) {
|
||||
|
||||
AssertInfo(expr_->vals_.size() == 1,
|
||||
"element length in json array must be one");
|
||||
ValueType val = GetValueFromProto<ValueType>(expr_->vals_[0]);
|
||||
if (!arg_inited_) {
|
||||
arg_val_.SetValue<ValueType>(expr_->vals_[0]);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
auto val = arg_val_.GetValue<ValueType>();
|
||||
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
|
||||
auto execute_sub_batch =
|
||||
@ -514,12 +523,12 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
|
||||
valid_res.set();
|
||||
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
std::unordered_set<ValueType> term_set;
|
||||
for (const auto& element : expr_->vals_) {
|
||||
term_set.insert(GetValueFromProto<ValueType>(element));
|
||||
if (!arg_inited_) {
|
||||
arg_set_ = std::make_shared<SortVectorElement<ValueType>>(expr_->vals_);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
|
||||
if (term_set.empty()) {
|
||||
if (arg_set_->Empty()) {
|
||||
res.reset();
|
||||
MoveCursor();
|
||||
return res_vec;
|
||||
@ -534,7 +543,7 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
const std::string pointer,
|
||||
const std::unordered_set<ValueType>& terms) {
|
||||
const std::shared_ptr<MultiElement>& terms) {
|
||||
auto executor = [&](size_t i) {
|
||||
auto x = data[i].template at<GetType>(pointer);
|
||||
if (x.error()) {
|
||||
@ -547,11 +556,11 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
|
||||
auto value = x.value();
|
||||
// if the term set is {1}, and the value is 1.1, we should not return true.
|
||||
return std::floor(value) == value &&
|
||||
terms.find(ValueType(value)) != terms.end();
|
||||
terms->In(ValueType(x.value()));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return terms.find(ValueType(x.value())) != terms.end();
|
||||
return terms->In(ValueType(x.value()));
|
||||
};
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
@ -562,7 +571,7 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
if (terms.empty()) {
|
||||
if (terms->Empty()) {
|
||||
res[i] = false;
|
||||
continue;
|
||||
}
|
||||
@ -577,14 +586,14 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
|
||||
res,
|
||||
valid_res,
|
||||
pointer,
|
||||
term_set);
|
||||
arg_set_);
|
||||
} else {
|
||||
processed_size = ProcessDataChunks<milvus::Json>(execute_sub_batch,
|
||||
std::nullptr_t{},
|
||||
res,
|
||||
valid_res,
|
||||
pointer,
|
||||
term_set);
|
||||
arg_set_);
|
||||
}
|
||||
AssertInfo(processed_size == real_batch_size,
|
||||
"internal error: expr processed rows {} not equal "
|
||||
@ -676,16 +685,21 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
std::vector<T> vals;
|
||||
for (auto& val : expr_->vals_) {
|
||||
// Integral overflow process
|
||||
bool overflowed = false;
|
||||
auto converted_val = GetValueFromProtoWithOverflow<T>(val, overflowed);
|
||||
if (!overflowed) {
|
||||
vals.emplace_back(converted_val);
|
||||
if (!arg_inited_) {
|
||||
std::vector<T> vals;
|
||||
for (auto& val : expr_->vals_) {
|
||||
// Integral overflow process
|
||||
bool overflowed = false;
|
||||
auto converted_val =
|
||||
GetValueFromProtoWithOverflow<T>(val, overflowed);
|
||||
if (!overflowed) {
|
||||
vals.emplace_back(converted_val);
|
||||
}
|
||||
}
|
||||
arg_set_ = std::make_shared<SortVectorElement<T>>(vals);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
std::unordered_set<T> vals_set(vals.begin(), vals.end());
|
||||
|
||||
auto execute_sub_batch =
|
||||
[]<FilterType filter_type = FilterType::sequential>(
|
||||
const T* data,
|
||||
@ -694,8 +708,7 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
|
||||
const int size,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
const std::unordered_set<T>& vals) {
|
||||
TermElementFuncSet<T> func;
|
||||
const std::shared_ptr<MultiElement>& vals) {
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
if constexpr (filter_type == FilterType::random) {
|
||||
@ -705,7 +718,7 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
|
||||
res[i] = valid_res[i] = false;
|
||||
continue;
|
||||
}
|
||||
res[i] = func(vals, data[offset]);
|
||||
res[i] = vals->In(data[offset]);
|
||||
}
|
||||
};
|
||||
int64_t processed_size;
|
||||
@ -715,10 +728,10 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
|
||||
input,
|
||||
res,
|
||||
valid_res,
|
||||
vals_set);
|
||||
arg_set_);
|
||||
} else {
|
||||
processed_size = ProcessDataChunks<T>(
|
||||
execute_sub_batch, std::nullptr_t{}, res, valid_res, vals_set);
|
||||
execute_sub_batch, std::nullptr_t{}, res, valid_res, arg_set_);
|
||||
}
|
||||
AssertInfo(processed_size == real_batch_size,
|
||||
"internal error: expr processed rows {} not equal "
|
||||
|
||||
@ -22,6 +22,7 @@
|
||||
#include "common/Types.h"
|
||||
#include "common/Vector.h"
|
||||
#include "exec/expression/Expr.h"
|
||||
#include "exec/expression/Element.h"
|
||||
#include "segcore/SegmentInterface.h"
|
||||
|
||||
namespace milvus {
|
||||
@ -123,6 +124,9 @@ class PhyTermFilterExpr : public SegmentExpr {
|
||||
milvus::Timestamp query_timestamp_;
|
||||
bool cached_bits_inited_{false};
|
||||
TargetBitmap cached_bits_;
|
||||
bool arg_inited_{false};
|
||||
std::shared_ptr<MultiElement> arg_set_;
|
||||
SingleElement arg_val_;
|
||||
};
|
||||
} //namespace exec
|
||||
} // namespace milvus
|
||||
|
||||
@ -297,7 +297,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
valid_res.set();
|
||||
|
||||
ValueType val = GetValueFromProto<ValueType>(expr_->val_);
|
||||
if (!arg_inited_) {
|
||||
value_arg_.SetValue<ValueType>(expr_->val_);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
ValueType val = value_arg_.GetValue<ValueType>();
|
||||
auto op_type = expr_->op_type_;
|
||||
int index = -1;
|
||||
if (expr_->column_.nested_path_.size() > 0) {
|
||||
@ -506,7 +510,7 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) {
|
||||
};
|
||||
} else {
|
||||
auto size_per_chunk = segment_->size_per_chunk();
|
||||
retrieve = [ size_per_chunk, this ](int64_t offset) -> auto {
|
||||
retrieve = [ size_per_chunk, this ](int64_t offset) -> auto{
|
||||
auto chunk_idx = offset / size_per_chunk;
|
||||
auto chunk_offset = offset % size_per_chunk;
|
||||
const auto& chunk =
|
||||
@ -586,7 +590,12 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ExprValueType val = GetValueFromProto<ExprValueType>(expr_->val_);
|
||||
if (!arg_inited_) {
|
||||
value_arg_.SetValue<ExprValueType>(expr_->val_);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
|
||||
ExprValueType val = value_arg_.GetValue<ExprValueType>();
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
@ -847,6 +856,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForIndex() {
|
||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||
IndexInnerType;
|
||||
using Index = index::ScalarIndex<IndexInnerType>;
|
||||
if (!arg_inited_) {
|
||||
value_arg_.SetValue<IndexInnerType>(expr_->val_);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
if (auto res = PreCheckOverflow<T>()) {
|
||||
return res;
|
||||
}
|
||||
@ -907,7 +920,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForIndex() {
|
||||
}
|
||||
return res;
|
||||
};
|
||||
auto val = GetValueFromProto<IndexInnerType>(expr_->val_);
|
||||
IndexInnerType val = value_arg_.GetValue<IndexInnerType>();
|
||||
auto res = ProcessIndexChunks<T>(execute_sub_batch, val);
|
||||
AssertInfo(res->size() == real_batch_size,
|
||||
"internal error: expr processed rows {} not equal "
|
||||
@ -996,6 +1009,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (!arg_inited_) {
|
||||
value_arg_.SetValue<IndexInnerType>(expr_->val_);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
IndexInnerType val = GetValueFromProto<IndexInnerType>(expr_->val_);
|
||||
auto res_vec = std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||
@ -1123,7 +1140,12 @@ PhyUnaryRangeFilterExpr::CanUseIndexForJson() {
|
||||
VectorPtr
|
||||
PhyUnaryRangeFilterExpr::ExecTextMatch() {
|
||||
using Index = index::TextMatchIndex;
|
||||
auto query = GetValueFromProto<std::string>(expr_->val_);
|
||||
if (!arg_inited_) {
|
||||
value_arg_.SetValue<std::string>(expr_->val_);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
auto query = value_arg_.GetValue<std::string>();
|
||||
|
||||
int64_t slop = 0;
|
||||
if (expr_->op_type_ == proto::plan::PhraseMatch) {
|
||||
// It should be larger than 0 in normal cases. Check it incase of receiving old version proto.
|
||||
|
||||
@ -24,6 +24,7 @@
|
||||
#include "common/Types.h"
|
||||
#include "common/Vector.h"
|
||||
#include "exec/expression/Expr.h"
|
||||
#include "exec/expression/Element.h"
|
||||
#include "index/Meta.h"
|
||||
#include "index/ScalarIndex.h"
|
||||
#include "segcore/SegmentInterface.h"
|
||||
@ -389,6 +390,8 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
|
||||
private:
|
||||
std::shared_ptr<const milvus::expr::UnaryRangeFilterExpr> expr_;
|
||||
int64_t overflow_check_pos_{0};
|
||||
bool arg_inited_{false};
|
||||
SingleElement value_arg_;
|
||||
};
|
||||
} // namespace exec
|
||||
} // namespace milvus
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user