enhance: optimize expr performance in several places (#39695)

1. Skip re-reading the expr arguments, which deserialized the proto on every
batch execution.
2. Replace unordered_set with a sorted array, which performs better for
small sets.

#39688

Co-authored-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
zhagnlu 2025-02-16 20:32:14 +08:00 committed by GitHub
parent d827dd8b2f
commit 8a9f02ef71
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 440 additions and 81 deletions

View File

@ -123,14 +123,21 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson(
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
if (!arg_inited_) {
value_arg_.SetValue<ValueType>(expr_->value_);
if (expr_->arith_op_type_ == proto::plan::ArithOpType::ArrayLength) {
right_operand_arg_.SetValue(ValueType());
} else {
right_operand_arg_.SetValue<ValueType>(expr_->right_operand_);
}
arg_inited_ = true;
}
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
auto op_type = expr_->op_type_;
auto arith_type = expr_->arith_op_type_;
auto value = GetValueFromProto<ValueType>(expr_->value_);
auto right_operand =
arith_type != proto::plan::ArithOpType::ArrayLength
? GetValueFromProto<ValueType>(expr_->right_operand_)
: ValueType();
auto value = value_arg_.GetValue<ValueType>();
auto right_operand = right_operand_arg_.GetValue<ValueType>();
#define BinaryArithRangeJSONCompare(cmp) \
do { \
@ -514,6 +521,17 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray(
ValueType>;
auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize();
if (!arg_inited_) {
value_arg_.SetValue<ValueType>(expr_->value_);
if (expr_->arith_op_type_ == proto::plan::ArithOpType::ArrayLength) {
right_operand_arg_.SetValue(ValueType());
} else {
right_operand_arg_.SetValue<ValueType>(expr_->right_operand_);
}
arg_inited_ = true;
}
if (real_batch_size == 0) {
return nullptr;
}
@ -529,11 +547,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray(
}
auto op_type = expr_->op_type_;
auto arith_type = expr_->arith_op_type_;
auto value = GetValueFromProto<ValueType>(expr_->value_);
auto right_operand =
arith_type != proto::plan::ArithOpType::ArrayLength
? GetValueFromProto<ValueType>(expr_->right_operand_)
: ValueType();
auto value = value_arg_.GetValue<ValueType>();
auto right_operand = right_operand_arg_.GetValue<ValueType>();
#define BinaryArithRangeArrayCompare(cmp) \
do { \
@ -898,9 +913,14 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex(
if (real_batch_size == 0) {
return nullptr;
}
auto value = GetValueFromProto<HighPrecisionType>(expr_->value_);
auto right_operand =
GetValueFromProto<HighPrecisionType>(expr_->right_operand_);
if (!arg_inited_) {
value_arg_.SetValue<HighPrecisionType>(expr_->value_);
right_operand_arg_.SetValue<HighPrecisionType>(expr_->right_operand_);
arg_inited_ = true;
}
auto value = value_arg_.GetValue<HighPrecisionType>();
auto right_operand = right_operand_arg_.GetValue<HighPrecisionType>();
auto op_type = expr_->op_type_;
auto arith_type = expr_->arith_op_type_;
auto sub_batch_size = has_offset_input_ ? input->size() : size_per_chunk_;
@ -1415,15 +1435,20 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData(
return nullptr;
}
auto value = GetValueFromProto<HighPrecisionType>(expr_->value_);
auto right_operand =
GetValueFromProto<HighPrecisionType>(expr_->right_operand_);
auto res_vec = std::make_shared<ColumnVector>(
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
if (!arg_inited_) {
value_arg_.SetValue<HighPrecisionType>(expr_->value_);
right_operand_arg_.SetValue<HighPrecisionType>(expr_->right_operand_);
arg_inited_ = true;
}
auto value = value_arg_.GetValue<HighPrecisionType>();
auto right_operand = right_operand_arg_.GetValue<HighPrecisionType>();
auto op_type = expr_->op_type_;
auto arith_type = expr_->arith_op_type_;

View File

@ -24,6 +24,7 @@
#include "common/Vector.h"
#include "exec/expression/Expr.h"
#include "segcore/SegmentInterface.h"
#include "exec/expression/Element.h"
namespace milvus {
namespace exec {
@ -485,6 +486,10 @@ class PhyBinaryArithOpEvalRangeExpr : public SegmentExpr {
private:
std::shared_ptr<const milvus::expr::BinaryArithOpEvalRangeExpr> expr_;
SingleElement right_operand_arg_;
SingleElement value_arg_;
bool arg_inited_{false};
};
} //namespace exec
} // namespace milvus

View File

@ -143,9 +143,14 @@ PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1,
OffsetVector* input) {
lower_inclusive = expr_->lower_inclusive_;
upper_inclusive = expr_->upper_inclusive_;
val1 = GetValueFromProto<HighPrecisionType>(expr_->lower_val_);
val2 = GetValueFromProto<HighPrecisionType>(expr_->upper_val_);
if (!arg_inited_) {
lower_arg_.SetValue<HighPrecisionType>(expr_->lower_val_);
upper_arg_.SetValue<HighPrecisionType>(expr_->upper_val_);
arg_inited_ = true;
}
val1 = lower_arg_.GetValue<HighPrecisionType>();
val2 = upper_arg_.GetValue<HighPrecisionType>();
auto get_next_overflow_batch =
[this](OffsetVector* input) -> ColumnVectorPtr {
int64_t batch_size;
@ -358,8 +363,13 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
bool lower_inclusive = expr_->lower_inclusive_;
bool upper_inclusive = expr_->upper_inclusive_;
ValueType val1 = GetValueFromProto<ValueType>(expr_->lower_val_);
ValueType val2 = GetValueFromProto<ValueType>(expr_->upper_val_);
if (!arg_inited_) {
lower_arg_.SetValue<ValueType>(expr_->lower_val_);
upper_arg_.SetValue<ValueType>(expr_->upper_val_);
arg_inited_ = true;
}
ValueType val1 = lower_arg_.GetValue<ValueType>();
ValueType val2 = upper_arg_.GetValue<ValueType>();
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
auto execute_sub_batch =
@ -464,8 +474,15 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
bool lower_inclusive = expr_->lower_inclusive_;
bool upper_inclusive = expr_->upper_inclusive_;
ValueType val1 = GetValueFromProto<ValueType>(expr_->lower_val_);
ValueType val2 = GetValueFromProto<ValueType>(expr_->upper_val_);
if (!arg_inited_) {
lower_arg_.SetValue<ValueType>(expr_->lower_val_);
upper_arg_.SetValue<ValueType>(expr_->upper_val_);
arg_inited_ = true;
}
ValueType val1 = lower_arg_.GetValue<ValueType>();
ValueType val2 = upper_arg_.GetValue<ValueType>();
int index = -1;
if (expr_->column_.nested_path_.size() > 0) {
index = std::stoi(expr_->column_.nested_path_[0]);

View File

@ -22,6 +22,7 @@
#include "common/Types.h"
#include "common/Vector.h"
#include "exec/expression/Expr.h"
#include "exec/expression/Element.h"
#include "segcore/SegmentInterface.h"
namespace milvus {
@ -278,6 +279,9 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
private:
std::shared_ptr<const milvus::expr::BinaryRangeFilterExpr> expr_;
int64_t overflow_check_pos_{0};
SingleElement lower_arg_;
SingleElement upper_arg_;
bool arg_inited_{false};
};
} //namespace exec
} // namespace milvus

View File

@ -0,0 +1,263 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <set>
#include <string>
#include <string_view>
#include <type_traits>
#include <variant>
#include <vector>
#include "common/Types.h"
#include "exec/expression/EvalCtx.h"
#include "exec/expression/VectorFunction.h"
#include "exec/expression/Utils.h"
#include "exec/QueryContext.h"
#include "expr/ITypeExpr.h"
#include "query/PlanProto.h"
namespace milvus {
namespace exec {
// Stateless polymorphic root of the element hierarchy declared in this
// header. The virtual destructor allows a derived element to be destroyed
// through a BaseElement pointer.
class BaseElement {
 public:
    virtual ~BaseElement() = default;
};
class SingleElement : public BaseElement {
public:
using ValueType = std::variant<std::monostate,
bool,
int8_t,
int16_t,
int32_t,
int64_t,
float,
double,
std::string,
proto::plan::Array>;
SingleElement() = default;
virtual ~SingleElement() = default;
template <typename T>
void
SetValue(const proto::plan::GenericValue& value) {
value_ = GetValueFromProto<T>(value);
}
template <typename T>
void
SetValue(const T& value) {
if constexpr (std::is_same_v<T, bool> || std::is_same_v<T, int8_t> ||
std::is_same_v<T, int16_t> ||
std::is_same_v<T, int32_t> ||
std::is_same_v<T, int64_t> || std::is_same_v<T, float> ||
std::is_same_v<T, double> ||
std::is_same_v<T, std::string>) {
value_ = value;
} else {
static_assert(sizeof(T) == 0,
"Type not supported in SingleElement");
}
}
template <typename T>
T
GetValue() const {
try {
return std::get<T>(value_);
} catch (const std::bad_variant_access& e) {
PanicInfo(ErrorCode::UnexpectedError,
"SingleElement GetValue() failed: {}",
e.what());
}
}
public:
ValueType value_;
};
class MultiElement : public BaseElement {
public:
using ValueType = std::variant<std::monostate,
bool,
int8_t,
int16_t,
int32_t,
int64_t,
float,
double,
std::string,
std::string_view>;
MultiElement() = default;
virtual ~MultiElement() = default;
virtual bool
In(const ValueType& value) const = 0;
virtual bool
Empty() const = 0;
virtual size_t
Size() const = 0;
};
// MultiElement backed by a sorted std::vector<T>; membership is answered with
// std::binary_search.
//
// Duplicates in the input are kept, so Size() counts them.
//
// Both constructors leave the container sorted. AddElement() appends without
// re-sorting and therefore marks the container as unsorted; call Sort() again
// before the next In(), which asserts on the sorted flag.
template <typename T>
class SortVectorElement : public MultiElement {
 public:
    // Converts every proto value with GetValueFromProto<T>, then sorts.
    explicit SortVectorElement(
        const std::vector<proto::plan::GenericValue>& values) {
        values_.reserve(values.size());
        for (const auto& value : values) {
            values_.push_back(GetValueFromProto<T>(value));
        }
        Sort();
    }

    // Copies already-typed values, then sorts.
    explicit SortVectorElement(const std::vector<T>& values)
        : values_(values) {
        Sort();
    }

    bool
    Empty() const override {
        return values_.empty();
    }

    size_t
    Size() const override {
        return values_.size();
    }

    // Returns true when `value` holds a T that is present in the container.
    // A variant holding any other alternative is reported as absent.
    bool
    In(const ValueType& value) const override {
        AssertInfo(sorted_, "In() should be sorted before");
        if (const T* probe = std::get_if<T>(&value)) {
            return std::binary_search(values_.begin(), values_.end(), *probe);
        }
        return false;
    }

    // Sorts the stored values and marks the container as searchable.
    void
    Sort() {
        std::sort(values_.begin(), values_.end());
        sorted_ = true;
    }

    // Appends a value. The order is no longer guaranteed afterwards, so the
    // sorted flag is cleared; otherwise In() would binary-search unsorted
    // data and could miss elements.
    void
    AddElement(const T& value) {
        values_.push_back(value);
        sorted_ = false;
    }

 public:
    std::vector<T> values_;
    // True while values_ is known to be in ascending order.
    bool sorted_{false};
};
// MultiElement backed by an unsorted std::vector<T>; membership is answered
// with a linear scan. Values keep their insertion order and duplicates are
// kept, so Size() counts them.
template <typename T>
class FlatVectorElement : public MultiElement {
 public:
    // Converts every proto value with GetValueFromProto<T>.
    explicit FlatVectorElement(
        const std::vector<proto::plan::GenericValue>& values) {
        values_.reserve(values.size());
        for (const auto& value : values) {
            values_.push_back(GetValueFromProto<T>(value));
        }
    }

    // Copies already-typed values.
    explicit FlatVectorElement(const std::vector<T>& values)
        : values_(values) {
    }

    bool
    Empty() const override {
        return values_.empty();
    }

    // Returns true when `value` holds a T equal to one of the stored values.
    // The comparison is made against the T held by the variant: comparing a
    // stored T with the variant object itself has no matching operator==.
    bool
    In(const ValueType& value) const override {
        if (const T* probe = std::get_if<T>(&value)) {
            return std::find(values_.begin(), values_.end(), *probe) !=
                   values_.end();
        }
        return false;
    }

    size_t
    Size() const override {
        return values_.size();
    }

    // Appends a value at the end.
    void
    AddElement(const T& value) {
        values_.push_back(value);
    }

 public:
    std::vector<T> values_;
};
// MultiElement backed by a std::set<T>. Duplicate inputs collapse into one
// entry, so Size() reports the number of distinct values.
template <typename T>
class SetElement : public MultiElement {
 public:
    // Converts every proto value with GetValueFromProto<T> and inserts it.
    explicit SetElement(const std::vector<proto::plan::GenericValue>& values) {
        for (const auto& value : values) {
            values_.insert(GetValueFromProto<T>(value));
        }
    }

    // Inserts already-typed values.
    explicit SetElement(const std::vector<T>& values)
        : values_(values.begin(), values.end()) {
    }

    bool
    Empty() const override {
        return values_.empty();
    }

    // Returns true when `value` holds a T that is present in the set. A
    // variant holding any other alternative is reported as absent; without
    // the final return the function would run off its end, which is
    // undefined behaviour for a function returning bool.
    bool
    In(const ValueType& value) const override {
        if (const T* probe = std::get_if<T>(&value)) {
            return values_.find(*probe) != values_.end();
        }
        return false;
    }

    // Inserts a value; inserting one that is already present has no effect.
    void
    AddElement(const T& value) {
        values_.insert(value);
    }

    size_t
    Size() const override {
        return values_.size();
    }

 public:
    std::set<T> values_;
};
} //namespace exec
} // namespace milvus

View File

@ -183,9 +183,9 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) {
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
std::unordered_set<GetType> elements;
for (auto const& element : expr_->vals_) {
elements.insert(GetValueFromProto<GetType>(element));
if (!arg_inited_) {
arg_set_ = std::make_shared<SortVectorElement<GetType>>(expr_->vals_);
arg_inited_ = true;
}
auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>(
@ -195,11 +195,11 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) {
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
const std::unordered_set<GetType>& elements) {
const std::shared_ptr<MultiElement>& elements) {
auto executor = [&](size_t i) {
const auto& array = data[i];
for (int j = 0; j < array.length(); ++j) {
if (elements.count(array.template get_data<GetType>(j)) > 0) {
if (elements->In(array.template get_data<GetType>(j))) {
return true;
}
}
@ -226,10 +226,10 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) {
input,
res,
valid_res,
elements);
arg_set_);
} else {
processed_size = ProcessDataChunks<milvus::ArrayView>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, elements);
execute_sub_batch, std::nullptr_t{}, res, valid_res, arg_set_);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
@ -258,10 +258,10 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) {
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
std::unordered_set<GetType> elements;
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
for (auto const& element : expr_->vals_) {
elements.insert(GetValueFromProto<GetType>(element));
if (!arg_inited_) {
arg_set_ = std::make_shared<SortVectorElement<GetType>>(expr_->vals_);
arg_inited_ = true;
}
auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>(
@ -272,7 +272,7 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) {
TargetBitmapView res,
TargetBitmapView valid_res,
const std::string& pointer,
const std::unordered_set<GetType>& elements) {
const std::shared_ptr<MultiElement>& elements) {
auto executor = [&](size_t i) {
auto doc = data[i].doc();
auto array = doc.at_pointer(pointer).get_array();
@ -284,7 +284,7 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) {
if (val.error()) {
continue;
}
if (elements.count(val.value()) > 0) {
if (elements->In(val.value()) > 0) {
return true;
}
}
@ -311,14 +311,14 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) {
res,
valid_res,
pointer,
elements);
arg_set_);
} else {
processed_size = ProcessDataChunks<Json>(execute_sub_batch,
std::nullptr_t{},
res,
valid_res,
pointer,
elements);
arg_set_);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
@ -442,7 +442,7 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) {
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
std::unordered_set<GetType> elements;
std::set<GetType> elements;
for (auto const& element : expr_->vals_) {
elements.insert(GetValueFromProto<GetType>(element));
}
@ -455,9 +455,9 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) {
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
const std::unordered_set<GetType>& elements) {
const std::set<GetType>& elements) {
auto executor = [&](size_t i) {
std::unordered_set<GetType> tmp_elements(elements);
std::set<GetType> tmp_elements(elements);
// Note: array can only be iterated once
for (int j = 0; j < data[i].length(); ++j) {
tmp_elements.erase(data[i].template get_data<GetType>(j));
@ -521,7 +521,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) {
valid_res.set();
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
std::unordered_set<GetType> elements;
std::set<GetType> elements;
for (auto const& element : expr_->vals_) {
elements.insert(GetValueFromProto<GetType>(element));
}
@ -535,14 +535,14 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) {
TargetBitmapView res,
TargetBitmapView valid_res,
const std::string& pointer,
const std::unordered_set<GetType>& elements) {
const std::set<GetType>& elements) {
auto executor = [&](const size_t i) -> bool {
auto doc = data[i].doc();
auto array = doc.at_pointer(pointer).get_array();
if (array.error()) {
return false;
}
std::unordered_set<GetType> tmp_elements(elements);
std::set<GetType> tmp_elements(elements);
// Note: array can only be iterated once
for (auto&& it : array) {
auto val = it.template get<GetType>();

View File

@ -22,6 +22,7 @@
#include "common/Types.h"
#include "common/Vector.h"
#include "exec/expression/Expr.h"
#include "exec/expression/Element.h"
#include "segcore/SegmentInterface.h"
namespace milvus {
@ -90,6 +91,8 @@ class PhyJsonContainsFilterExpr : public SegmentExpr {
private:
std::shared_ptr<const milvus::expr::JsonContainsExpr> expr_;
bool arg_inited_{false};
std::shared_ptr<MultiElement> arg_set_;
};
} //namespace exec
} // namespace milvus

View File

@ -271,7 +271,11 @@ PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) {
AssertInfo(expr_->vals_.size() == 1,
"element length in json array must be one");
ValueType target_val = GetValueFromProto<ValueType>(expr_->vals_[0]);
if (!arg_inited_) {
arg_val_.SetValue<ValueType>(expr_->vals_[0]);
arg_inited_ = true;
}
auto target_val = arg_val_.GetValue<ValueType>();
auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>(
@ -348,12 +352,12 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
if (expr_->column_.nested_path_.size() > 0) {
index = std::stoi(expr_->column_.nested_path_[0]);
}
std::unordered_set<ValueType> term_set;
for (const auto& element : expr_->vals_) {
term_set.insert(GetValueFromProto<ValueType>(element));
if (!arg_inited_) {
arg_set_ = std::make_shared<SortVectorElement<ValueType>>(expr_->vals_);
arg_inited_ = true;
}
if (term_set.empty()) {
if (arg_set_->Empty()) {
res.reset();
MoveCursor();
return res_vec;
@ -368,7 +372,7 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
TargetBitmapView res,
TargetBitmapView valid_res,
int index,
const std::unordered_set<ValueType>& term_set) {
const std::shared_ptr<MultiElement>& term_set) {
for (int i = 0; i < size; ++i) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
@ -378,12 +382,12 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
res[i] = valid_res[i] = false;
continue;
}
if (term_set.empty() || index >= data[offset].length()) {
if (term_set->Empty() || index >= data[offset].length()) {
res[i] = false;
continue;
}
auto value = data[offset].get_data<GetType>(index);
res[i] = term_set.find(ValueType(value)) != term_set.end();
res[i] = term_set->In(ValueType(value));
}
};
@ -396,14 +400,14 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
res,
valid_res,
index,
term_set);
arg_set_);
} else {
processed_size = ProcessDataChunks<milvus::ArrayView>(execute_sub_batch,
std::nullptr_t{},
res,
valid_res,
index,
term_set);
arg_set_);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
@ -433,7 +437,12 @@ PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) {
AssertInfo(expr_->vals_.size() == 1,
"element length in json array must be one");
ValueType val = GetValueFromProto<ValueType>(expr_->vals_[0]);
if (!arg_inited_) {
arg_val_.SetValue<ValueType>(expr_->vals_[0]);
arg_inited_ = true;
}
auto val = arg_val_.GetValue<ValueType>();
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
auto execute_sub_batch =
@ -514,12 +523,12 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
valid_res.set();
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
std::unordered_set<ValueType> term_set;
for (const auto& element : expr_->vals_) {
term_set.insert(GetValueFromProto<ValueType>(element));
if (!arg_inited_) {
arg_set_ = std::make_shared<SortVectorElement<ValueType>>(expr_->vals_);
arg_inited_ = true;
}
if (term_set.empty()) {
if (arg_set_->Empty()) {
res.reset();
MoveCursor();
return res_vec;
@ -534,7 +543,7 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
TargetBitmapView res,
TargetBitmapView valid_res,
const std::string pointer,
const std::unordered_set<ValueType>& terms) {
const std::shared_ptr<MultiElement>& terms) {
auto executor = [&](size_t i) {
auto x = data[i].template at<GetType>(pointer);
if (x.error()) {
@ -547,11 +556,11 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
auto value = x.value();
// if the term set is {1}, and the value is 1.1, we should not return true.
return std::floor(value) == value &&
terms.find(ValueType(value)) != terms.end();
terms->In(ValueType(x.value()));
}
return false;
}
return terms.find(ValueType(x.value())) != terms.end();
return terms->In(ValueType(x.value()));
};
for (size_t i = 0; i < size; ++i) {
auto offset = i;
@ -562,7 +571,7 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
res[i] = valid_res[i] = false;
continue;
}
if (terms.empty()) {
if (terms->Empty()) {
res[i] = false;
continue;
}
@ -577,14 +586,14 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
res,
valid_res,
pointer,
term_set);
arg_set_);
} else {
processed_size = ProcessDataChunks<milvus::Json>(execute_sub_batch,
std::nullptr_t{},
res,
valid_res,
pointer,
term_set);
arg_set_);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
@ -676,16 +685,21 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
std::vector<T> vals;
for (auto& val : expr_->vals_) {
// Integral overflow process
bool overflowed = false;
auto converted_val = GetValueFromProtoWithOverflow<T>(val, overflowed);
if (!overflowed) {
vals.emplace_back(converted_val);
if (!arg_inited_) {
std::vector<T> vals;
for (auto& val : expr_->vals_) {
// Integral overflow process
bool overflowed = false;
auto converted_val =
GetValueFromProtoWithOverflow<T>(val, overflowed);
if (!overflowed) {
vals.emplace_back(converted_val);
}
}
arg_set_ = std::make_shared<SortVectorElement<T>>(vals);
arg_inited_ = true;
}
std::unordered_set<T> vals_set(vals.begin(), vals.end());
auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>(
const T* data,
@ -694,8 +708,7 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
const std::unordered_set<T>& vals) {
TermElementFuncSet<T> func;
const std::shared_ptr<MultiElement>& vals) {
for (size_t i = 0; i < size; ++i) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
@ -705,7 +718,7 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
res[i] = valid_res[i] = false;
continue;
}
res[i] = func(vals, data[offset]);
res[i] = vals->In(data[offset]);
}
};
int64_t processed_size;
@ -715,10 +728,10 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
input,
res,
valid_res,
vals_set);
arg_set_);
} else {
processed_size = ProcessDataChunks<T>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, vals_set);
execute_sub_batch, std::nullptr_t{}, res, valid_res, arg_set_);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "

View File

@ -22,6 +22,7 @@
#include "common/Types.h"
#include "common/Vector.h"
#include "exec/expression/Expr.h"
#include "exec/expression/Element.h"
#include "segcore/SegmentInterface.h"
namespace milvus {
@ -123,6 +124,9 @@ class PhyTermFilterExpr : public SegmentExpr {
milvus::Timestamp query_timestamp_;
bool cached_bits_inited_{false};
TargetBitmap cached_bits_;
bool arg_inited_{false};
std::shared_ptr<MultiElement> arg_set_;
SingleElement arg_val_;
};
} //namespace exec
} // namespace milvus

View File

@ -297,7 +297,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
ValueType val = GetValueFromProto<ValueType>(expr_->val_);
if (!arg_inited_) {
value_arg_.SetValue<ValueType>(expr_->val_);
arg_inited_ = true;
}
ValueType val = value_arg_.GetValue<ValueType>();
auto op_type = expr_->op_type_;
int index = -1;
if (expr_->column_.nested_path_.size() > 0) {
@ -506,7 +510,7 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) {
};
} else {
auto size_per_chunk = segment_->size_per_chunk();
retrieve = [ size_per_chunk, this ](int64_t offset) -> auto {
retrieve = [ size_per_chunk, this ](int64_t offset) -> auto{
auto chunk_idx = offset / size_per_chunk;
auto chunk_offset = offset % size_per_chunk;
const auto& chunk =
@ -586,7 +590,12 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
return nullptr;
}
ExprValueType val = GetValueFromProto<ExprValueType>(expr_->val_);
if (!arg_inited_) {
value_arg_.SetValue<ExprValueType>(expr_->val_);
arg_inited_ = true;
}
ExprValueType val = value_arg_.GetValue<ExprValueType>();
auto res_vec = std::make_shared<ColumnVector>(
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
@ -847,6 +856,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForIndex() {
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
IndexInnerType;
using Index = index::ScalarIndex<IndexInnerType>;
if (!arg_inited_) {
value_arg_.SetValue<IndexInnerType>(expr_->val_);
arg_inited_ = true;
}
if (auto res = PreCheckOverflow<T>()) {
return res;
}
@ -907,7 +920,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForIndex() {
}
return res;
};
auto val = GetValueFromProto<IndexInnerType>(expr_->val_);
IndexInnerType val = value_arg_.GetValue<IndexInnerType>();
auto res = ProcessIndexChunks<T>(execute_sub_batch, val);
AssertInfo(res->size() == real_batch_size,
"internal error: expr processed rows {} not equal "
@ -996,6 +1009,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
return nullptr;
}
if (!arg_inited_) {
value_arg_.SetValue<IndexInnerType>(expr_->val_);
arg_inited_ = true;
}
IndexInnerType val = GetValueFromProto<IndexInnerType>(expr_->val_);
auto res_vec = std::make_shared<ColumnVector>(
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
@ -1123,7 +1140,12 @@ PhyUnaryRangeFilterExpr::CanUseIndexForJson() {
VectorPtr
PhyUnaryRangeFilterExpr::ExecTextMatch() {
using Index = index::TextMatchIndex;
auto query = GetValueFromProto<std::string>(expr_->val_);
if (!arg_inited_) {
value_arg_.SetValue<std::string>(expr_->val_);
arg_inited_ = true;
}
auto query = value_arg_.GetValue<std::string>();
int64_t slop = 0;
if (expr_->op_type_ == proto::plan::PhraseMatch) {
// It should be larger than 0 in normal cases. Check it incase of receiving old version proto.

View File

@ -24,6 +24,7 @@
#include "common/Types.h"
#include "common/Vector.h"
#include "exec/expression/Expr.h"
#include "exec/expression/Element.h"
#include "index/Meta.h"
#include "index/ScalarIndex.h"
#include "segcore/SegmentInterface.h"
@ -389,6 +390,8 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
private:
std::shared_ptr<const milvus::expr::UnaryRangeFilterExpr> expr_;
int64_t overflow_check_pos_{0};
bool arg_inited_{false};
SingleElement value_arg_;
};
} // namespace exec
} // namespace milvus