From 8a9f02ef71009fd2c8904e06fd0118fbb70a85ef Mon Sep 17 00:00:00 2001 From: zhagnlu <1542303831@qq.com> Date: Sun, 16 Feb 2025 20:32:14 +0800 Subject: [PATCH] enhance: optimize expr performance for some points (#39695) 1. Avoid re-reading expr arguments, which deserializes the proto, on every batch execution. 2. Replace unordered_set with a sorted array, which has better performance for small sets. #39688 Co-authored-by: luzhang --- .../expression/BinaryArithOpEvalRangeExpr.cpp | 57 ++-- .../expression/BinaryArithOpEvalRangeExpr.h | 5 + .../src/exec/expression/BinaryRangeExpr.cpp | 29 +- .../src/exec/expression/BinaryRangeExpr.h | 4 + internal/core/src/exec/expression/Element.h | 263 ++++++++++++++++++ .../src/exec/expression/JsonContainsExpr.cpp | 40 +-- .../src/exec/expression/JsonContainsExpr.h | 3 + .../core/src/exec/expression/TermExpr.cpp | 81 +++--- internal/core/src/exec/expression/TermExpr.h | 4 + .../core/src/exec/expression/UnaryExpr.cpp | 32 ++- internal/core/src/exec/expression/UnaryExpr.h | 3 + 11 files changed, 440 insertions(+), 81 deletions(-) create mode 100644 internal/core/src/exec/expression/Element.h diff --git a/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.cpp b/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.cpp index 9f886c10da..6fab3aaf0c 100644 --- a/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.cpp +++ b/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.cpp @@ -123,14 +123,21 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson( TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); valid_res.set(); + if (!arg_inited_) { + value_arg_.SetValue(expr_->value_); + if (expr_->arith_op_type_ == proto::plan::ArithOpType::ArrayLength) { + right_operand_arg_.SetValue(ValueType()); + } else { + right_operand_arg_.SetValue(expr_->right_operand_); + } + arg_inited_ = true; + } + auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); auto op_type = expr_->op_type_; auto arith_type 
= expr_->arith_op_type_; - auto value = GetValueFromProto(expr_->value_); - auto right_operand = - arith_type != proto::plan::ArithOpType::ArrayLength - ? GetValueFromProto(expr_->right_operand_) - : ValueType(); + auto value = value_arg_.GetValue(); + auto right_operand = right_operand_arg_.GetValue(); #define BinaryArithRangeJSONCompare(cmp) \ do { \ @@ -514,6 +521,17 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray( ValueType>; auto real_batch_size = has_offset_input_ ? input->size() : GetNextBatchSize(); + + if (!arg_inited_) { + value_arg_.SetValue(expr_->value_); + if (expr_->arith_op_type_ == proto::plan::ArithOpType::ArrayLength) { + right_operand_arg_.SetValue(ValueType()); + } else { + right_operand_arg_.SetValue(expr_->right_operand_); + } + arg_inited_ = true; + } + if (real_batch_size == 0) { return nullptr; } @@ -529,11 +547,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray( } auto op_type = expr_->op_type_; auto arith_type = expr_->arith_op_type_; - auto value = GetValueFromProto(expr_->value_); - auto right_operand = - arith_type != proto::plan::ArithOpType::ArrayLength - ? GetValueFromProto(expr_->right_operand_) - : ValueType(); + auto value = value_arg_.GetValue(); + auto right_operand = right_operand_arg_.GetValue(); #define BinaryArithRangeArrayCompare(cmp) \ do { \ @@ -898,9 +913,14 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex( if (real_batch_size == 0) { return nullptr; } - auto value = GetValueFromProto(expr_->value_); - auto right_operand = - GetValueFromProto(expr_->right_operand_); + if (!arg_inited_) { + value_arg_.SetValue(expr_->value_); + right_operand_arg_.SetValue(expr_->right_operand_); + arg_inited_ = true; + } + + auto value = value_arg_.GetValue(); + auto right_operand = right_operand_arg_.GetValue(); auto op_type = expr_->op_type_; auto arith_type = expr_->arith_op_type_; auto sub_batch_size = has_offset_input_ ? 
input->size() : size_per_chunk_; @@ -1415,15 +1435,20 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData( return nullptr; } - auto value = GetValueFromProto(expr_->value_); - auto right_operand = - GetValueFromProto(expr_->right_operand_); auto res_vec = std::make_shared( TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); valid_res.set(); + if (!arg_inited_) { + value_arg_.SetValue(expr_->value_); + right_operand_arg_.SetValue(expr_->right_operand_); + arg_inited_ = true; + } + + auto value = value_arg_.GetValue(); + auto right_operand = right_operand_arg_.GetValue(); auto op_type = expr_->op_type_; auto arith_type = expr_->arith_op_type_; diff --git a/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.h b/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.h index c69a928ba6..5e843de2d0 100644 --- a/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.h +++ b/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.h @@ -24,6 +24,7 @@ #include "common/Vector.h" #include "exec/expression/Expr.h" #include "segcore/SegmentInterface.h" +#include "exec/expression/Element.h" namespace milvus { namespace exec { @@ -485,6 +486,10 @@ class PhyBinaryArithOpEvalRangeExpr : public SegmentExpr { private: std::shared_ptr expr_; + SingleElement right_operand_arg_; + SingleElement value_arg_; + bool arg_inited_{false}; }; + } //namespace exec } // namespace milvus diff --git a/internal/core/src/exec/expression/BinaryRangeExpr.cpp b/internal/core/src/exec/expression/BinaryRangeExpr.cpp index 7dd0943794..b5453965c6 100644 --- a/internal/core/src/exec/expression/BinaryRangeExpr.cpp +++ b/internal/core/src/exec/expression/BinaryRangeExpr.cpp @@ -143,9 +143,14 @@ PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1, OffsetVector* input) { lower_inclusive = expr_->lower_inclusive_; 
upper_inclusive = expr_->upper_inclusive_; - val1 = GetValueFromProto(expr_->lower_val_); - val2 = GetValueFromProto(expr_->upper_val_); + if (!arg_inited_) { + lower_arg_.SetValue(expr_->lower_val_); + upper_arg_.SetValue(expr_->upper_val_); + arg_inited_ = true; + } + val1 = lower_arg_.GetValue(); + val2 = upper_arg_.GetValue(); auto get_next_overflow_batch = [this](OffsetVector* input) -> ColumnVectorPtr { int64_t batch_size; @@ -358,8 +363,13 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) { bool lower_inclusive = expr_->lower_inclusive_; bool upper_inclusive = expr_->upper_inclusive_; - ValueType val1 = GetValueFromProto(expr_->lower_val_); - ValueType val2 = GetValueFromProto(expr_->upper_val_); + if (!arg_inited_) { + lower_arg_.SetValue(expr_->lower_val_); + upper_arg_.SetValue(expr_->upper_val_); + arg_inited_ = true; + } + ValueType val1 = lower_arg_.GetValue(); + ValueType val2 = upper_arg_.GetValue(); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); auto execute_sub_batch = @@ -464,8 +474,15 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) { bool lower_inclusive = expr_->lower_inclusive_; bool upper_inclusive = expr_->upper_inclusive_; - ValueType val1 = GetValueFromProto(expr_->lower_val_); - ValueType val2 = GetValueFromProto(expr_->upper_val_); + + if (!arg_inited_) { + lower_arg_.SetValue(expr_->lower_val_); + upper_arg_.SetValue(expr_->upper_val_); + arg_inited_ = true; + } + ValueType val1 = lower_arg_.GetValue(); + ValueType val2 = upper_arg_.GetValue(); + int index = -1; if (expr_->column_.nested_path_.size() > 0) { index = std::stoi(expr_->column_.nested_path_[0]); diff --git a/internal/core/src/exec/expression/BinaryRangeExpr.h b/internal/core/src/exec/expression/BinaryRangeExpr.h index 0ca4fac9e2..e0dd3e8f5a 100644 --- a/internal/core/src/exec/expression/BinaryRangeExpr.h +++ b/internal/core/src/exec/expression/BinaryRangeExpr.h @@ -22,6 +22,7 @@ #include 
"common/Types.h" #include "common/Vector.h" #include "exec/expression/Expr.h" +#include "exec/expression/Element.h" #include "segcore/SegmentInterface.h" namespace milvus { @@ -278,6 +279,9 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr { private: std::shared_ptr expr_; int64_t overflow_check_pos_{0}; + SingleElement lower_arg_; + SingleElement upper_arg_; + bool arg_inited_{false}; }; } //namespace exec } // namespace milvus diff --git a/internal/core/src/exec/expression/Element.h b/internal/core/src/exec/expression/Element.h new file mode 100644 index 0000000000..85455bc0b7 --- /dev/null +++ b/internal/core/src/exec/expression/Element.h @@ -0,0 +1,263 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include + +#include "common/Types.h" +#include "exec/expression/EvalCtx.h" +#include "exec/expression/VectorFunction.h" +#include "exec/expression/Utils.h" +#include "exec/QueryContext.h" +#include "expr/ITypeExpr.h" +#include "query/PlanProto.h" + +namespace milvus { +namespace exec { + +class BaseElement { + public: + virtual ~BaseElement() = default; +}; + +class SingleElement : public BaseElement { + public: + using ValueType = std::variant; + + SingleElement() = default; + virtual ~SingleElement() = default; + + template + void + SetValue(const proto::plan::GenericValue& value) { + value_ = GetValueFromProto(value); + } + + template + void + SetValue(const T& value) { + if constexpr (std::is_same_v || std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || std::is_same_v || + std::is_same_v || + std::is_same_v) { + value_ = value; + } else { + static_assert(sizeof(T) == 0, + "Type not supported in SingleElement"); + } + } + + template + T + GetValue() const { + try { + return std::get(value_); + } catch (const std::bad_variant_access& e) { + PanicInfo(ErrorCode::UnexpectedError, + "SingleElement GetValue() failed: {}", + e.what()); + } + } + + public: + ValueType value_; +}; + +class MultiElement : public BaseElement { + public: + using ValueType = std::variant; + + MultiElement() = default; + virtual ~MultiElement() = default; + + virtual bool + In(const ValueType& value) const = 0; + + virtual bool + Empty() const = 0; + + virtual size_t + Size() const = 0; +}; + +template +class SortVectorElement : public MultiElement { + public: + explicit SortVectorElement( + const std::vector& values) { + for (auto& value : values) { + values_.push_back(GetValueFromProto(value)); + } + std::sort(values_.begin(), values_.end()); + sorted_ = true; + } + + explicit SortVectorElement(const std::vector& values) { + for (const auto& value : values) { + values_.push_back(value); + } + std::sort(values_.begin(), 
values_.end()); + sorted_ = true; + } + + bool + Empty() const override { + return values_.empty(); + } + + size_t + Size() const override { + return values_.size(); + } + + bool + In(const ValueType& value) const override { + AssertInfo(sorted_, "In() should be sorted before"); + if (std::holds_alternative(value)) { + return std::binary_search( + values_.begin(), values_.end(), std::get(value)); + } + return false; + } + + void + Sort() { + std::sort(values_.begin(), values_.end()); + sorted_ = true; + } + + void + AddElement(const T& value) { + values_.push_back(value); + } + + public: + std::vector values_; + bool sorted_{false}; +}; + +template +class FlatVectorElement : public MultiElement { + public: + explicit FlatVectorElement( + const std::vector& values) { + for (auto& value : values) { + values_.push_back(GetValueFromProto(value)); + } + } + + explicit FlatVectorElement(const std::vector& values) { + for (const auto& value : values) { + values_.push_back(value); + } + } + + bool + Empty() const override { + return values_.empty(); + } + + bool + In(const ValueType& value) const override { + if (std::holds_alternative(value)) { + for (const auto& v : values_) { + if (v == value) + return true; + } + } + return false; + } + + size_t + Size() const override { + return values_.size(); + } + + void + AddElement(const T& value) { + values_.push_back(value); + } + + public: + std::vector values_; +}; + +template +class SetElement : public MultiElement { + public: + explicit SetElement(const std::vector& values) { + for (auto& value : values) { + values_.insert(GetValueFromProto(value)); + } + } + + explicit SetElement(const std::vector& values) { + for (const auto& value : values) { + values_.insert(value); + } + } + + bool + Empty() const override { + return values_.empty(); + } + + bool + In(const ValueType& value) const override { + if (std::holds_alternative(value)) { + return values_.count(std::get(value)) > 0; + } + } + + void + AddElement(const T& value) 
{ + values_.insert(value); + } + + size_t + Size() const override { + return values_.size(); + } + + public: + std::set values_; +}; + +} //namespace exec +} // namespace milvus diff --git a/internal/core/src/exec/expression/JsonContainsExpr.cpp b/internal/core/src/exec/expression/JsonContainsExpr.cpp index 3318a48228..3dd9e5b665 100644 --- a/internal/core/src/exec/expression/JsonContainsExpr.cpp +++ b/internal/core/src/exec/expression/JsonContainsExpr.cpp @@ -183,9 +183,9 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) { TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); valid_res.set(); - std::unordered_set elements; - for (auto const& element : expr_->vals_) { - elements.insert(GetValueFromProto(element)); + if (!arg_inited_) { + arg_set_ = std::make_shared>(expr_->vals_); + arg_inited_ = true; } auto execute_sub_batch = []( @@ -195,11 +195,11 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) { const int size, TargetBitmapView res, TargetBitmapView valid_res, - const std::unordered_set& elements) { + const std::shared_ptr& elements) { auto executor = [&](size_t i) { const auto& array = data[i]; for (int j = 0; j < array.length(); ++j) { - if (elements.count(array.template get_data(j)) > 0) { + if (elements->In(array.template get_data(j))) { return true; } } @@ -226,10 +226,10 @@ PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) { input, res, valid_res, - elements); + arg_set_); } else { processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, elements); + execute_sub_batch, std::nullptr_t{}, res, valid_res, arg_set_); } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " @@ -258,10 +258,10 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) { TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); valid_res.set(); - std::unordered_set elements; auto pointer = 
milvus::Json::pointer(expr_->column_.nested_path_); - for (auto const& element : expr_->vals_) { - elements.insert(GetValueFromProto(element)); + if (!arg_inited_) { + arg_set_ = std::make_shared>(expr_->vals_); + arg_inited_ = true; } auto execute_sub_batch = []( @@ -272,7 +272,7 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) { TargetBitmapView res, TargetBitmapView valid_res, const std::string& pointer, - const std::unordered_set& elements) { + const std::shared_ptr& elements) { auto executor = [&](size_t i) { auto doc = data[i].doc(); auto array = doc.at_pointer(pointer).get_array(); @@ -284,7 +284,7 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) { if (val.error()) { continue; } - if (elements.count(val.value()) > 0) { + if (elements->In(val.value()) > 0) { return true; } } @@ -311,14 +311,14 @@ PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) { res, valid_res, pointer, - elements); + arg_set_); } else { processed_size = ProcessDataChunks(execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, - elements); + arg_set_); } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " @@ -442,7 +442,7 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) { TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); valid_res.set(); - std::unordered_set elements; + std::set elements; for (auto const& element : expr_->vals_) { elements.insert(GetValueFromProto(element)); } @@ -455,9 +455,9 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) { const int size, TargetBitmapView res, TargetBitmapView valid_res, - const std::unordered_set& elements) { + const std::set& elements) { auto executor = [&](size_t i) { - std::unordered_set tmp_elements(elements); + std::set tmp_elements(elements); // Note: array can only be iterated once for (int j = 0; j < data[i].length(); ++j) { tmp_elements.erase(data[i].template get_data(j)); 
@@ -521,7 +521,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) { valid_res.set(); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); - std::unordered_set elements; + std::set elements; for (auto const& element : expr_->vals_) { elements.insert(GetValueFromProto(element)); } @@ -535,14 +535,14 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) { TargetBitmapView res, TargetBitmapView valid_res, const std::string& pointer, - const std::unordered_set& elements) { + const std::set& elements) { auto executor = [&](const size_t i) -> bool { auto doc = data[i].doc(); auto array = doc.at_pointer(pointer).get_array(); if (array.error()) { return false; } - std::unordered_set tmp_elements(elements); + std::set tmp_elements(elements); // Note: array can only be iterated once for (auto&& it : array) { auto val = it.template get(); diff --git a/internal/core/src/exec/expression/JsonContainsExpr.h b/internal/core/src/exec/expression/JsonContainsExpr.h index b3b0b15660..18b187328e 100644 --- a/internal/core/src/exec/expression/JsonContainsExpr.h +++ b/internal/core/src/exec/expression/JsonContainsExpr.h @@ -22,6 +22,7 @@ #include "common/Types.h" #include "common/Vector.h" #include "exec/expression/Expr.h" +#include "exec/expression/Element.h" #include "segcore/SegmentInterface.h" namespace milvus { @@ -90,6 +91,8 @@ class PhyJsonContainsFilterExpr : public SegmentExpr { private: std::shared_ptr expr_; + bool arg_inited_{false}; + std::shared_ptr arg_set_; }; } //namespace exec } // namespace milvus diff --git a/internal/core/src/exec/expression/TermExpr.cpp b/internal/core/src/exec/expression/TermExpr.cpp index 960d9731c9..ad8abf01d1 100644 --- a/internal/core/src/exec/expression/TermExpr.cpp +++ b/internal/core/src/exec/expression/TermExpr.cpp @@ -271,7 +271,11 @@ PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) { AssertInfo(expr_->vals_.size() == 1, "element length in json array must be one"); - 
ValueType target_val = GetValueFromProto(expr_->vals_[0]); + if (!arg_inited_) { + arg_val_.SetValue(expr_->vals_[0]); + arg_inited_ = true; + } + auto target_val = arg_val_.GetValue(); auto execute_sub_batch = []( @@ -348,12 +352,12 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) { if (expr_->column_.nested_path_.size() > 0) { index = std::stoi(expr_->column_.nested_path_[0]); } - std::unordered_set term_set; - for (const auto& element : expr_->vals_) { - term_set.insert(GetValueFromProto(element)); + if (!arg_inited_) { + arg_set_ = std::make_shared>(expr_->vals_); + arg_inited_ = true; } - if (term_set.empty()) { + if (arg_set_->Empty()) { res.reset(); MoveCursor(); return res_vec; @@ -368,7 +372,7 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) { TargetBitmapView res, TargetBitmapView valid_res, int index, - const std::unordered_set& term_set) { + const std::shared_ptr& term_set) { for (int i = 0; i < size; ++i) { auto offset = i; if constexpr (filter_type == FilterType::random) { @@ -378,12 +382,12 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) { res[i] = valid_res[i] = false; continue; } - if (term_set.empty() || index >= data[offset].length()) { + if (term_set->Empty() || index >= data[offset].length()) { res[i] = false; continue; } auto value = data[offset].get_data(index); - res[i] = term_set.find(ValueType(value)) != term_set.end(); + res[i] = term_set->In(ValueType(value)); } }; @@ -396,14 +400,14 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) { res, valid_res, index, - term_set); + arg_set_); } else { processed_size = ProcessDataChunks(execute_sub_batch, std::nullptr_t{}, res, valid_res, index, - term_set); + arg_set_); } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " @@ -433,7 +437,12 @@ PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) { AssertInfo(expr_->vals_.size() == 1, "element 
length in json array must be one"); - ValueType val = GetValueFromProto(expr_->vals_[0]); + if (!arg_inited_) { + arg_val_.SetValue(expr_->vals_[0]); + arg_inited_ = true; + } + auto val = arg_val_.GetValue(); + auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); auto execute_sub_batch = @@ -514,12 +523,12 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) { valid_res.set(); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); - std::unordered_set term_set; - for (const auto& element : expr_->vals_) { - term_set.insert(GetValueFromProto(element)); + if (!arg_inited_) { + arg_set_ = std::make_shared>(expr_->vals_); + arg_inited_ = true; } - if (term_set.empty()) { + if (arg_set_->Empty()) { res.reset(); MoveCursor(); return res_vec; @@ -534,7 +543,7 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) { TargetBitmapView res, TargetBitmapView valid_res, const std::string pointer, - const std::unordered_set& terms) { + const std::shared_ptr& terms) { auto executor = [&](size_t i) { auto x = data[i].template at(pointer); if (x.error()) { @@ -547,11 +556,11 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) { auto value = x.value(); // if the term set is {1}, and the value is 1.1, we should not return true. 
return std::floor(value) == value && - terms.find(ValueType(value)) != terms.end(); + terms->In(ValueType(x.value())); } return false; } - return terms.find(ValueType(x.value())) != terms.end(); + return terms->In(ValueType(x.value())); }; for (size_t i = 0; i < size; ++i) { auto offset = i; @@ -562,7 +571,7 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) { res[i] = valid_res[i] = false; continue; } - if (terms.empty()) { + if (terms->Empty()) { res[i] = false; continue; } @@ -577,14 +586,14 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) { res, valid_res, pointer, - term_set); + arg_set_); } else { processed_size = ProcessDataChunks(execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, - term_set); + arg_set_); } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " @@ -676,16 +685,21 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) { TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); valid_res.set(); - std::vector vals; - for (auto& val : expr_->vals_) { - // Integral overflow process - bool overflowed = false; - auto converted_val = GetValueFromProtoWithOverflow(val, overflowed); - if (!overflowed) { - vals.emplace_back(converted_val); + if (!arg_inited_) { + std::vector vals; + for (auto& val : expr_->vals_) { + // Integral overflow process + bool overflowed = false; + auto converted_val = + GetValueFromProtoWithOverflow(val, overflowed); + if (!overflowed) { + vals.emplace_back(converted_val); + } } + arg_set_ = std::make_shared>(vals); + arg_inited_ = true; } - std::unordered_set vals_set(vals.begin(), vals.end()); + auto execute_sub_batch = []( const T* data, @@ -694,8 +708,7 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) { const int size, TargetBitmapView res, TargetBitmapView valid_res, - const std::unordered_set& vals) { - TermElementFuncSet func; + const std::shared_ptr& vals) { for (size_t i = 0; i < 
size; ++i) { auto offset = i; if constexpr (filter_type == FilterType::random) { @@ -705,7 +718,7 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) { res[i] = valid_res[i] = false; continue; } - res[i] = func(vals, data[offset]); + res[i] = vals->In(data[offset]); } }; int64_t processed_size; @@ -715,10 +728,10 @@ PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) { input, res, valid_res, - vals_set); + arg_set_); } else { processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, vals_set); + execute_sub_batch, std::nullptr_t{}, res, valid_res, arg_set_); } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " diff --git a/internal/core/src/exec/expression/TermExpr.h b/internal/core/src/exec/expression/TermExpr.h index b5660374b1..597eb83560 100644 --- a/internal/core/src/exec/expression/TermExpr.h +++ b/internal/core/src/exec/expression/TermExpr.h @@ -22,6 +22,7 @@ #include "common/Types.h" #include "common/Vector.h" #include "exec/expression/Expr.h" +#include "exec/expression/Element.h" #include "segcore/SegmentInterface.h" namespace milvus { @@ -123,6 +124,9 @@ class PhyTermFilterExpr : public SegmentExpr { milvus::Timestamp query_timestamp_; bool cached_bits_inited_{false}; TargetBitmap cached_bits_; + bool arg_inited_{false}; + std::shared_ptr arg_set_; + SingleElement arg_val_; }; } //namespace exec } // namespace milvus diff --git a/internal/core/src/exec/expression/UnaryExpr.cpp b/internal/core/src/exec/expression/UnaryExpr.cpp index e6a2dfe210..743fc454cc 100644 --- a/internal/core/src/exec/expression/UnaryExpr.cpp +++ b/internal/core/src/exec/expression/UnaryExpr.cpp @@ -297,7 +297,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) { TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); valid_res.set(); - ValueType val = GetValueFromProto(expr_->val_); + if (!arg_inited_) { + 
value_arg_.SetValue(expr_->val_); + arg_inited_ = true; + } + ValueType val = value_arg_.GetValue(); auto op_type = expr_->op_type_; int index = -1; if (expr_->column_.nested_path_.size() > 0) { @@ -506,7 +510,7 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) { }; } else { auto size_per_chunk = segment_->size_per_chunk(); - retrieve = [ size_per_chunk, this ](int64_t offset) -> auto { + retrieve = [ size_per_chunk, this ](int64_t offset) -> auto{ auto chunk_idx = offset / size_per_chunk; auto chunk_offset = offset % size_per_chunk; const auto& chunk = @@ -586,7 +590,12 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) { return nullptr; } - ExprValueType val = GetValueFromProto(expr_->val_); + if (!arg_inited_) { + value_arg_.SetValue(expr_->val_); + arg_inited_ = true; + } + + ExprValueType val = value_arg_.GetValue(); auto res_vec = std::make_shared( TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); TargetBitmapView res(res_vec->GetRawData(), real_batch_size); @@ -847,6 +856,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForIndex() { conditional_t, std::string, T> IndexInnerType; using Index = index::ScalarIndex; + if (!arg_inited_) { + value_arg_.SetValue(expr_->val_); + arg_inited_ = true; + } if (auto res = PreCheckOverflow()) { return res; } @@ -907,7 +920,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForIndex() { } return res; }; - auto val = GetValueFromProto(expr_->val_); + IndexInnerType val = value_arg_.GetValue(); auto res = ProcessIndexChunks(execute_sub_batch, val); AssertInfo(res->size() == real_batch_size, "internal error: expr processed rows {} not equal " @@ -996,6 +1009,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) { return nullptr; } + if (!arg_inited_) { + value_arg_.SetValue(expr_->val_); + arg_inited_ = true; + } IndexInnerType val = GetValueFromProto(expr_->val_); auto res_vec = std::make_shared( TargetBitmap(real_batch_size), 
TargetBitmap(real_batch_size)); @@ -1123,7 +1140,12 @@ PhyUnaryRangeFilterExpr::CanUseIndexForJson() { VectorPtr PhyUnaryRangeFilterExpr::ExecTextMatch() { using Index = index::TextMatchIndex; - auto query = GetValueFromProto(expr_->val_); + if (!arg_inited_) { + value_arg_.SetValue(expr_->val_); + arg_inited_ = true; + } + auto query = value_arg_.GetValue(); + int64_t slop = 0; if (expr_->op_type_ == proto::plan::PhraseMatch) { // It should be larger than 0 in normal cases. Check it incase of receiving old version proto. diff --git a/internal/core/src/exec/expression/UnaryExpr.h b/internal/core/src/exec/expression/UnaryExpr.h index 037856859e..089cb3fc61 100644 --- a/internal/core/src/exec/expression/UnaryExpr.h +++ b/internal/core/src/exec/expression/UnaryExpr.h @@ -24,6 +24,7 @@ #include "common/Types.h" #include "common/Vector.h" #include "exec/expression/Expr.h" +#include "exec/expression/Element.h" #include "index/Meta.h" #include "index/ScalarIndex.h" #include "segcore/SegmentInterface.h" @@ -389,6 +390,8 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr { private: std::shared_ptr expr_; int64_t overflow_check_pos_{0}; + bool arg_inited_{false}; + SingleElement value_arg_; }; } // namespace exec } // namespace milvus