enhance: avoid copying the input values on every loop iteration in multiple places in expr execution (#45680)

issue: https://github.com/milvus-io/milvus/issues/45679

Signed-off-by: Buqian Zheng <zhengbuqian@gmail.com>
This commit is contained in:
Buqian Zheng 2025-11-20 01:51:07 +08:00 committed by GitHub
parent 8ee8c01bcf
commit 5b85f0e4dc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 79 additions and 43 deletions

View File

@ -269,7 +269,7 @@ class PhyCompareFilterExpr : public Expr {
OffsetVector* input, OffsetVector* input,
TargetBitmapView res, TargetBitmapView res,
TargetBitmapView valid_res, TargetBitmapView valid_res,
ValTypes... values) { const ValTypes&... values) {
if (segment_chunk_reader_.segment_->is_chunked()) { if (segment_chunk_reader_.segment_->is_chunked()) {
return ProcessBothDataChunksForMultipleChunk<T, return ProcessBothDataChunksForMultipleChunk<T,
U, U,
@ -288,7 +288,7 @@ class PhyCompareFilterExpr : public Expr {
OffsetVector* input, OffsetVector* input,
TargetBitmapView res, TargetBitmapView res,
TargetBitmapView valid_res, TargetBitmapView valid_res,
ValTypes... values) { const ValTypes&... values) {
int64_t size = input->size(); int64_t size = input->size();
int64_t processed_size = 0; int64_t processed_size = 0;
const auto size_per_chunk = segment_chunk_reader_.SizePerChunk(); const auto size_per_chunk = segment_chunk_reader_.SizePerChunk();
@ -380,7 +380,7 @@ class PhyCompareFilterExpr : public Expr {
ProcessBothDataChunksForSingleChunk(FUNC func, ProcessBothDataChunksForSingleChunk(FUNC func,
TargetBitmapView res, TargetBitmapView res,
TargetBitmapView valid_res, TargetBitmapView valid_res,
ValTypes... values) { const ValTypes&... values) {
int64_t processed_size = 0; int64_t processed_size = 0;
const auto active_count = segment_chunk_reader_.active_count_; const auto active_count = segment_chunk_reader_.active_count_;
@ -450,7 +450,7 @@ class PhyCompareFilterExpr : public Expr {
ProcessBothDataChunksForMultipleChunk(FUNC func, ProcessBothDataChunksForMultipleChunk(FUNC func,
TargetBitmapView res, TargetBitmapView res,
TargetBitmapView valid_res, TargetBitmapView valid_res,
ValTypes... values) { const ValTypes&... values) {
int64_t processed_size = 0; int64_t processed_size = 0;
// only call this function when left and right are not indexed, so they have the same number of chunks // only call this function when left and right are not indexed, so they have the same number of chunks

View File

@ -41,6 +41,7 @@ class SingleElement : public BaseElement {
using ValueType = std::variant<std::monostate, using ValueType = std::variant<std::monostate,
bool, bool,
int8_t, int8_t,
uint8_t,
int16_t, int16_t,
int32_t, int32_t,
int64_t, int64_t,
@ -62,6 +63,7 @@ class SingleElement : public BaseElement {
void void
SetValue(const T& value) { SetValue(const T& value) {
if constexpr (std::is_same_v<T, bool> || std::is_same_v<T, int8_t> || if constexpr (std::is_same_v<T, bool> || std::is_same_v<T, int8_t> ||
std::is_same_v<T, uint8_t> ||
std::is_same_v<T, int16_t> || std::is_same_v<T, int16_t> ||
std::is_same_v<T, int32_t> || std::is_same_v<T, int32_t> ||
std::is_same_v<T, int64_t> || std::is_same_v<T, float> || std::is_same_v<T, int64_t> || std::is_same_v<T, float> ||
@ -95,6 +97,7 @@ class MultiElement : public BaseElement {
using ValueType = std::variant<std::monostate, using ValueType = std::variant<std::monostate,
bool, bool,
int8_t, int8_t,
uint8_t,
int16_t, int16_t,
int32_t, int32_t,
int64_t, int64_t,
@ -216,7 +219,7 @@ class FlatVectorElement : public MultiElement {
In(const ValueType& value) const override { In(const ValueType& value) const override {
if (std::holds_alternative<T>(value)) { if (std::holds_alternative<T>(value)) {
for (const auto& v : values_) { for (const auto& v : values_) {
if (v == value) if (v == std::get<T>(value))
return true; return true;
} }
} }

View File

@ -362,7 +362,7 @@ class SegmentExpr : public Expr {
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func, std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
TargetBitmapView res, TargetBitmapView res,
TargetBitmapView valid_res, TargetBitmapView valid_res,
ValTypes... values) { const ValTypes&... values) {
// For sealed segment, only single chunk // For sealed segment, only single chunk
Assert(num_data_chunk_ == 1); Assert(num_data_chunk_ == 1);
auto need_size = auto need_size =
@ -423,7 +423,7 @@ class SegmentExpr : public Expr {
OffsetVector* input, OffsetVector* input,
TargetBitmapView res, TargetBitmapView res,
TargetBitmapView valid_res, TargetBitmapView valid_res,
ValTypes... values) { const ValTypes&... values) {
// For non_chunked sealed segment, only single chunk // For non_chunked sealed segment, only single chunk
Assert(num_data_chunk_ == 1); Assert(num_data_chunk_ == 1);
@ -451,7 +451,7 @@ class SegmentExpr : public Expr {
VectorPtr VectorPtr
ProcessIndexChunksByOffsets(FUNC func, ProcessIndexChunksByOffsets(FUNC func,
OffsetVector* input, OffsetVector* input,
ValTypes... values) { const ValTypes&... values) {
AssertInfo(num_index_chunk_ == 1, "scalar index chunk num must be 1"); AssertInfo(num_index_chunk_ == 1, "scalar index chunk num must be 1");
using IndexInnerType = std:: using IndexInnerType = std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>; conditional_t<std::is_same_v<T, std::string_view>, std::string, T>;
@ -480,7 +480,7 @@ class SegmentExpr : public Expr {
OffsetVector* input, OffsetVector* input,
TargetBitmapView res, TargetBitmapView res,
TargetBitmapView valid_res, TargetBitmapView valid_res,
ValTypes... values) { const ValTypes&... values) {
AssertInfo(num_index_chunk_ == 1, "scalar index chunk num must be 1"); AssertInfo(num_index_chunk_ == 1, "scalar index chunk num must be 1");
auto& skip_index = segment_->GetSkipIndex(); auto& skip_index = segment_->GetSkipIndex();
@ -532,7 +532,7 @@ class SegmentExpr : public Expr {
OffsetVector* input, OffsetVector* input,
TargetBitmapView res, TargetBitmapView res,
TargetBitmapView valid_res, TargetBitmapView valid_res,
ValTypes... values) { const ValTypes&... values) {
int64_t processed_size = 0; int64_t processed_size = 0;
// index reverse lookup // index reverse lookup
@ -690,7 +690,7 @@ class SegmentExpr : public Expr {
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func, std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
TargetBitmapView res, TargetBitmapView res,
TargetBitmapView valid_res, TargetBitmapView valid_res,
ValTypes... values) { const ValTypes&... values) {
int64_t processed_size = 0; int64_t processed_size = 0;
if constexpr (std::is_same_v<T, std::string_view> || if constexpr (std::is_same_v<T, std::string_view> ||
std::is_same_v<T, Json>) { std::is_same_v<T, Json>) {
@ -782,7 +782,7 @@ class SegmentExpr : public Expr {
TargetBitmapView res, TargetBitmapView res,
TargetBitmapView valid_res, TargetBitmapView valid_res,
bool process_all_chunks, bool process_all_chunks,
ValTypes... values) { const ValTypes&... values) {
int64_t processed_size = 0; int64_t processed_size = 0;
size_t start_chunk = process_all_chunks ? 0 : current_data_chunk_; size_t start_chunk = process_all_chunks ? 0 : current_data_chunk_;
@ -934,7 +934,7 @@ class SegmentExpr : public Expr {
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func, std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
TargetBitmapView res, TargetBitmapView res,
TargetBitmapView valid_res, TargetBitmapView valid_res,
ValTypes... values) { const ValTypes&... values) {
return ProcessMultipleChunksCommon<T, NeedSegmentOffsets>( return ProcessMultipleChunksCommon<T, NeedSegmentOffsets>(
func, skip_func, res, valid_res, false, values...); func, skip_func, res, valid_res, false, values...);
} }
@ -946,7 +946,7 @@ class SegmentExpr : public Expr {
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func, std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
TargetBitmapView res, TargetBitmapView res,
TargetBitmapView valid_res, TargetBitmapView valid_res,
ValTypes... values) { const ValTypes&... values) {
return ProcessMultipleChunksCommon<T>( return ProcessMultipleChunksCommon<T>(
func, skip_func, res, valid_res, true, values...); func, skip_func, res, valid_res, true, values...);
} }
@ -961,7 +961,7 @@ class SegmentExpr : public Expr {
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func, std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
TargetBitmapView res, TargetBitmapView res,
TargetBitmapView valid_res, TargetBitmapView valid_res,
ValTypes... values) { const ValTypes&... values) {
if (segment_->is_chunked()) { if (segment_->is_chunked()) {
return ProcessDataChunksForMultipleChunk<T, NeedSegmentOffsets>( return ProcessDataChunksForMultipleChunk<T, NeedSegmentOffsets>(
func, skip_func, res, valid_res, values...); func, skip_func, res, valid_res, values...);
@ -978,7 +978,7 @@ class SegmentExpr : public Expr {
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func, std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
TargetBitmapView res, TargetBitmapView res,
TargetBitmapView valid_res, TargetBitmapView valid_res,
ValTypes... values) { const ValTypes&... values) {
if (segment_->is_chunked()) { if (segment_->is_chunked()) {
return ProcessAllChunksForMultipleChunk<T>( return ProcessAllChunksForMultipleChunk<T>(
func, skip_func, res, valid_res, values...); func, skip_func, res, valid_res, values...);
@ -1010,7 +1010,7 @@ class SegmentExpr : public Expr {
template <typename T, typename FUNC, typename... ValTypes> template <typename T, typename FUNC, typename... ValTypes>
VectorPtr VectorPtr
ProcessIndexChunks(FUNC func, ValTypes... values) { ProcessIndexChunks(FUNC func, const ValTypes&... values) {
typedef std:: typedef std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T> conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
IndexInnerType; IndexInnerType;
@ -1360,7 +1360,7 @@ class SegmentExpr : public Expr {
template <typename T, typename FUNC, typename... ValTypes> template <typename T, typename FUNC, typename... ValTypes>
void void
ProcessIndexChunksV2(FUNC func, ValTypes... values) { ProcessIndexChunksV2(FUNC func, const ValTypes&... values) {
typedef std:: typedef std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T> conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
IndexInnerType; IndexInnerType;

View File

@ -544,11 +544,18 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray(EvalCtx& context) {
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
std::vector<proto::plan::Array> elements; if (!arg_inited_) {
auto elements = std::make_shared<std::vector<proto::plan::Array>>();
for (auto const& element : expr_->vals_) { for (auto const& element : expr_->vals_) {
elements.emplace_back(GetValueFromProto<proto::plan::Array>(element)); elements->emplace_back(
GetValueFromProto<proto::plan::Array>(element));
}
arg_cached_set_ = elements;
arg_inited_ = true;
} }
auto elements = std::static_pointer_cast<std::vector<proto::plan::Array>>(
arg_cached_set_);
size_t processed_cursor = 0; size_t processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[&processed_cursor, & [&processed_cursor, &
@ -613,14 +620,14 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray(EvalCtx& context) {
res, res,
valid_res, valid_res,
pointer, pointer,
elements); *elements);
} else { } else {
processed_size = ProcessDataChunks<milvus::Json>(execute_sub_batch, processed_size = ProcessDataChunks<milvus::Json>(execute_sub_batch,
std::nullptr_t{}, std::nullptr_t{},
res, res,
valid_res, valid_res,
pointer, pointer,
elements); *elements);
} }
AssertInfo(processed_size == real_batch_size, AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal " "internal error: expr processed rows {} not equal "
@ -739,11 +746,17 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(EvalCtx& context) {
TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
std::set<GetType> elements; if (!arg_inited_) {
auto elements = std::make_shared<std::set<GetType>>();
for (auto const& element : expr_->vals_) { for (auto const& element : expr_->vals_) {
elements.insert(GetValueWithCastNumber<GetType>(element)); elements->insert(GetValueWithCastNumber<GetType>(element));
}
arg_cached_set_ = elements;
arg_inited_ = true;
} }
auto elements =
std::static_pointer_cast<std::set<GetType>>(arg_cached_set_);
int processed_cursor = 0; int processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[&processed_cursor, & [&processed_cursor, &
@ -791,10 +804,10 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(EvalCtx& context) {
input, input,
res, res,
valid_res, valid_res,
elements); *elements);
} else { } else {
processed_size = ProcessDataChunks<milvus::ArrayView>( processed_size = ProcessDataChunks<milvus::ArrayView>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, elements); execute_sub_batch, std::nullptr_t{}, res, valid_res, *elements);
} }
AssertInfo(processed_size == real_batch_size, AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal " "internal error: expr processed rows {} not equal "
@ -832,11 +845,17 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(EvalCtx& context) {
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
std::set<GetType> elements; if (!arg_inited_) {
auto elements = std::make_shared<std::set<GetType>>();
for (auto const& element : expr_->vals_) { for (auto const& element : expr_->vals_) {
elements.insert(GetValueFromProto<GetType>(element)); elements->insert(GetValueFromProto<GetType>(element));
}
arg_cached_set_ = elements;
arg_inited_ = true;
} }
auto elements =
std::static_pointer_cast<std::set<GetType>>(arg_cached_set_);
int processed_cursor = 0; int processed_cursor = 0;
auto execute_sub_batch = auto execute_sub_batch =
[&processed_cursor, & [&processed_cursor, &
@ -907,14 +926,14 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(EvalCtx& context) {
res, res,
valid_res, valid_res,
pointer, pointer,
elements); *elements);
} else { } else {
processed_size = ProcessDataChunks<Json>(execute_sub_batch, processed_size = ProcessDataChunks<Json>(execute_sub_batch,
std::nullptr_t{}, std::nullptr_t{},
res, res,
valid_res, valid_res,
pointer, pointer,
elements); *elements);
} }
AssertInfo(processed_size == real_batch_size, AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal " "internal error: expr processed rows {} not equal "
@ -935,12 +954,19 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllByStats() {
if (real_batch_size == 0) { if (real_batch_size == 0) {
return nullptr; return nullptr;
} }
std::set<GetType> elements;
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
if (!arg_inited_) {
auto elements = std::make_shared<std::set<GetType>>();
for (auto const& element : expr_->vals_) { for (auto const& element : expr_->vals_) {
elements.insert(GetValueFromProto<GetType>(element)); elements->insert(GetValueFromProto<GetType>(element));
} }
if (elements.empty()) { arg_cached_set_ = elements;
arg_inited_ = true;
}
auto elements =
std::static_pointer_cast<std::set<GetType>>(arg_cached_set_);
if (elements->empty()) {
MoveCursor(); MoveCursor();
return std::make_shared<ColumnVector>( return std::make_shared<ColumnVector>(
TargetBitmap(real_batch_size, false), TargetBitmap(real_batch_size, false),
@ -966,7 +992,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllByStats() {
pointer, milvus::index::JSONType::ARRAY); pointer, milvus::index::JSONType::ARRAY);
if (!target_field.empty()) { if (!target_field.empty()) {
ShreddingArrayBsonContainsAllExecutor<GetType> executor( ShreddingArrayBsonContainsAllExecutor<GetType> executor(
elements); *elements);
index->ExecutorForShreddingData<std::string_view>( index->ExecutorForShreddingData<std::string_view>(
op_ctx_, op_ctx_,
@ -989,7 +1015,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllByStats() {
return; return;
} }
std::set<GetType> tmp_elements(elements); std::set<GetType> tmp_elements(*elements);
for (const auto& element : val.value()) { for (const auto& element : val.value()) {
auto value = milvus::BsonView::GetValueFromBsonView<GetType>( auto value = milvus::BsonView::GetValueFromBsonView<GetType>(
element.get_value()); element.get_value());

View File

@ -554,6 +554,8 @@ class PhyJsonContainsFilterExpr : public SegmentExpr {
bool arg_inited_{false}; bool arg_inited_{false};
std::shared_ptr<MultiElement> arg_set_; std::shared_ptr<MultiElement> arg_set_;
std::shared_ptr<MultiElement> arg_set_double_; std::shared_ptr<MultiElement> arg_set_double_;
std::shared_ptr<void>
arg_cached_set_; // For caching std::set<T> or std::vector<T>
PinWrapper<index::JsonKeyStats*> pinned_json_stats_{nullptr}; PinWrapper<index::JsonKeyStats*> pinned_json_stats_{nullptr};
}; };
} //namespace exec } //namespace exec

View File

@ -878,16 +878,21 @@ PhyTermFilterExpr::ExecVisitorImplForIndex<bool>() {
return nullptr; return nullptr;
} }
if (!arg_inited_) {
std::vector<uint8_t> vals; std::vector<uint8_t> vals;
for (auto& val : expr_->vals_) { for (auto& val : expr_->vals_) {
vals.emplace_back(GetValueFromProto<bool>(val) ? 1 : 0); vals.emplace_back(GetValueFromProto<bool>(val) ? 1 : 0);
} }
arg_set_ = std::make_shared<FlatVectorElement<uint8_t>>(vals);
arg_inited_ = true;
}
auto execute_sub_batch = [](Index* index_ptr, auto execute_sub_batch = [](Index* index_ptr,
const std::vector<uint8_t>& vals) { const std::vector<uint8_t>& vals) {
TermIndexFunc<bool> func; TermIndexFunc<bool> func;
return std::move(func(index_ptr, vals.size(), (bool*)vals.data())); return std::move(func(index_ptr, vals.size(), (bool*)vals.data()));
}; };
auto res = ProcessIndexChunks<bool>(execute_sub_batch, vals); auto args = std::dynamic_pointer_cast<FlatVectorElement<uint8_t>>(arg_set_);
auto res = ProcessIndexChunks<bool>(execute_sub_batch, args->values_);
return res; return res;
} }