enhance: avoid copying the input values on every loop iteration in multiple places in expr (#45680)

issue: https://github.com/milvus-io/milvus/issues/45679

Signed-off-by: Buqian Zheng <zhengbuqian@gmail.com>
This commit is contained in:
Buqian Zheng 2025-11-20 01:51:07 +08:00 committed by GitHub
parent 8ee8c01bcf
commit 5b85f0e4dc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 79 additions and 43 deletions

View File

@ -269,7 +269,7 @@ class PhyCompareFilterExpr : public Expr {
OffsetVector* input,
TargetBitmapView res,
TargetBitmapView valid_res,
ValTypes... values) {
const ValTypes&... values) {
if (segment_chunk_reader_.segment_->is_chunked()) {
return ProcessBothDataChunksForMultipleChunk<T,
U,
@ -288,7 +288,7 @@ class PhyCompareFilterExpr : public Expr {
OffsetVector* input,
TargetBitmapView res,
TargetBitmapView valid_res,
ValTypes... values) {
const ValTypes&... values) {
int64_t size = input->size();
int64_t processed_size = 0;
const auto size_per_chunk = segment_chunk_reader_.SizePerChunk();
@ -380,7 +380,7 @@ class PhyCompareFilterExpr : public Expr {
ProcessBothDataChunksForSingleChunk(FUNC func,
TargetBitmapView res,
TargetBitmapView valid_res,
ValTypes... values) {
const ValTypes&... values) {
int64_t processed_size = 0;
const auto active_count = segment_chunk_reader_.active_count_;
@ -450,7 +450,7 @@ class PhyCompareFilterExpr : public Expr {
ProcessBothDataChunksForMultipleChunk(FUNC func,
TargetBitmapView res,
TargetBitmapView valid_res,
ValTypes... values) {
const ValTypes&... values) {
int64_t processed_size = 0;
// Only call this function when neither left nor right is indexed, so both sides have the same number of chunks.

View File

@ -41,6 +41,7 @@ class SingleElement : public BaseElement {
using ValueType = std::variant<std::monostate,
bool,
int8_t,
uint8_t,
int16_t,
int32_t,
int64_t,
@ -62,6 +63,7 @@ class SingleElement : public BaseElement {
void
SetValue(const T& value) {
if constexpr (std::is_same_v<T, bool> || std::is_same_v<T, int8_t> ||
std::is_same_v<T, uint8_t> ||
std::is_same_v<T, int16_t> ||
std::is_same_v<T, int32_t> ||
std::is_same_v<T, int64_t> || std::is_same_v<T, float> ||
@ -95,6 +97,7 @@ class MultiElement : public BaseElement {
using ValueType = std::variant<std::monostate,
bool,
int8_t,
uint8_t,
int16_t,
int32_t,
int64_t,
@ -216,7 +219,7 @@ class FlatVectorElement : public MultiElement {
In(const ValueType& value) const override {
if (std::holds_alternative<T>(value)) {
for (const auto& v : values_) {
if (v == value)
if (v == std::get<T>(value))
return true;
}
}

View File

@ -362,7 +362,7 @@ class SegmentExpr : public Expr {
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
TargetBitmapView res,
TargetBitmapView valid_res,
ValTypes... values) {
const ValTypes&... values) {
// For sealed segment, only single chunk
Assert(num_data_chunk_ == 1);
auto need_size =
@ -423,7 +423,7 @@ class SegmentExpr : public Expr {
OffsetVector* input,
TargetBitmapView res,
TargetBitmapView valid_res,
ValTypes... values) {
const ValTypes&... values) {
// For non_chunked sealed segment, only single chunk
Assert(num_data_chunk_ == 1);
@ -451,7 +451,7 @@ class SegmentExpr : public Expr {
VectorPtr
ProcessIndexChunksByOffsets(FUNC func,
OffsetVector* input,
ValTypes... values) {
const ValTypes&... values) {
AssertInfo(num_index_chunk_ == 1, "scalar index chunk num must be 1");
using IndexInnerType = std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>;
@ -480,7 +480,7 @@ class SegmentExpr : public Expr {
OffsetVector* input,
TargetBitmapView res,
TargetBitmapView valid_res,
ValTypes... values) {
const ValTypes&... values) {
AssertInfo(num_index_chunk_ == 1, "scalar index chunk num must be 1");
auto& skip_index = segment_->GetSkipIndex();
@ -532,7 +532,7 @@ class SegmentExpr : public Expr {
OffsetVector* input,
TargetBitmapView res,
TargetBitmapView valid_res,
ValTypes... values) {
const ValTypes&... values) {
int64_t processed_size = 0;
// index reverse lookup
@ -690,7 +690,7 @@ class SegmentExpr : public Expr {
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
TargetBitmapView res,
TargetBitmapView valid_res,
ValTypes... values) {
const ValTypes&... values) {
int64_t processed_size = 0;
if constexpr (std::is_same_v<T, std::string_view> ||
std::is_same_v<T, Json>) {
@ -782,7 +782,7 @@ class SegmentExpr : public Expr {
TargetBitmapView res,
TargetBitmapView valid_res,
bool process_all_chunks,
ValTypes... values) {
const ValTypes&... values) {
int64_t processed_size = 0;
size_t start_chunk = process_all_chunks ? 0 : current_data_chunk_;
@ -934,7 +934,7 @@ class SegmentExpr : public Expr {
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
TargetBitmapView res,
TargetBitmapView valid_res,
ValTypes... values) {
const ValTypes&... values) {
return ProcessMultipleChunksCommon<T, NeedSegmentOffsets>(
func, skip_func, res, valid_res, false, values...);
}
@ -946,7 +946,7 @@ class SegmentExpr : public Expr {
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
TargetBitmapView res,
TargetBitmapView valid_res,
ValTypes... values) {
const ValTypes&... values) {
return ProcessMultipleChunksCommon<T>(
func, skip_func, res, valid_res, true, values...);
}
@ -961,7 +961,7 @@ class SegmentExpr : public Expr {
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
TargetBitmapView res,
TargetBitmapView valid_res,
ValTypes... values) {
const ValTypes&... values) {
if (segment_->is_chunked()) {
return ProcessDataChunksForMultipleChunk<T, NeedSegmentOffsets>(
func, skip_func, res, valid_res, values...);
@ -978,7 +978,7 @@ class SegmentExpr : public Expr {
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
TargetBitmapView res,
TargetBitmapView valid_res,
ValTypes... values) {
const ValTypes&... values) {
if (segment_->is_chunked()) {
return ProcessAllChunksForMultipleChunk<T>(
func, skip_func, res, valid_res, values...);
@ -1010,7 +1010,7 @@ class SegmentExpr : public Expr {
template <typename T, typename FUNC, typename... ValTypes>
VectorPtr
ProcessIndexChunks(FUNC func, ValTypes... values) {
ProcessIndexChunks(FUNC func, const ValTypes&... values) {
typedef std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
IndexInnerType;
@ -1360,7 +1360,7 @@ class SegmentExpr : public Expr {
template <typename T, typename FUNC, typename... ValTypes>
void
ProcessIndexChunksV2(FUNC func, ValTypes... values) {
ProcessIndexChunksV2(FUNC func, const ValTypes&... values) {
typedef std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
IndexInnerType;

View File

@ -544,11 +544,18 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray(EvalCtx& context) {
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
std::vector<proto::plan::Array> elements;
for (auto const& element : expr_->vals_) {
elements.emplace_back(GetValueFromProto<proto::plan::Array>(element));
if (!arg_inited_) {
auto elements = std::make_shared<std::vector<proto::plan::Array>>();
for (auto const& element : expr_->vals_) {
elements->emplace_back(
GetValueFromProto<proto::plan::Array>(element));
}
arg_cached_set_ = elements;
arg_inited_ = true;
}
auto elements = std::static_pointer_cast<std::vector<proto::plan::Array>>(
arg_cached_set_);
size_t processed_cursor = 0;
auto execute_sub_batch =
[&processed_cursor, &
@ -613,14 +620,14 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray(EvalCtx& context) {
res,
valid_res,
pointer,
elements);
*elements);
} else {
processed_size = ProcessDataChunks<milvus::Json>(execute_sub_batch,
std::nullptr_t{},
res,
valid_res,
pointer,
elements);
*elements);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
@ -739,11 +746,17 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(EvalCtx& context) {
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
std::set<GetType> elements;
for (auto const& element : expr_->vals_) {
elements.insert(GetValueWithCastNumber<GetType>(element));
if (!arg_inited_) {
auto elements = std::make_shared<std::set<GetType>>();
for (auto const& element : expr_->vals_) {
elements->insert(GetValueWithCastNumber<GetType>(element));
}
arg_cached_set_ = elements;
arg_inited_ = true;
}
auto elements =
std::static_pointer_cast<std::set<GetType>>(arg_cached_set_);
int processed_cursor = 0;
auto execute_sub_batch =
[&processed_cursor, &
@ -791,10 +804,10 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(EvalCtx& context) {
input,
res,
valid_res,
elements);
*elements);
} else {
processed_size = ProcessDataChunks<milvus::ArrayView>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, elements);
execute_sub_batch, std::nullptr_t{}, res, valid_res, *elements);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
@ -832,11 +845,17 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(EvalCtx& context) {
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
std::set<GetType> elements;
for (auto const& element : expr_->vals_) {
elements.insert(GetValueFromProto<GetType>(element));
if (!arg_inited_) {
auto elements = std::make_shared<std::set<GetType>>();
for (auto const& element : expr_->vals_) {
elements->insert(GetValueFromProto<GetType>(element));
}
arg_cached_set_ = elements;
arg_inited_ = true;
}
auto elements =
std::static_pointer_cast<std::set<GetType>>(arg_cached_set_);
int processed_cursor = 0;
auto execute_sub_batch =
[&processed_cursor, &
@ -907,14 +926,14 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(EvalCtx& context) {
res,
valid_res,
pointer,
elements);
*elements);
} else {
processed_size = ProcessDataChunks<Json>(execute_sub_batch,
std::nullptr_t{},
res,
valid_res,
pointer,
elements);
*elements);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
@ -935,12 +954,19 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllByStats() {
if (real_batch_size == 0) {
return nullptr;
}
std::set<GetType> elements;
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
for (auto const& element : expr_->vals_) {
elements.insert(GetValueFromProto<GetType>(element));
if (!arg_inited_) {
auto elements = std::make_shared<std::set<GetType>>();
for (auto const& element : expr_->vals_) {
elements->insert(GetValueFromProto<GetType>(element));
}
arg_cached_set_ = elements;
arg_inited_ = true;
}
if (elements.empty()) {
auto elements =
std::static_pointer_cast<std::set<GetType>>(arg_cached_set_);
if (elements->empty()) {
MoveCursor();
return std::make_shared<ColumnVector>(
TargetBitmap(real_batch_size, false),
@ -966,7 +992,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllByStats() {
pointer, milvus::index::JSONType::ARRAY);
if (!target_field.empty()) {
ShreddingArrayBsonContainsAllExecutor<GetType> executor(
elements);
*elements);
index->ExecutorForShreddingData<std::string_view>(
op_ctx_,
@ -989,7 +1015,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllByStats() {
return;
}
std::set<GetType> tmp_elements(elements);
std::set<GetType> tmp_elements(*elements);
for (const auto& element : val.value()) {
auto value = milvus::BsonView::GetValueFromBsonView<GetType>(
element.get_value());

View File

@ -554,6 +554,8 @@ class PhyJsonContainsFilterExpr : public SegmentExpr {
bool arg_inited_{false};
std::shared_ptr<MultiElement> arg_set_;
std::shared_ptr<MultiElement> arg_set_double_;
std::shared_ptr<void>
arg_cached_set_; // For caching std::set<T> or std::vector<T>
PinWrapper<index::JsonKeyStats*> pinned_json_stats_{nullptr};
};
} //namespace exec

View File

@ -878,16 +878,21 @@ PhyTermFilterExpr::ExecVisitorImplForIndex<bool>() {
return nullptr;
}
std::vector<uint8_t> vals;
for (auto& val : expr_->vals_) {
vals.emplace_back(GetValueFromProto<bool>(val) ? 1 : 0);
if (!arg_inited_) {
std::vector<uint8_t> vals;
for (auto& val : expr_->vals_) {
vals.emplace_back(GetValueFromProto<bool>(val) ? 1 : 0);
}
arg_set_ = std::make_shared<FlatVectorElement<uint8_t>>(vals);
arg_inited_ = true;
}
auto execute_sub_batch = [](Index* index_ptr,
const std::vector<uint8_t>& vals) {
TermIndexFunc<bool> func;
return std::move(func(index_ptr, vals.size(), (bool*)vals.data()));
};
auto res = ProcessIndexChunks<bool>(execute_sub_batch, vals);
auto args = std::dynamic_pointer_cast<FlatVectorElement<uint8_t>>(arg_set_);
auto res = ProcessIndexChunks<bool>(execute_sub_batch, args->values_);
return res;
}