mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
enhance: updated multiple places where the expr copies the input values in every loop (#45680)
issue: https://github.com/milvus-io/milvus/issues/45679
Signed-off-by: Buqian Zheng <zhengbuqian@gmail.com>
This commit is contained in:
parent
8ee8c01bcf
commit
5b85f0e4dc
@ -269,7 +269,7 @@ class PhyCompareFilterExpr : public Expr {
|
||||
OffsetVector* input,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
ValTypes... values) {
|
||||
const ValTypes&... values) {
|
||||
if (segment_chunk_reader_.segment_->is_chunked()) {
|
||||
return ProcessBothDataChunksForMultipleChunk<T,
|
||||
U,
|
||||
@ -288,7 +288,7 @@ class PhyCompareFilterExpr : public Expr {
|
||||
OffsetVector* input,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
ValTypes... values) {
|
||||
const ValTypes&... values) {
|
||||
int64_t size = input->size();
|
||||
int64_t processed_size = 0;
|
||||
const auto size_per_chunk = segment_chunk_reader_.SizePerChunk();
|
||||
@ -380,7 +380,7 @@ class PhyCompareFilterExpr : public Expr {
|
||||
ProcessBothDataChunksForSingleChunk(FUNC func,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
ValTypes... values) {
|
||||
const ValTypes&... values) {
|
||||
int64_t processed_size = 0;
|
||||
|
||||
const auto active_count = segment_chunk_reader_.active_count_;
|
||||
@ -450,7 +450,7 @@ class PhyCompareFilterExpr : public Expr {
|
||||
ProcessBothDataChunksForMultipleChunk(FUNC func,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
ValTypes... values) {
|
||||
const ValTypes&... values) {
|
||||
int64_t processed_size = 0;
|
||||
|
||||
// only call this function when left and right are not indexed, so they have the same number of chunks
|
||||
|
||||
@ -41,6 +41,7 @@ class SingleElement : public BaseElement {
|
||||
using ValueType = std::variant<std::monostate,
|
||||
bool,
|
||||
int8_t,
|
||||
uint8_t,
|
||||
int16_t,
|
||||
int32_t,
|
||||
int64_t,
|
||||
@ -62,6 +63,7 @@ class SingleElement : public BaseElement {
|
||||
void
|
||||
SetValue(const T& value) {
|
||||
if constexpr (std::is_same_v<T, bool> || std::is_same_v<T, int8_t> ||
|
||||
std::is_same_v<T, uint8_t> ||
|
||||
std::is_same_v<T, int16_t> ||
|
||||
std::is_same_v<T, int32_t> ||
|
||||
std::is_same_v<T, int64_t> || std::is_same_v<T, float> ||
|
||||
@ -95,6 +97,7 @@ class MultiElement : public BaseElement {
|
||||
using ValueType = std::variant<std::monostate,
|
||||
bool,
|
||||
int8_t,
|
||||
uint8_t,
|
||||
int16_t,
|
||||
int32_t,
|
||||
int64_t,
|
||||
@ -216,7 +219,7 @@ class FlatVectorElement : public MultiElement {
|
||||
In(const ValueType& value) const override {
|
||||
if (std::holds_alternative<T>(value)) {
|
||||
for (const auto& v : values_) {
|
||||
if (v == value)
|
||||
if (v == std::get<T>(value))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
@ -362,7 +362,7 @@ class SegmentExpr : public Expr {
|
||||
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
ValTypes... values) {
|
||||
const ValTypes&... values) {
|
||||
// For sealed segment, only single chunk
|
||||
Assert(num_data_chunk_ == 1);
|
||||
auto need_size =
|
||||
@ -423,7 +423,7 @@ class SegmentExpr : public Expr {
|
||||
OffsetVector* input,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
ValTypes... values) {
|
||||
const ValTypes&... values) {
|
||||
// For non_chunked sealed segment, only single chunk
|
||||
Assert(num_data_chunk_ == 1);
|
||||
|
||||
@ -451,7 +451,7 @@ class SegmentExpr : public Expr {
|
||||
VectorPtr
|
||||
ProcessIndexChunksByOffsets(FUNC func,
|
||||
OffsetVector* input,
|
||||
ValTypes... values) {
|
||||
const ValTypes&... values) {
|
||||
AssertInfo(num_index_chunk_ == 1, "scalar index chunk num must be 1");
|
||||
using IndexInnerType = std::
|
||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>;
|
||||
@ -480,7 +480,7 @@ class SegmentExpr : public Expr {
|
||||
OffsetVector* input,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
ValTypes... values) {
|
||||
const ValTypes&... values) {
|
||||
AssertInfo(num_index_chunk_ == 1, "scalar index chunk num must be 1");
|
||||
auto& skip_index = segment_->GetSkipIndex();
|
||||
|
||||
@ -532,7 +532,7 @@ class SegmentExpr : public Expr {
|
||||
OffsetVector* input,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
ValTypes... values) {
|
||||
const ValTypes&... values) {
|
||||
int64_t processed_size = 0;
|
||||
|
||||
// index reverse lookup
|
||||
@ -690,7 +690,7 @@ class SegmentExpr : public Expr {
|
||||
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
ValTypes... values) {
|
||||
const ValTypes&... values) {
|
||||
int64_t processed_size = 0;
|
||||
if constexpr (std::is_same_v<T, std::string_view> ||
|
||||
std::is_same_v<T, Json>) {
|
||||
@ -782,7 +782,7 @@ class SegmentExpr : public Expr {
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
bool process_all_chunks,
|
||||
ValTypes... values) {
|
||||
const ValTypes&... values) {
|
||||
int64_t processed_size = 0;
|
||||
|
||||
size_t start_chunk = process_all_chunks ? 0 : current_data_chunk_;
|
||||
@ -934,7 +934,7 @@ class SegmentExpr : public Expr {
|
||||
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
ValTypes... values) {
|
||||
const ValTypes&... values) {
|
||||
return ProcessMultipleChunksCommon<T, NeedSegmentOffsets>(
|
||||
func, skip_func, res, valid_res, false, values...);
|
||||
}
|
||||
@ -946,7 +946,7 @@ class SegmentExpr : public Expr {
|
||||
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
ValTypes... values) {
|
||||
const ValTypes&... values) {
|
||||
return ProcessMultipleChunksCommon<T>(
|
||||
func, skip_func, res, valid_res, true, values...);
|
||||
}
|
||||
@ -961,7 +961,7 @@ class SegmentExpr : public Expr {
|
||||
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
ValTypes... values) {
|
||||
const ValTypes&... values) {
|
||||
if (segment_->is_chunked()) {
|
||||
return ProcessDataChunksForMultipleChunk<T, NeedSegmentOffsets>(
|
||||
func, skip_func, res, valid_res, values...);
|
||||
@ -978,7 +978,7 @@ class SegmentExpr : public Expr {
|
||||
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
ValTypes... values) {
|
||||
const ValTypes&... values) {
|
||||
if (segment_->is_chunked()) {
|
||||
return ProcessAllChunksForMultipleChunk<T>(
|
||||
func, skip_func, res, valid_res, values...);
|
||||
@ -1010,7 +1010,7 @@ class SegmentExpr : public Expr {
|
||||
|
||||
template <typename T, typename FUNC, typename... ValTypes>
|
||||
VectorPtr
|
||||
ProcessIndexChunks(FUNC func, ValTypes... values) {
|
||||
ProcessIndexChunks(FUNC func, const ValTypes&... values) {
|
||||
typedef std::
|
||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||
IndexInnerType;
|
||||
@ -1360,7 +1360,7 @@ class SegmentExpr : public Expr {
|
||||
|
||||
template <typename T, typename FUNC, typename... ValTypes>
|
||||
void
|
||||
ProcessIndexChunksV2(FUNC func, ValTypes... values) {
|
||||
ProcessIndexChunksV2(FUNC func, const ValTypes&... values) {
|
||||
typedef std::
|
||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||
IndexInnerType;
|
||||
|
||||
@ -544,11 +544,18 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray(EvalCtx& context) {
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
std::vector<proto::plan::Array> elements;
|
||||
for (auto const& element : expr_->vals_) {
|
||||
elements.emplace_back(GetValueFromProto<proto::plan::Array>(element));
|
||||
if (!arg_inited_) {
|
||||
auto elements = std::make_shared<std::vector<proto::plan::Array>>();
|
||||
for (auto const& element : expr_->vals_) {
|
||||
elements->emplace_back(
|
||||
GetValueFromProto<proto::plan::Array>(element));
|
||||
}
|
||||
arg_cached_set_ = elements;
|
||||
arg_inited_ = true;
|
||||
}
|
||||
|
||||
auto elements = std::static_pointer_cast<std::vector<proto::plan::Array>>(
|
||||
arg_cached_set_);
|
||||
size_t processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[&processed_cursor, &
|
||||
@ -613,14 +620,14 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray(EvalCtx& context) {
|
||||
res,
|
||||
valid_res,
|
||||
pointer,
|
||||
elements);
|
||||
*elements);
|
||||
} else {
|
||||
processed_size = ProcessDataChunks<milvus::Json>(execute_sub_batch,
|
||||
std::nullptr_t{},
|
||||
res,
|
||||
valid_res,
|
||||
pointer,
|
||||
elements);
|
||||
*elements);
|
||||
}
|
||||
AssertInfo(processed_size == real_batch_size,
|
||||
"internal error: expr processed rows {} not equal "
|
||||
@ -739,11 +746,17 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(EvalCtx& context) {
|
||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
|
||||
std::set<GetType> elements;
|
||||
for (auto const& element : expr_->vals_) {
|
||||
elements.insert(GetValueWithCastNumber<GetType>(element));
|
||||
if (!arg_inited_) {
|
||||
auto elements = std::make_shared<std::set<GetType>>();
|
||||
for (auto const& element : expr_->vals_) {
|
||||
elements->insert(GetValueWithCastNumber<GetType>(element));
|
||||
}
|
||||
arg_cached_set_ = elements;
|
||||
arg_inited_ = true;
|
||||
}
|
||||
|
||||
auto elements =
|
||||
std::static_pointer_cast<std::set<GetType>>(arg_cached_set_);
|
||||
int processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[&processed_cursor, &
|
||||
@ -791,10 +804,10 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(EvalCtx& context) {
|
||||
input,
|
||||
res,
|
||||
valid_res,
|
||||
elements);
|
||||
*elements);
|
||||
} else {
|
||||
processed_size = ProcessDataChunks<milvus::ArrayView>(
|
||||
execute_sub_batch, std::nullptr_t{}, res, valid_res, elements);
|
||||
execute_sub_batch, std::nullptr_t{}, res, valid_res, *elements);
|
||||
}
|
||||
AssertInfo(processed_size == real_batch_size,
|
||||
"internal error: expr processed rows {} not equal "
|
||||
@ -832,11 +845,17 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(EvalCtx& context) {
|
||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
std::set<GetType> elements;
|
||||
for (auto const& element : expr_->vals_) {
|
||||
elements.insert(GetValueFromProto<GetType>(element));
|
||||
if (!arg_inited_) {
|
||||
auto elements = std::make_shared<std::set<GetType>>();
|
||||
for (auto const& element : expr_->vals_) {
|
||||
elements->insert(GetValueFromProto<GetType>(element));
|
||||
}
|
||||
arg_cached_set_ = elements;
|
||||
arg_inited_ = true;
|
||||
}
|
||||
|
||||
auto elements =
|
||||
std::static_pointer_cast<std::set<GetType>>(arg_cached_set_);
|
||||
int processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[&processed_cursor, &
|
||||
@ -907,14 +926,14 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(EvalCtx& context) {
|
||||
res,
|
||||
valid_res,
|
||||
pointer,
|
||||
elements);
|
||||
*elements);
|
||||
} else {
|
||||
processed_size = ProcessDataChunks<Json>(execute_sub_batch,
|
||||
std::nullptr_t{},
|
||||
res,
|
||||
valid_res,
|
||||
pointer,
|
||||
elements);
|
||||
*elements);
|
||||
}
|
||||
AssertInfo(processed_size == real_batch_size,
|
||||
"internal error: expr processed rows {} not equal "
|
||||
@ -935,12 +954,19 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllByStats() {
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
std::set<GetType> elements;
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
for (auto const& element : expr_->vals_) {
|
||||
elements.insert(GetValueFromProto<GetType>(element));
|
||||
if (!arg_inited_) {
|
||||
auto elements = std::make_shared<std::set<GetType>>();
|
||||
for (auto const& element : expr_->vals_) {
|
||||
elements->insert(GetValueFromProto<GetType>(element));
|
||||
}
|
||||
arg_cached_set_ = elements;
|
||||
arg_inited_ = true;
|
||||
}
|
||||
if (elements.empty()) {
|
||||
|
||||
auto elements =
|
||||
std::static_pointer_cast<std::set<GetType>>(arg_cached_set_);
|
||||
if (elements->empty()) {
|
||||
MoveCursor();
|
||||
return std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size, false),
|
||||
@ -966,7 +992,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllByStats() {
|
||||
pointer, milvus::index::JSONType::ARRAY);
|
||||
if (!target_field.empty()) {
|
||||
ShreddingArrayBsonContainsAllExecutor<GetType> executor(
|
||||
elements);
|
||||
*elements);
|
||||
|
||||
index->ExecutorForShreddingData<std::string_view>(
|
||||
op_ctx_,
|
||||
@ -989,7 +1015,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllByStats() {
|
||||
return;
|
||||
}
|
||||
|
||||
std::set<GetType> tmp_elements(elements);
|
||||
std::set<GetType> tmp_elements(*elements);
|
||||
for (const auto& element : val.value()) {
|
||||
auto value = milvus::BsonView::GetValueFromBsonView<GetType>(
|
||||
element.get_value());
|
||||
|
||||
@ -554,6 +554,8 @@ class PhyJsonContainsFilterExpr : public SegmentExpr {
|
||||
bool arg_inited_{false};
|
||||
std::shared_ptr<MultiElement> arg_set_;
|
||||
std::shared_ptr<MultiElement> arg_set_double_;
|
||||
std::shared_ptr<void>
|
||||
arg_cached_set_; // For caching std::set<T> or std::vector<T>
|
||||
PinWrapper<index::JsonKeyStats*> pinned_json_stats_{nullptr};
|
||||
};
|
||||
} //namespace exec
|
||||
|
||||
@ -878,16 +878,21 @@ PhyTermFilterExpr::ExecVisitorImplForIndex<bool>() {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::vector<uint8_t> vals;
|
||||
for (auto& val : expr_->vals_) {
|
||||
vals.emplace_back(GetValueFromProto<bool>(val) ? 1 : 0);
|
||||
if (!arg_inited_) {
|
||||
std::vector<uint8_t> vals;
|
||||
for (auto& val : expr_->vals_) {
|
||||
vals.emplace_back(GetValueFromProto<bool>(val) ? 1 : 0);
|
||||
}
|
||||
arg_set_ = std::make_shared<FlatVectorElement<uint8_t>>(vals);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
auto execute_sub_batch = [](Index* index_ptr,
|
||||
const std::vector<uint8_t>& vals) {
|
||||
TermIndexFunc<bool> func;
|
||||
return std::move(func(index_ptr, vals.size(), (bool*)vals.data()));
|
||||
};
|
||||
auto res = ProcessIndexChunks<bool>(execute_sub_batch, vals);
|
||||
auto args = std::dynamic_pointer_cast<FlatVectorElement<uint8_t>>(arg_set_);
|
||||
auto res = ProcessIndexChunks<bool>(execute_sub_batch, args->values_);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user