enhance: optimize term expr performance (#45490)

issue: https://github.com/milvus-io/milvus/issues/45641
pr: https://github.com/milvus-io/milvus/pull/45491

---------

Signed-off-by: sunby <sunbingyi1992@gmail.com>
This commit is contained in:
Bingyi Sun 2025-11-19 11:51:06 +08:00 committed by GitHub
parent e695d8a1d0
commit f1844c9841
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 36 additions and 29 deletions

View File

@ -113,7 +113,7 @@ class FixedWidthChunk : public Chunk {
std::unique_ptr<MmapFileRAII> mmap_file_raii = nullptr) std::unique_ptr<MmapFileRAII> mmap_file_raii = nullptr)
: Chunk(row_nums, data, size, nullable, std::move(mmap_file_raii)), : Chunk(row_nums, data, size, nullable, std::move(mmap_file_raii)),
dim_(dim), dim_(dim),
element_size_(element_size) {}; element_size_(element_size){};
milvus::SpanBase milvus::SpanBase
Span() const { Span() const {

View File

@ -139,7 +139,6 @@ InitDefaultDeleteDumpBatchSize(int32_t val) {
val); val);
} }
void void
InitTrace(CTraceConfig* config) { InitTrace(CTraceConfig* config) {
auto traceConfig = milvus::tracer::TraceConfig{config->exporter, auto traceConfig = milvus::tracer::TraceConfig{config->exporter,

View File

@ -198,7 +198,7 @@ class FlatVectorElement : public MultiElement {
In(const ValueType& value) const override { In(const ValueType& value) const override {
if (std::holds_alternative<T>(value)) { if (std::holds_alternative<T>(value)) {
for (const auto& v : values_) { for (const auto& v : values_) {
if (v == value) if (v == std::get<T>(value))
return true; return true;
} }
} }

View File

@ -908,7 +908,7 @@ class SegmentExpr : public Expr {
template <typename T, typename FUNC, typename... ValTypes> template <typename T, typename FUNC, typename... ValTypes>
VectorPtr VectorPtr
ProcessIndexChunks(FUNC func, ValTypes... values) { ProcessIndexChunks(FUNC func, const ValTypes&... values) {
typedef std:: typedef std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T> conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
IndexInnerType; IndexInnerType;

View File

@ -66,7 +66,6 @@ class PhyGISFunctionFilterExpr : public SegmentExpr {
} }
} }
private: private:
VectorPtr VectorPtr
EvalForIndexSegment(); EvalForIndexSegment();

View File

@ -810,29 +810,36 @@ PhyTermFilterExpr::ExecVisitorImplForIndex() {
return nullptr; return nullptr;
} }
std::vector<IndexInnerType> vals; if (!arg_inited_) {
for (auto& val : expr_->vals_) { std::vector<IndexInnerType> vals;
if constexpr (std::is_same_v<T, double>) { for (auto& val : expr_->vals_) {
if (val.has_int64_val()) { if constexpr (std::is_same_v<T, double>) {
// only json field will cast int to double because other fields are casted in proxy if (val.has_int64_val()) {
vals.emplace_back(static_cast<double>(val.int64_val())); // only json field will cast int to double because other fields are casted in proxy
continue; vals.emplace_back(static_cast<double>(val.int64_val()));
continue;
}
}
// Generic overflow handling for all types
bool overflowed = false;
auto converted_val =
GetValueFromProtoWithOverflow<T>(val, overflowed);
if (!overflowed) {
vals.emplace_back(converted_val);
} }
} }
arg_set_ = std::make_shared<FlatVectorElement<IndexInnerType>>(vals);
// Generic overflow handling for all types arg_inited_ = true;
bool overflowed = false;
auto converted_val = GetValueFromProtoWithOverflow<T>(val, overflowed);
if (!overflowed) {
vals.emplace_back(converted_val);
}
} }
auto execute_sub_batch = [](Index* index_ptr, auto execute_sub_batch = [](Index* index_ptr,
const std::vector<IndexInnerType>& vals) { const std::vector<IndexInnerType>& vals) {
TermIndexFunc<T> func; TermIndexFunc<T> func;
return func(index_ptr, vals.size(), vals.data()); return func(index_ptr, vals.size(), vals.data());
}; };
auto res = ProcessIndexChunks<T>(execute_sub_batch, vals); auto args =
std::dynamic_pointer_cast<FlatVectorElement<IndexInnerType>>(arg_set_);
auto res = ProcessIndexChunks<T>(execute_sub_batch, args->values_);
AssertInfo(res->size() == real_batch_size, AssertInfo(res->size() == real_batch_size,
"internal error: expr processed rows {} not equal " "internal error: expr processed rows {} not equal "
"expect batch size {}", "expect batch size {}",

View File

@ -575,7 +575,7 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(EvalCtx& context,
}; };
} else { } else {
auto size_per_chunk = segment_->size_per_chunk(); auto size_per_chunk = segment_->size_per_chunk();
retrieve = [ size_per_chunk, this ](int64_t offset) -> auto{ retrieve = [ size_per_chunk, this ](int64_t offset) -> auto {
auto chunk_idx = offset / size_per_chunk; auto chunk_idx = offset / size_per_chunk;
auto chunk_offset = offset % size_per_chunk; auto chunk_offset = offset % size_per_chunk;
const auto& chunk = const auto& chunk =

View File

@ -767,7 +767,7 @@ class GISFunctionFilterExpr : public ITypeFilterExpr {
: column_(cloumn), : column_(cloumn),
op_(op), op_(op),
geometry_wkt_(geometry_wkt), geometry_wkt_(geometry_wkt),
distance_(distance) {}; distance_(distance){};
std::string std::string
ToString() const override { ToString() const override {
if (op_ == proto::plan::GISFunctionFilterExpr_GISOp_DWithin) { if (op_ == proto::plan::GISFunctionFilterExpr_GISOp_DWithin) {

View File

@ -214,7 +214,8 @@ class DeletedRecord {
SortedDeleteList::Accessor accessor(deleted_lists_); SortedDeleteList::Accessor accessor(deleted_lists_);
int total_size = accessor.size(); int total_size = accessor.size();
while (total_size - dumped_entry_count_.load() > DELETE_DUMP_BATCH_SIZE) { while (total_size - dumped_entry_count_.load() >
DELETE_DUMP_BATCH_SIZE) {
int32_t bitsize = 0; int32_t bitsize = 0;
if constexpr (is_sealed) { if constexpr (is_sealed) {
bitsize = sealed_row_count_; bitsize = sealed_row_count_;
@ -232,11 +233,14 @@ class DeletedRecord {
snapshots_.back().second.size()); snapshots_.back().second.size());
} }
while (total_size - dumped_entry_count_.load() > DELETE_DUMP_BATCH_SIZE && while (total_size - dumped_entry_count_.load() >
DELETE_DUMP_BATCH_SIZE &&
it != accessor.end()) { it != accessor.end()) {
Timestamp dump_ts = 0; Timestamp dump_ts = 0;
for (auto size = 0; size < DELETE_DUMP_BATCH_SIZE && it != accessor.end(); ++it, ++size) { for (auto size = 0;
size < DELETE_DUMP_BATCH_SIZE && it != accessor.end();
++it, ++size) {
bitmap.set(it->second); bitmap.set(it->second);
dump_ts = it->first; dump_ts = it->first;
} }

View File

@ -294,8 +294,7 @@ class FileWriter {
// for global configuration // for global configuration
static WriteMode static WriteMode
mode_; // The write mode, which can be 'buffered' (default) or 'direct'. mode_; // The write mode, which can be 'buffered' (default) or 'direct'.
static size_t static size_t buffer_size_;
buffer_size_;
// for rate limiter // for rate limiter
io::Priority priority_; io::Priority priority_;

View File

@ -343,8 +343,7 @@ GenerateRandomSparseFloatVector(size_t rows,
return tensor; return tensor;
} }
inline std::string inline std::string generateRandomPoint() {
generateRandomPoint() {
return "POINT(" + return "POINT(" +
std::to_string(static_cast<double>(rand()) / RAND_MAX * 360.0 - std::to_string(static_cast<double>(rand()) / RAND_MAX * 360.0 -
180.0) + 180.0) +