mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
enhance: optimize term expr performance (#45490)
Issue: https://github.com/milvus-io/milvus/issues/45641 — PR: https://github.com/milvus-io/milvus/pull/45491 — Signed-off-by: sunby <sunbingyi1992@gmail.com>
This commit is contained in:
parent
e695d8a1d0
commit
f1844c9841
@ -113,7 +113,7 @@ class FixedWidthChunk : public Chunk {
|
||||
std::unique_ptr<MmapFileRAII> mmap_file_raii = nullptr)
|
||||
: Chunk(row_nums, data, size, nullable, std::move(mmap_file_raii)),
|
||||
dim_(dim),
|
||||
element_size_(element_size) {};
|
||||
element_size_(element_size){};
|
||||
|
||||
milvus::SpanBase
|
||||
Span() const {
|
||||
|
||||
@ -139,7 +139,6 @@ InitDefaultDeleteDumpBatchSize(int32_t val) {
|
||||
val);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
InitTrace(CTraceConfig* config) {
|
||||
auto traceConfig = milvus::tracer::TraceConfig{config->exporter,
|
||||
|
||||
@ -198,7 +198,7 @@ class FlatVectorElement : public MultiElement {
|
||||
In(const ValueType& value) const override {
|
||||
if (std::holds_alternative<T>(value)) {
|
||||
for (const auto& v : values_) {
|
||||
if (v == value)
|
||||
if (v == std::get<T>(value))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
@ -908,7 +908,7 @@ class SegmentExpr : public Expr {
|
||||
|
||||
template <typename T, typename FUNC, typename... ValTypes>
|
||||
VectorPtr
|
||||
ProcessIndexChunks(FUNC func, ValTypes... values) {
|
||||
ProcessIndexChunks(FUNC func, const ValTypes&... values) {
|
||||
typedef std::
|
||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||
IndexInnerType;
|
||||
|
||||
@ -66,7 +66,6 @@ class PhyGISFunctionFilterExpr : public SegmentExpr {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
VectorPtr
|
||||
EvalForIndexSegment();
|
||||
|
||||
@ -810,29 +810,36 @@ PhyTermFilterExpr::ExecVisitorImplForIndex() {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::vector<IndexInnerType> vals;
|
||||
for (auto& val : expr_->vals_) {
|
||||
if constexpr (std::is_same_v<T, double>) {
|
||||
if (val.has_int64_val()) {
|
||||
// only json field will cast int to double because other fields are casted in proxy
|
||||
vals.emplace_back(static_cast<double>(val.int64_val()));
|
||||
continue;
|
||||
if (!arg_inited_) {
|
||||
std::vector<IndexInnerType> vals;
|
||||
for (auto& val : expr_->vals_) {
|
||||
if constexpr (std::is_same_v<T, double>) {
|
||||
if (val.has_int64_val()) {
|
||||
// only json field will cast int to double because other fields are casted in proxy
|
||||
vals.emplace_back(static_cast<double>(val.int64_val()));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Generic overflow handling for all types
|
||||
bool overflowed = false;
|
||||
auto converted_val =
|
||||
GetValueFromProtoWithOverflow<T>(val, overflowed);
|
||||
if (!overflowed) {
|
||||
vals.emplace_back(converted_val);
|
||||
}
|
||||
}
|
||||
|
||||
// Generic overflow handling for all types
|
||||
bool overflowed = false;
|
||||
auto converted_val = GetValueFromProtoWithOverflow<T>(val, overflowed);
|
||||
if (!overflowed) {
|
||||
vals.emplace_back(converted_val);
|
||||
}
|
||||
arg_set_ = std::make_shared<FlatVectorElement<IndexInnerType>>(vals);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
auto execute_sub_batch = [](Index* index_ptr,
|
||||
const std::vector<IndexInnerType>& vals) {
|
||||
TermIndexFunc<T> func;
|
||||
return func(index_ptr, vals.size(), vals.data());
|
||||
};
|
||||
auto res = ProcessIndexChunks<T>(execute_sub_batch, vals);
|
||||
auto args =
|
||||
std::dynamic_pointer_cast<FlatVectorElement<IndexInnerType>>(arg_set_);
|
||||
auto res = ProcessIndexChunks<T>(execute_sub_batch, args->values_);
|
||||
AssertInfo(res->size() == real_batch_size,
|
||||
"internal error: expr processed rows {} not equal "
|
||||
"expect batch size {}",
|
||||
|
||||
@ -575,7 +575,7 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(EvalCtx& context,
|
||||
};
|
||||
} else {
|
||||
auto size_per_chunk = segment_->size_per_chunk();
|
||||
retrieve = [ size_per_chunk, this ](int64_t offset) -> auto{
|
||||
retrieve = [ size_per_chunk, this ](int64_t offset) -> auto {
|
||||
auto chunk_idx = offset / size_per_chunk;
|
||||
auto chunk_offset = offset % size_per_chunk;
|
||||
const auto& chunk =
|
||||
|
||||
@ -767,7 +767,7 @@ class GISFunctionFilterExpr : public ITypeFilterExpr {
|
||||
: column_(cloumn),
|
||||
op_(op),
|
||||
geometry_wkt_(geometry_wkt),
|
||||
distance_(distance) {};
|
||||
distance_(distance){};
|
||||
std::string
|
||||
ToString() const override {
|
||||
if (op_ == proto::plan::GISFunctionFilterExpr_GISOp_DWithin) {
|
||||
|
||||
@ -214,7 +214,8 @@ class DeletedRecord {
|
||||
SortedDeleteList::Accessor accessor(deleted_lists_);
|
||||
int total_size = accessor.size();
|
||||
|
||||
while (total_size - dumped_entry_count_.load() > DELETE_DUMP_BATCH_SIZE) {
|
||||
while (total_size - dumped_entry_count_.load() >
|
||||
DELETE_DUMP_BATCH_SIZE) {
|
||||
int32_t bitsize = 0;
|
||||
if constexpr (is_sealed) {
|
||||
bitsize = sealed_row_count_;
|
||||
@ -232,11 +233,14 @@ class DeletedRecord {
|
||||
snapshots_.back().second.size());
|
||||
}
|
||||
|
||||
while (total_size - dumped_entry_count_.load() > DELETE_DUMP_BATCH_SIZE &&
|
||||
while (total_size - dumped_entry_count_.load() >
|
||||
DELETE_DUMP_BATCH_SIZE &&
|
||||
it != accessor.end()) {
|
||||
Timestamp dump_ts = 0;
|
||||
|
||||
for (auto size = 0; size < DELETE_DUMP_BATCH_SIZE && it != accessor.end(); ++it, ++size) {
|
||||
for (auto size = 0;
|
||||
size < DELETE_DUMP_BATCH_SIZE && it != accessor.end();
|
||||
++it, ++size) {
|
||||
bitmap.set(it->second);
|
||||
dump_ts = it->first;
|
||||
}
|
||||
|
||||
@ -294,8 +294,7 @@ class FileWriter {
|
||||
// for global configuration
|
||||
static WriteMode
|
||||
mode_; // The write mode, which can be 'buffered' (default) or 'direct'.
|
||||
static size_t
|
||||
buffer_size_;
|
||||
static size_t buffer_size_;
|
||||
|
||||
// for rate limiter
|
||||
io::Priority priority_;
|
||||
|
||||
@ -343,8 +343,7 @@ GenerateRandomSparseFloatVector(size_t rows,
|
||||
return tensor;
|
||||
}
|
||||
|
||||
inline std::string
|
||||
generateRandomPoint() {
|
||||
inline std::string generateRandomPoint() {
|
||||
return "POINT(" +
|
||||
std::to_string(static_cast<double>(rand()) / RAND_MAX * 360.0 -
|
||||
180.0) +
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user