enhance: using set element for string term type (#43049)

issue: #43048

Signed-off-by: luzhang <luzhang@zilliz.com>
Co-authored-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
zhagnlu 2025-07-31 10:35:37 +08:00 committed by GitHub
parent 31801f5937
commit 708e426bb3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 66 additions and 6 deletions

View File

@ -47,6 +47,7 @@ class MilvusConan(ConanFile):
"rapidjson/cci.20230929#624c0094d741e6a3749d2e44d834b96c",
"simde/0.8.2#5e1edfd5cba92f25d79bf6ef4616b972",
"xxhash/0.8.3#199e63ab9800302c232d030b27accec0",
"unordered_dense/4.4.0#6a855c992618cc4c63019109a2e47298",
)
generators = ("cmake", "cmake_find_package")
default_options = {

View File

@ -27,6 +27,7 @@
#include "exec/QueryContext.h"
#include "expr/ITypeExpr.h"
#include "query/PlanProto.h"
#include "ankerl/unordered_dense.h"
namespace milvus {
namespace exec {
@ -243,6 +244,7 @@ class SetElement : public MultiElement {
if (std::holds_alternative<T>(value)) {
return values_.count(std::get<T>(value)) > 0;
}
return false;
}
void
@ -256,7 +258,64 @@ class SetElement : public MultiElement {
}
public:
std::set<T> values_;
ankerl::unordered_dense::set<T> values_;
};
template <>
class SetElement<bool> : public MultiElement {
public:
explicit SetElement(const std::vector<proto::plan::GenericValue>& values) {
for (auto& value : values) {
bool v = GetValueFromProto<bool>(value);
if (v) {
contains_true = true;
} else {
contains_false = true;
}
}
}
explicit SetElement(const std::vector<bool>& values) {
for (const auto& value : values) {
if (value) {
contains_true = true;
} else {
contains_false = true;
}
}
}
bool
Empty() const override {
return !contains_true && !contains_false;
}
bool
In(const ValueType& value) const override {
if (std::holds_alternative<bool>(value)) {
bool v = std::get<bool>(value);
return (v && contains_true) || (!v && contains_false);
}
return false;
}
void
AddElement(const bool& value) {
if (value) {
contains_true = true;
} else {
contains_false = true;
}
}
size_t
Size() const override {
return (contains_true ? 1 : 0) + (contains_false ? 1 : 0);
}
private:
bool contains_true = false;
bool contains_false = false;
};
} //namespace exec

View File

@ -361,7 +361,7 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable(EvalCtx& context) {
index = std::stoi(expr_->column_.nested_path_[0]);
}
if (!arg_inited_) {
arg_set_ = std::make_shared<SortVectorElement<ValueType>>(expr_->vals_);
arg_set_ = std::make_shared<SetElement<ValueType>>(expr_->vals_);
arg_inited_ = true;
}
@ -538,10 +538,10 @@ PhyTermFilterExpr::ExecJsonInVariableByKeyIndex() {
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
if (!arg_inited_) {
arg_set_ = std::make_shared<SortVectorElement<ValueType>>(expr_->vals_);
arg_set_ = std::make_shared<SetElement<ValueType>>(expr_->vals_);
if constexpr (std::is_same_v<GetType, double>) {
arg_set_float_ =
std::make_shared<SortVectorElement<float>>(expr_->vals_);
std::make_shared<SetElement<float>>(expr_->vals_);
}
arg_inited_ = true;
}
@ -740,7 +740,7 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(EvalCtx& context) {
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
if (!arg_inited_) {
arg_set_ = std::make_shared<SortVectorElement<ValueType>>(expr_->vals_);
arg_set_ = std::make_shared<SetElement<ValueType>>(expr_->vals_);
arg_inited_ = true;
}
@ -930,7 +930,7 @@ PhyTermFilterExpr::ExecVisitorImplForData(EvalCtx& context) {
vals.emplace_back(converted_val);
}
}
arg_set_ = std::make_shared<SortVectorElement<T>>(vals);
arg_set_ = std::make_shared<SetElement<T>>(vals);
arg_inited_ = true;
}