From 5b77d45a277df4a3286f459bc98dd48277db1a94 Mon Sep 17 00:00:00 2001 From: Buqian Zheng Date: Sat, 29 Nov 2025 21:55:09 +0800 Subject: [PATCH] fix: [2.6] term expr to correctly handle in of string in json (#45956) issue: https://github.com/milvus-io/milvus/issues/45887 pr: https://github.com/milvus-io/milvus/pull/45955 --------- Signed-off-by: Buqian Zheng --- internal/core/src/exec/expression/Element.h | 36 +++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/internal/core/src/exec/expression/Element.h b/internal/core/src/exec/expression/Element.h index af302d44d3..15004e5159 100644 --- a/internal/core/src/exec/expression/Element.h +++ b/internal/core/src/exec/expression/Element.h @@ -31,6 +31,18 @@ namespace milvus { namespace exec { +// Transparent string hash for heterogeneous lookup in unordered_dense::set +// See: https://github.com/martinus/unordered_dense#324-heterogeneous-overloads-using-is_transparent +struct StringHash { + using is_transparent = void; // enable heterogeneous overloads + using is_avalanching = void; // mark as high quality avalanching hash + + [[nodiscard]] auto + operator()(std::string_view str) const noexcept -> uint64_t { + return ankerl::unordered_dense::hash<std::string_view>{}(str); + } +}; + class BaseElement { public: virtual ~BaseElement() = default; @@ -223,6 +235,13 @@ class FlatVectorElement : public MultiElement { return true; } } + // Handle string_view -> string comparison when T is std::string + if constexpr (std::is_same_v<T, std::string>) { + if (auto sv = std::get_if<std::string_view>(&value)) { + return std::find(values_.begin(), values_.end(), *sv) != + values_.end(); + } + } return false; } @@ -242,6 +261,13 @@ class FlatVectorElement : public MultiElement { template <typename T> class SetElement : public MultiElement { + // Use transparent hash for std::string to enable heterogeneous lookup + // This allows O(1) lookup with string_view without string copy + using SetType = std::conditional_t< + std::is_same_v<T, std::string>, + ankerl::unordered_dense::set<std::string, StringHash, std::equal_to<>>, +
ankerl::unordered_dense::set<T>>; + public: explicit SetElement(const std::vector& values) { for (auto& value : values) { @@ -263,7 +289,13 @@ class SetElement : public MultiElement { bool In(const ValueType& value) const override { if (std::holds_alternative<T>(value)) { - return values_.count(std::get<T>(value)) > 0; + return values_.find(std::get<T>(value)) != values_.end(); + } + // Handle string_view -> string comparison when T is std::string + if constexpr (std::is_same_v<T, std::string>) { + if (auto sv = std::get_if<std::string_view>(&value)) { + return values_.find(*sv) != values_.end(); + } } return false; } @@ -284,7 +316,7 @@ class SetElement : public MultiElement { } public: - ankerl::unordered_dense::set<T> values_; + SetType values_; }; template <>