From 09dad3538957feb91f0c7fcd05c162284b0d219e Mon Sep 17 00:00:00 2001 From: zhagnlu <1542303831@qq.com> Date: Tue, 15 Apr 2025 18:36:32 +0800 Subject: [PATCH] enhance: use scan mode for like although inverted index exists (#41309) #41065 Signed-off-by: luzhang Co-authored-by: luzhang --- internal/core/src/exec/expression/Expr.h | 21 +++++++------------ .../core/src/exec/expression/UnaryExpr.cpp | 17 +++++++++++---- .../core/src/index/InvertedIndexTantivy.h | 6 ++++++ internal/core/src/index/ScalarIndex.h | 5 +++++ 4 files changed, 32 insertions(+), 17 deletions(-) diff --git a/internal/core/src/exec/expression/Expr.h b/internal/core/src/exec/expression/Expr.h index 3f306b380d..8c9d75566b 100644 --- a/internal/core/src/exec/expression/Expr.h +++ b/internal/core/src/exec/expression/Expr.h @@ -1183,20 +1183,15 @@ class SegmentExpr : public Expr { } using Index = index::ScalarIndex; - if (op == OpType::Match) { - for (size_t i = current_index_chunk_; i < num_index_chunk_; i++) { - const Index& index = - segment_->chunk_scalar_index(field_id_, i); - // 1, index support regex query, then index handles the query; - // 2, index has raw data, then call index.Reverse_Lookup to handle the query; - if (!index.SupportRegexQuery() && !index.HasRawData()) { - return false; - } - // all chunks have same index. 
- return true; - } + if (op == OpType::Match || op == OpType::InnerMatch || + op == OpType::PostfixMatch) { + const Index& index = segment_->chunk_scalar_index( + field_id_, current_index_chunk_); + // 1. index supports regex query and opts in to it: the index handles the query; + // 2. index has raw data: index.Reverse_Lookup is called to handle the query; + return (index.TryUseRegexQuery() && index.SupportRegexQuery()) || + index.HasRawData(); } - return true; } diff --git a/internal/core/src/exec/expression/UnaryExpr.cpp b/internal/core/src/exec/expression/UnaryExpr.cpp index d76bebb1e5..3f5c49f77b 100644 --- a/internal/core/src/exec/expression/UnaryExpr.cpp +++ b/internal/core/src/exec/expression/UnaryExpr.cpp @@ -1860,17 +1860,26 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(EvalCtx& context) { template bool PhyUnaryRangeFilterExpr::CanUseIndex() { - bool res = is_index_mode_ && SegmentExpr::CanUseIndex(expr_->op_type_); - use_index_ = res; - return res; + use_index_ = is_index_mode_ && SegmentExpr::CanUseIndex(expr_->op_type_); + return use_index_; } bool PhyUnaryRangeFilterExpr::CanUseIndexForJson(DataType val_type) { - use_index_ = + auto has_index = segment_->HasIndex(field_id_, milvus::Json::pointer(expr_->column_.nested_path_), val_type); + switch (val_type) { + case DataType::STRING: + use_index_ = has_index && + expr_->op_type_ != proto::plan::OpType::Match && + expr_->op_type_ != proto::plan::OpType::PostfixMatch && + expr_->op_type_ != proto::plan::OpType::InnerMatch; + break; + default: + use_index_ = has_index; + } return use_index_; } diff --git a/internal/core/src/index/InvertedIndexTantivy.h b/internal/core/src/index/InvertedIndexTantivy.h index 2953acc73c..ed7d3b9247 100644 --- a/internal/core/src/index/InvertedIndexTantivy.h +++ b/internal/core/src/index/InvertedIndexTantivy.h @@ -226,6 +226,12 @@ class InvertedIndexTantivy : public ScalarIndex { return std::is_same_v; } + bool + TryUseRegexQuery() const override { + // an inverted 
index does not use regex query to implement match + return false; + } + const TargetBitmap RegexQuery(const std::string& regex_pattern) override; diff --git a/internal/core/src/index/ScalarIndex.h b/internal/core/src/index/ScalarIndex.h index 4dc58b586a..1314de269c 100644 --- a/internal/core/src/index/ScalarIndex.h +++ b/internal/core/src/index/ScalarIndex.h @@ -151,6 +151,11 @@ class ScalarIndex : public IndexBase { return false; } + virtual bool + TryUseRegexQuery() const { + return true; + } + virtual const TargetBitmap RegexQuery(const std::string& pattern) { PanicInfo(Unsupported, "regex query is not supported");