From ab9bec0a6dc1d50dab7989372c2aa31ab284920c Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Fri, 19 Dec 2025 16:13:19 +0800 Subject: [PATCH] fix: some fixes for ngram index (#46405) issue: https://github.com/milvus-io/milvus/issues/42053 The literals obtained by splitting the `match` pattern on wildcards should be combined with `and` semantics rather than `or`. Signed-off-by: SpadeA --- internal/core/src/index/NgramInvertedIndex.cpp | 6 ++++-- internal/core/src/index/NgramInvertedIndex.h | 5 +++++ internal/core/src/index/ScalarIndex.h | 3 +++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/internal/core/src/index/NgramInvertedIndex.cpp b/internal/core/src/index/NgramInvertedIndex.cpp index 5bfc8007c0..6395a28c54 100644 --- a/internal/core/src/index/NgramInvertedIndex.cpp +++ b/internal/core/src/index/NgramInvertedIndex.cpp @@ -395,13 +395,15 @@ NgramInvertedIndex::MatchQuery(const std::string& literal, root_span->SetAttribute("match_query_min_gram", min_gram_); root_span->SetAttribute("match_query_max_gram", max_gram_); } - TargetBitmap bitset{static_cast(Count())}; + TargetBitmap bitset(static_cast(Count()), true); auto literals = split_by_wildcard(literal); for (const auto& l : literals) { if (l.length() < min_gram_) { return std::nullopt; } - wrapper_->ngram_match_query(l, min_gram_, max_gram_, &bitset); + TargetBitmap tmp_bitset(static_cast(Count()), false); + wrapper_->ngram_match_query(l, min_gram_, max_gram_, &tmp_bitset); + bitset &= tmp_bitset; } TargetBitmapView res(bitset); diff --git a/internal/core/src/index/NgramInvertedIndex.h b/internal/core/src/index/NgramInvertedIndex.h index 43bc828b0d..65b68e545c 100644 --- a/internal/core/src/index/NgramInvertedIndex.h +++ b/internal/core/src/index/NgramInvertedIndex.h @@ -49,6 +49,11 @@ class NgramInvertedIndex : public InvertedIndexTantivy { proto::plan::OpType op_type, exec::SegmentExpr* segment); + ScalarIndexType + GetIndexType() const override { + return 
ScalarIndexType::NGRAM; + } + void finish() { this->wrapper_->finish(); diff --git a/internal/core/src/index/ScalarIndex.h b/internal/core/src/index/ScalarIndex.h index 404880fadf..d56bddbf29 100644 --- a/internal/core/src/index/ScalarIndex.h +++ b/internal/core/src/index/ScalarIndex.h @@ -38,6 +38,7 @@ enum class ScalarIndexType { HYBRID, JSONSTATS, RTREE, + NGRAM, }; inline std::string @@ -57,6 +58,8 @@ ToString(ScalarIndexType type) { return "HYBRID"; case ScalarIndexType::RTREE: return "RTREE"; + case ScalarIndexType::NGRAM: + return "NGRAM"; default: return "UNKNOWN"; }