fix: some fixes for ngram index (#46405)

issue: https://github.com/milvus-io/milvus/issues/42053

The literals obtained by splitting the pattern during `match` execution
should be combined with `and` semantics rather than `or`.

Signed-off-by: SpadeA <tangchenjie1210@gmail.com>
This commit is contained in:
Spade A 2025-12-19 16:13:19 +08:00 committed by GitHub
parent ad8aba7cb4
commit ab9bec0a6d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 12 additions and 2 deletions

View File

@ -395,13 +395,15 @@ NgramInvertedIndex::MatchQuery(const std::string& literal,
root_span->SetAttribute("match_query_min_gram", min_gram_);
root_span->SetAttribute("match_query_max_gram", max_gram_);
}
TargetBitmap bitset{static_cast<size_t>(Count())};
TargetBitmap bitset(static_cast<size_t>(Count()), true);
auto literals = split_by_wildcard(literal);
for (const auto& l : literals) {
if (l.length() < min_gram_) {
return std::nullopt;
}
wrapper_->ngram_match_query(l, min_gram_, max_gram_, &bitset);
TargetBitmap tmp_bitset(static_cast<size_t>(Count()), false);
wrapper_->ngram_match_query(l, min_gram_, max_gram_, &tmp_bitset);
bitset &= tmp_bitset;
}
TargetBitmapView res(bitset);

View File

@ -49,6 +49,11 @@ class NgramInvertedIndex : public InvertedIndexTantivy<std::string> {
proto::plan::OpType op_type,
exec::SegmentExpr* segment);
// Reports the scalar index type of this index; always ScalarIndexType::NGRAM.
ScalarIndexType
GetIndexType() const override {
return ScalarIndexType::NGRAM;
}
void
finish() {
this->wrapper_->finish();

View File

@ -38,6 +38,7 @@ enum class ScalarIndexType {
HYBRID,
JSONSTATS,
RTREE,
NGRAM,
};
inline std::string
@ -57,6 +58,8 @@ ToString(ScalarIndexType type) {
return "HYBRID";
case ScalarIndexType::RTREE:
return "RTREE";
case ScalarIndexType::NGRAM:
return "NGRAM";
default:
return "UNKNOWN";
}