fix: skip json path index if the query path includes number (#46200)

issue: #45511

Our Tantivy inverted index currently does not record the element index when
a JSON value is an array, so lookups such as `a[0] == 'b'` cannot be answered
from the inverted index. For such queries, we need to skip the index and fall
back to brute-force search.

We may improve the index in the future, so this is a temporary solution.

Signed-off-by: Buqian Zheng <zhengbuqian@gmail.com>
This commit is contained in:
Buqian Zheng 2025-12-10 13:59:13 +08:00 committed by GitHub
parent bb486c0db3
commit 85a7a7b1e3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 53 additions and 2 deletions

View File

@ -1431,7 +1431,59 @@ class SegmentExpr : public Expr {
CanUseIndex() const {
// Ngram index should be used in specific execution path (CanUseNgramIndex -> ExecNgramMatch).
// TODO: if multiple indexes are supported, this logic should be changed
return num_index_chunk_ != 0 && !CanUseNgramIndex();
if (num_index_chunk_ == 0 || CanUseNgramIndex()) {
return false;
}
// For JSON fields with JsonFlatIndex, check if prefix matching is valid.
// Tantivy JSON index can handle nested object paths (e.g., "a.b") but NOT
// numeric array indices (e.g., "a.0"). Per RFC 6901, JSON Pointer doesn't
// distinguish between array indices and object keys syntactically. Since
// Tantivy doesn't store array index information, we must fall back to
// brute-force search when the relative path contains numeric segments.
if (field_type_ != DataType::JSON || pinned_index_.empty()) {
return true;
}
auto json_flat_index =
dynamic_cast<const index::JsonFlatIndex*>(pinned_index_[0].get());
if (json_flat_index == nullptr) {
return true;
}
auto index_path = json_flat_index->GetNestedPath();
auto query_path = milvus::Json::pointer(nested_path_);
// Exact match - safe to use index
if (index_path == query_path) {
return true;
}
// PinJsonIndex guarantees index_path is a prefix of query_path
// Get relative path (e.g., if index_path="/a" and query_path="/a/0/b",
// relative_path="/0/b")
auto relative_path = query_path.substr(index_path.length());
// Check if any path segment is numeric (potential array index)
size_t pos = 0;
while (pos < relative_path.length()) {
if (relative_path[pos] == '/') {
pos++;
continue;
}
size_t end = relative_path.find('/', pos);
if (end == std::string::npos) {
end = relative_path.length();
}
auto segment = relative_path.substr(pos, end - pos);
if (!segment.empty() && milvus::IsInteger(segment)) {
return false;
}
pos = end;
}
return true;
}
template <typename T>

View File

@ -3633,7 +3633,6 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base):
"limit": limit})
@pytest.mark.skip(reason="issue #45511")
class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base):
""" Test case of search interface """