From 85a7a7b1e3963eddb3a38bdafad62589234da5f3 Mon Sep 17 00:00:00 2001 From: Buqian Zheng Date: Wed, 10 Dec 2025 13:59:13 +0800 Subject: [PATCH] fix: skip json path index if the query path includes number (#46200) issue: #45511 Our Tantivy inverted index currently does not record element indices when a JSON value is an array, so lookups such as `a[0] == 'b'` cannot be answered from the inverted index. For such queries we skip the index and fall back to brute-force search. We may improve the index in the future, so this is a temporary solution. Signed-off-by: Buqian Zheng --- internal/core/src/exec/expression/Expr.h | 54 ++++++++++++++++++- .../test_milvus_client_search.py | 1 - 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/internal/core/src/exec/expression/Expr.h b/internal/core/src/exec/expression/Expr.h index 5cb8a9644b..46406f000b 100644 --- a/internal/core/src/exec/expression/Expr.h +++ b/internal/core/src/exec/expression/Expr.h @@ -1431,7 +1431,59 @@ class SegmentExpr : public Expr { CanUseIndex() const { // Ngram index should be used in specific execution path (CanUseNgramIndex -> ExecNgramMatch). // TODO: if multiple indexes are supported, this logic should be changed - return num_index_chunk_ != 0 && !CanUseNgramIndex(); + if (num_index_chunk_ == 0 || CanUseNgramIndex()) { + return false; + } + + // For JSON fields with JsonFlatIndex, check if prefix matching is valid. + // Tantivy JSON index can handle nested object paths (e.g., "a.b") but NOT + // numeric array indices (e.g., "a.0"). Per RFC 6901, JSON Pointer doesn't + // distinguish between array indices and object keys syntactically. Since + // Tantivy doesn't store array index information, we must fall back to + // brute-force search when the relative path contains numeric segments.
+ if (field_type_ != DataType::JSON || pinned_index_.empty()) { + return true; + } + + auto json_flat_index = + dynamic_cast(pinned_index_[0].get()); + if (json_flat_index == nullptr) { + return true; + } + + auto index_path = json_flat_index->GetNestedPath(); + auto query_path = milvus::Json::pointer(nested_path_); + + // Exact match - safe to use index + if (index_path == query_path) { + return true; + } + + // PinJsonIndex guarantees index_path is a prefix of query_path + + // Get relative path (e.g., if index_path="/a" and query_path="/a/0/b", + // relative_path="/0/b") + auto relative_path = query_path.substr(index_path.length()); + + // Check if any path segment is numeric (potential array index) + size_t pos = 0; + while (pos < relative_path.length()) { + if (relative_path[pos] == '/') { + pos++; + continue; + } + size_t end = relative_path.find('/', pos); + if (end == std::string::npos) { + end = relative_path.length(); + } + auto segment = relative_path.substr(pos, end - pos); + if (!segment.empty() && milvus::IsInteger(segment)) { + return false; + } + pos = end; + } + + return true; } template diff --git a/tests/python_client/milvus_client/test_milvus_client_search.py b/tests/python_client/milvus_client/test_milvus_client_search.py index 7c1862e95b..8f9f35d30c 100644 --- a/tests/python_client/milvus_client/test_milvus_client_search.py +++ b/tests/python_client/milvus_client/test_milvus_client_search.py @@ -3633,7 +3633,6 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base): "limit": limit}) -@pytest.mark.skip(reason="issue #45511") class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): """ Test case of search interface """