diff --git a/internal/core/src/common/bson_view.h b/internal/core/src/common/bson_view.h
index 1e7788df0f..757e67cf4e 100644
--- a/internal/core/src/common/bson_view.h
+++ b/internal/core/src/common/bson_view.h
@@ -198,6 +198,67 @@ class BsonView {
         : data_(data.data()), size_(data.size()) {
     }
 
+    // Check whether a BSON value is "empty": it is null, or it is a document
+    // or array whose elements are all recursively empty (so {} and [] count).
+    // Following the same semantics as Json::isObjectEmpty/isDocEmpty in Json.h
+    static bool
+    IsBsonValueEmpty(const bsoncxx::types::bson_value::view& val) {
+        switch (val.type()) {
+            case bsoncxx::type::k_null:
+                return true;
+            case bsoncxx::type::k_document:
+                return IsBsonValueEmpty(val.get_document().value);
+            case bsoncxx::type::k_array:
+                return IsBsonValueEmpty(val.get_array().value);
+            default:
+                // every other BSON type is treated as a real value
+                return false;
+        }
+    }
+
+    // A document is empty when every element value is empty; a document
+    // without any element is therefore empty as well.
+    static bool
+    IsBsonValueEmpty(const bsoncxx::document::view& doc) {
+        for (auto&& elem : doc) {
+            if (!IsBsonValueEmpty(elem.get_value())) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // An array is empty when every element value is empty; an array
+    // without any element is therefore empty as well.
+    static bool
+    IsBsonValueEmpty(const bsoncxx::array::view& arr) {
+        for (auto&& elem : arr) {
+            if (!IsBsonValueEmpty(elem.get_value())) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // Same check for the field parsed at `offset` in this view's buffer.
+    // Asserts that `offset` is smaller than the buffer size.
+    bool
+    IsBsonValueEmpty(size_t offset) const {
+        AssertInfo(offset < size_, "bson offset out of range");
+        auto field = ParseBsonField(data_, offset);
+
+        switch (field.type) {
+            case bsoncxx::type::k_null:
+                return true;
+            case bsoncxx::type::k_document:
+                return IsBsonValueEmpty(ParseAsDocument(field.value_ptr));
+            case bsoncxx::type::k_array:
+                return IsBsonValueEmpty(ParseAsArray(field.value_ptr));
+            default:
+                return false;
+        }
+    }
+
     explicit BsonView(const uint8_t* data, size_t size)
         : data_(data), size_(size) {
     }
@@ -441,7 +502,7 @@ class BsonView {
     }
 
     inline BsonRawField
-    ParseBsonField(const uint8_t* bson_data, size_t offset) {
+    ParseBsonField(const uint8_t* bson_data, size_t offset) const {
         const uint8_t* ptr = bson_data + offset;
         auto type_tag = static_cast<bsoncxx::type>(*ptr++);
 
diff --git a/internal/core/src/exec/expression/ExistsExpr.cpp b/internal/core/src/exec/expression/ExistsExpr.cpp
index 0dee783796..60f411e361 100644
--- a/internal/core/src/exec/expression/ExistsExpr.cpp
+++ b/internal/core/src/exec/expression/ExistsExpr.cpp
@@ -204,23 +204,33 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegmentByStats() {
         Assert(index != nullptr);
 
         cached_index_chunk_res_ = std::make_shared<TargetBitmap>(active_count_);
-        cached_index_chunk_valid_res_ =
-            std::make_shared<TargetBitmap>(active_count_, true);
         TargetBitmapView res_view(*cached_index_chunk_res_);
-        TargetBitmapView valid_res_view(*cached_index_chunk_valid_res_);
 
-        // process shredding data
-        auto shredding_fields = index->GetShreddingFields(pointer);
+        // process shredding data: for exists, check every shredded field
+        // whose path is the given pointer or lies below it
+        auto shredding_fields = index->GetShreddingFieldsWithPrefix(pointer);
         for (const auto& field : shredding_fields) {
-            index->ExecutorForGettingValid(op_ctx_, field, valid_res_view);
-            res_view |= valid_res_view;
+            // fresh bitmap per field: results must not leak across fields
+            TargetBitmap temp_valid(active_count_, true);
+            TargetBitmapView temp_valid_view(temp_valid);
+            index->ExecutorForGettingValid(op_ctx_, field, temp_valid_view);
+            res_view |= temp_valid_view;
         }
 
         if (!index->CanSkipShared(pointer)) {
-            // process shared data
-            index->ExecuteExistsPathForSharedData(pointer, res_view);
+            // process shared data, need to check if the value is empty
+            // which matches the semantics of exists in Json.h
+            index->ExecuteForSharedData(
+                op_ctx_,
+                pointer,
+                [&](BsonView bson, uint32_t row_id, uint32_t offset) {
+                    // only ever set the bit: an empty shared value must not
+                    // clear a hit already recorded by the shredding pass
+                    if (!bson.IsBsonValueEmpty(offset)) {
+                        res_view[row_id] = true;
+                    }
+                });
         }
-        cached_index_chunk_id_ = 0;
     }
 
     TargetBitmap result;
diff --git a/internal/core/src/index/json_stats/JsonKeyStats.h b/internal/core/src/index/json_stats/JsonKeyStats.h
index 50a7951df6..7b53c5d698 100644
--- a/internal/core/src/index/json_stats/JsonKeyStats.h
+++ b/internal/core/src/index/json_stats/JsonKeyStats.h
@@ -362,6 +362,27 @@ class JsonKeyStats : public ScalarIndex {
         return fields;
     }
 
+    // return all shredding fields whose pointers start with the given prefix
+    // for example, prefix "/a/b" will include fields for "/a/b" and "/a/b/..."
+    // but not "/a/bc": a match must end at the prefix or continue with '/'
+    std::set<std::string>
+    GetShreddingFieldsWithPrefix(const std::string& prefix) {
+        std::set<std::string> fields;
+        for (const auto& [path, field_names] : key_field_map_) {
+            if (path.size() >= prefix.size() &&
+                path.compare(0, prefix.size(), prefix) == 0 &&
+                (path.size() == prefix.size() || path[prefix.size()] == '/')) {
+                for (const auto& field : field_names) {
+                    if (shred_field_data_type_map_.find(field) !=
+                        shred_field_data_type_map_.end()) {
+                        fields.insert(field);
+                    }
+                }
+            }
+        }
+        return fields;
+    }
+
     std::string
     GetShreddingField(const std::string& pointer, JSONType type) {
         if (key_field_map_.find(pointer) == key_field_map_.end()) {
diff --git a/tests/python_client/milvus_client/test_milvus_client_query.py b/tests/python_client/milvus_client/test_milvus_client_query.py
index 94f5ef56de..9eb171a207 100644
--- a/tests/python_client/milvus_client/test_milvus_client_query.py
+++ b/tests/python_client/milvus_client/test_milvus_client_query.py
@@ -4167,7 +4167,6 @@ class TestMilvusClientQueryJsonPathIndex(TestMilvusClientV2Base):
         # 3. flush if specified
         if is_flush:
             self.flush(client, collection_name)
-            time.sleep(300)
         # 4. query when there is no json path index under all expressions
         # skip negative expression for issue 40685
         # "my_json['a'] != 1", "my_json['a'] != 1.0", "my_json['a'] != '1'", "my_json['a'] != 1.1", "my_json['a'] not in [1]"