mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 01:28:27 +08:00
fix:unified json exists path semantic (#44916)
#44927 Signed-off-by: luzhang <luzhang@zilliz.com> Co-authored-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
parent
4dc75a6e2c
commit
b7935557e1
@ -198,6 +198,59 @@ class BsonView {
|
||||
: data_(data.data()), size_(data.size()) {
|
||||
}
|
||||
|
||||
// Core implementation: check if a BSON value is "empty" (null or recursively contains only nulls/empties)
|
||||
// Following the same semantics as Json::isObjectEmpty/isDocEmpty in Json.h
|
||||
static bool
|
||||
IsBsonValueEmpty(const bsoncxx::types::bson_value::view& val) {
|
||||
switch (val.type()) {
|
||||
case bsoncxx::type::k_null:
|
||||
return true;
|
||||
case bsoncxx::type::k_document:
|
||||
return IsBsonValueEmpty(val.get_document().value);
|
||||
case bsoncxx::type::k_array:
|
||||
return IsBsonValueEmpty(val.get_array().value);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
IsBsonValueEmpty(const bsoncxx::document::view& doc) {
|
||||
for (auto&& elem : doc) {
|
||||
if (!IsBsonValueEmpty(elem.get_value())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
IsBsonValueEmpty(const bsoncxx::array::view& arr) {
|
||||
for (auto&& elem : arr) {
|
||||
if (!IsBsonValueEmpty(elem.get_value())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IsBsonValueEmpty(size_t offset) const {
|
||||
AssertInfo(offset < size_, "bson offset out of range");
|
||||
auto field = ParseBsonField(data_, offset);
|
||||
|
||||
switch (field.type) {
|
||||
case bsoncxx::type::k_null:
|
||||
return true;
|
||||
case bsoncxx::type::k_document:
|
||||
return IsBsonValueEmpty(ParseAsDocument(field.value_ptr));
|
||||
case bsoncxx::type::k_array:
|
||||
return IsBsonValueEmpty(ParseAsArray(field.value_ptr));
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
explicit BsonView(const uint8_t* data, size_t size)
|
||||
: data_(data), size_(size) {
|
||||
}
|
||||
@ -441,7 +494,7 @@ class BsonView {
|
||||
}
|
||||
|
||||
inline BsonRawField
|
||||
ParseBsonField(const uint8_t* bson_data, size_t offset) {
|
||||
ParseBsonField(const uint8_t* bson_data, size_t offset) const {
|
||||
const uint8_t* ptr = bson_data + offset;
|
||||
auto type_tag = static_cast<bsoncxx::type>(*ptr++);
|
||||
|
||||
|
||||
@ -204,23 +204,28 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegmentByStats() {
|
||||
Assert(index != nullptr);
|
||||
|
||||
cached_index_chunk_res_ = std::make_shared<TargetBitmap>(active_count_);
|
||||
cached_index_chunk_valid_res_ =
|
||||
std::make_shared<TargetBitmap>(active_count_, true);
|
||||
TargetBitmapView res_view(*cached_index_chunk_res_);
|
||||
TargetBitmapView valid_res_view(*cached_index_chunk_valid_res_);
|
||||
|
||||
// process shredding data
|
||||
auto shredding_fields = index->GetShreddingFields(pointer);
|
||||
// process shredding data, for exists, we only need to check the fields
|
||||
// that start with the given prefix which contains the given pointer
|
||||
auto shredding_fields = index->GetShreddingFieldsWithPrefix(pointer);
|
||||
for (const auto& field : shredding_fields) {
|
||||
index->ExecutorForGettingValid(op_ctx_, field, valid_res_view);
|
||||
res_view |= valid_res_view;
|
||||
TargetBitmap temp_valid(active_count_, true);
|
||||
TargetBitmapView temp_valid_view(temp_valid);
|
||||
index->ExecutorForGettingValid(op_ctx_, field, temp_valid_view);
|
||||
res_view |= temp_valid_view;
|
||||
}
|
||||
|
||||
if (!index->CanSkipShared(pointer)) {
|
||||
// process shared data
|
||||
index->ExecuteExistsPathForSharedData(pointer, res_view);
|
||||
// process shared data, need to check if the value is empty
|
||||
// which match the semantics of exists in Json.h
|
||||
index->ExecuteForSharedData(
|
||||
op_ctx_,
|
||||
pointer,
|
||||
[&](BsonView bson, uint32_t row_id, uint32_t offset) {
|
||||
res_view[row_id] = !bson.IsBsonValueEmpty(offset);
|
||||
});
|
||||
}
|
||||
cached_index_chunk_id_ = 0;
|
||||
}
|
||||
|
||||
TargetBitmap result;
|
||||
|
||||
@ -362,6 +362,26 @@ class JsonKeyStats : public ScalarIndex<std::string> {
|
||||
return fields;
|
||||
}
|
||||
|
||||
// return all shredding fields whose pointers start with the given prefix
|
||||
// for example, prefix "/a/b" will include fields for "/a/b" and "/a/b/..."
|
||||
std::set<std::string>
|
||||
GetShreddingFieldsWithPrefix(const std::string& prefix) {
|
||||
std::set<std::string> fields;
|
||||
for (const auto& [path, field_names] : key_field_map_) {
|
||||
if (path.size() >= prefix.size() &&
|
||||
path.compare(0, prefix.size(), prefix) == 0 &&
|
||||
(path.size() == prefix.size() || path[prefix.size()] == '/')) {
|
||||
for (const auto& field : field_names) {
|
||||
if (shred_field_data_type_map_.find(field) !=
|
||||
shred_field_data_type_map_.end()) {
|
||||
fields.insert(field);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return fields;
|
||||
}
|
||||
|
||||
std::string
|
||||
GetShreddingField(const std::string& pointer, JSONType type) {
|
||||
if (key_field_map_.find(pointer) == key_field_map_.end()) {
|
||||
|
||||
@ -4167,7 +4167,6 @@ class TestMilvusClientQueryJsonPathIndex(TestMilvusClientV2Base):
|
||||
# 3. flush if specified
|
||||
if is_flush:
|
||||
self.flush(client, collection_name)
|
||||
time.sleep(300)
|
||||
# 4. query when there is no json path index under all expressions
|
||||
# skip negative expression for issue 40685
|
||||
# "my_json['a'] != 1", "my_json['a'] != 1.0", "my_json['a'] != '1'", "my_json['a'] != 1.1", "my_json['a'] not in [1]"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user