milvus/internal/core/src/common/JsonUtils.cpp
Bingyi Sun fbf5cb4e62
feat: Add json flat index (#39917)
issue: https://github.com/milvus-io/milvus/issues/35528

This PR introduces a JSON flat index that allows indexing JSON fields
and dynamic fields in the same way as other field types.

In a previous PR (#36750), we implemented a JSON index that requires
specifying a JSON path and a cast type (json_cast_type). The JSON flat
index is created the same way; the only distinction lies in the
json_cast_type parameter: when json_cast_type is set to the JSON type,
Milvus automatically creates a JSON flat index.

For details on how Tantivy interprets JSON data, refer to the [tantivy
documentation](https://github.com/quickwit-oss/tantivy/blob/main/doc/src/json.md#pitfalls-limitation-and-corner-cases).

Limitations
Array handling: Arrays do not function as nested objects. See the
[limitations
section](https://github.com/quickwit-oss/tantivy/blob/main/doc/src/json.md#arrays-do-not-work-like-nested-object)
for more details.

---------

Signed-off-by: sunby <sunbingyi1992@gmail.com>
2025-06-10 19:14:35 +08:00

75 lines
2.5 KiB
C++

#include "common/JsonUtils.h"
namespace milvus {
// Parse a JSON Pointer (RFC 6901) into its unescaped reference tokens.
//
// - An empty pointer refers to the whole document and yields no tokens.
// - Any other pointer must start with '/'; otherwise std::invalid_argument
//   is thrown.
// - Every '/' starts a new reference token, including an empty one, so "/"
//   yields {""} (the member whose key is the empty string) and "/a/" yields
//   {"a", ""}.
// - Inside a token "~1" is decoded to '/' and "~0" to '~'. A '~' that is not
//   followed by '0' or '1' is kept unchanged.
std::vector<std::string>
parse_json_pointer(const std::string& pointer) {
    std::vector<std::string> tokens;
    if (pointer.empty())
        return tokens;  // Root path (entire document)
    if (pointer[0] != '/') {
        throw std::invalid_argument(
            "Invalid JSON Pointer: must start with '/'");
    }

    size_t start = 1;  // first character after the current '/'
    while (true) {
        size_t end = pointer.find('/', start);
        if (end == std::string::npos)
            end = pointer.size();

        // Decode the raw token [start, end) directly into the result slot.
        tokens.emplace_back();
        std::string& token = tokens.back();
        token.reserve(end - start);
        for (size_t i = start; i < end; ++i) {
            const char ch = pointer[i];
            if (ch == '~' && i + 1 < end) {
                const char code = pointer[i + 1];
                if (code == '0' || code == '1') {
                    // Single pass, so "~01" decodes to "~1" and never to "/".
                    token.push_back(code == '0' ? '~' : '/');
                    ++i;  // skip the escape code character
                    continue;
                }
            }
            token.push_back(ch);
        }

        // Stop only after the token following the last '/' has been emitted,
        // so that a trailing '/' still produces its (empty) token.
        if (end == pointer.size())
            break;
        start = end + 1;
    }
    return tokens;
}
// Check whether the location described by `tokens` exists under `root`.
//
// `tokens` are unescaped JSON Pointer reference tokens (for example the
// output of parse_json_pointer) and are applied in order, starting at `root`:
//   - on an object, the token is looked up as a member key;
//   - on an array, the token must be an RFC 6901 array index, i.e. "0" or a
//     run of decimal digits without a leading zero, and it must be in range;
//   - on any other value no further token can be resolved.
// Returns true when every token resolves (an empty `tokens` denotes `root`
// itself), false otherwise.
bool
path_exists(const simdjson::dom::element& root,
            const std::vector<std::string>& tokens) {
    simdjson::dom::element current = root;
    for (const auto& token : tokens) {
        if (current.type() == simdjson::dom::element_type::OBJECT) {
            auto obj = current.get_object();
            if (obj.error())
                return false;
            auto next = obj.value().at_key(token);
            if (next.error())
                return false;
            current = next.value();
        } else if (current.type() == simdjson::dom::element_type::ARRAY) {
            auto arr = current.get_array();
            if (arr.error())
                return false;
            const size_t length = arr.value().size();

            // Strict index syntax: the empty token and leading zeros are not
            // valid indices. "-" (the position past the last element), signs
            // and whitespace are rejected by the digit check below.
            if (token.empty() || (token.size() > 1 && token[0] == '0'))
                return false;
            size_t index = 0;
            for (const char ch : token) {
                if (ch < '0' || ch > '9')
                    return false;  // Not a valid index
                index = index * 10 + static_cast<size_t>(ch - '0');
                // Checking the bound on every digit also keeps `index` small
                // enough that the multiplication above cannot overflow.
                if (index >= length)
                    return false;
            }

            auto next = arr.value().at(index);
            if (next.error())
                return false;
            current = next.value();
        } else {
            return false;  // Path cannot be resolved
        }
    }
    return true;
}
} // namespace milvus