diff --git a/internal/core/src/index/json_stats/JsonKeyStats.cpp b/internal/core/src/index/json_stats/JsonKeyStats.cpp index f70563c6f8..36587c27d2 100644 --- a/internal/core/src/index/json_stats/JsonKeyStats.cpp +++ b/internal/core/src/index/json_stats/JsonKeyStats.cpp @@ -363,7 +363,9 @@ JsonKeyStats::TraverseJsonForBuildStats( std::vector& path, std::map& values) { jsmntok current = tokens[0]; - Assert(current.type != JSMN_UNDEFINED); + AssertInfo(current.type != JSMN_UNDEFINED, + "current token type is undefined for json: {}", + json); if (current.type == JSMN_OBJECT) { if (!path.empty() && current.size == 0) { AddKeyStats( @@ -371,11 +373,20 @@ JsonKeyStats::TraverseJsonForBuildStats( JSONType::OBJECT, std::string(json + current.start, current.end - current.start), values); + index++; return; } int j = 1; for (int i = 0; i < current.size; i++) { - Assert(tokens[j].type == JSMN_STRING && tokens[j].size != 0); + AssertInfo(tokens[j].type == JSMN_STRING && tokens[j].size != 0, + "current token type is not string for json: {} at " + "index: {}, type: {}, size: {} value: {}", + json, + int(tokens[j].type), + tokens[j].size, + std::string(json + tokens[j].start, + tokens[j].end - tokens[j].start)); + std::string key(json + tokens[j].start, tokens[j].end - tokens[j].start); path.push_back(key); diff --git a/internal/core/src/index/json_stats/JsonKeyStats.h b/internal/core/src/index/json_stats/JsonKeyStats.h index 68365b06aa..0e82d59a9b 100644 --- a/internal/core/src/index/json_stats/JsonKeyStats.h +++ b/internal/core/src/index/json_stats/JsonKeyStats.h @@ -16,6 +16,9 @@ #pragma once +// Forward declaration of test accessor in global namespace for friend declaration +class TraverseJsonForBuildStatsAccessor; + #include #include @@ -667,6 +670,9 @@ class JsonKeyStats : public ScalarIndex { std::string shared_column_field_name_; std::shared_ptr shared_column_; SkipIndex skip_index_; + + // Friend accessor for unit tests to call private methods safely. + friend class ::TraverseJsonForBuildStatsAccessor; }; -} // namespace milvus::index \ No newline at end of file +} // namespace milvus::index diff --git a/internal/core/unittest/test_json_stats/CMakeLists.txt b/internal/core/unittest/test_json_stats/CMakeLists.txt index a9957476f1..010c72122c 100644 --- a/internal/core/unittest/test_json_stats/CMakeLists.txt +++ b/internal/core/unittest/test_json_stats/CMakeLists.txt @@ -16,6 +16,7 @@ set(JSON_STATS_TEST_FILES test_parquet_writer.cpp test_utils.cpp test_json_key_stats.cpp + test_traverse_json_for_build_stats.cpp ) add_executable(json_stats_test diff --git a/internal/core/unittest/test_json_stats/test_traverse_json_for_build_stats.cpp b/internal/core/unittest/test_json_stats/test_traverse_json_for_build_stats.cpp new file mode 100644 index 0000000000..5d09396a44 --- /dev/null +++ b/internal/core/unittest/test_json_stats/test_traverse_json_for_build_stats.cpp @@ -0,0 +1,116 @@ +// Copyright (C) 2019-2025 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License + +#include +#include +#include +#include +#include + +#include "index/json_stats/JsonKeyStats.h" +#include "common/jsmn.h" +#include "storage/ChunkManager.h" +#include "storage/Types.h" +#include "storage/FileManager.h" +#include "storage/Util.h" + +using milvus::index::JsonKey; +using milvus::index::JsonKeyStats; +using milvus::index::JSONType; + +// Friend accessor declared in JsonKeyStats to invoke private method for UT +class TraverseJsonForBuildStatsAccessor { + public: + static void + Call(JsonKeyStats& s, + const char* json, + jsmntok_t* tokens, + int& index, + std::vector& path, + std::map& values) { + s.TraverseJsonForBuildStats(json, tokens, index, path, values); + } +}; + +namespace { + +// Helper to tokenize JSON using jsmn +static std::vector +Tokenize(const char* json) { + jsmn_parser parser; + jsmn_init(&parser); + int token_capacity = 32; + std::vector tokens(token_capacity); + while (true) { + int r = jsmn_parse( + &parser, json, strlen(json), tokens.data(), token_capacity); + if (r == JSMN_ERROR_NOMEM) { + token_capacity *= 2; + tokens.resize(token_capacity); + continue; + } + EXPECT_GE(r, 0) << "Failed to parse JSON with jsmn"; + tokens.resize(r); + break; + } + return tokens; +} + +static std::string +Substr(const char* json, const jsmntok_t& tok) { + return std::string(json + tok.start, tok.end - tok.start); +} + +} // namespace + +TEST(TraverseJsonForBuildStatsTest, + HandlesPrimitivesArraysNestedAndEmptyObject) { + const char* json = R"( + {"id": 34495370646 ,"type":"PublicEvent","actor":{"id":126890008,"login":"gegangene","display_login":"gegangene","gravatar_id":"", + "url":"https:\/\/api.github.com\/users\/gegangene","avatar_url":"https:\/\/avatars.githubusercontent.com\/u\/126890008?"}, + "repo":{"id":737601171,"name":"gegangene\/scheduler","url":"https:\/\/api.github.com\/repos\/gegangene\/scheduler"}, + "payload":{},"public":true,"created_at":"2024-01-01T00:01:28Z"} + )"; + + auto tokens = Tokenize(json); + + // We only need an instance to access the private method we exposed. + milvus::storage::FieldDataMeta field_meta{1, 2, 3, 100, {}}; + milvus::storage::IndexMeta index_meta{3, 100, 1, 1}; + milvus::storage::StorageConfig storage_config; + storage_config.storage_type = "local"; + storage_config.root_path = "/tmp/test-traverse-json-build-stats"; + auto cm = milvus::storage::CreateChunkManager(storage_config); + milvus::storage::FileManagerContext ctx(field_meta, index_meta, cm); + JsonKeyStats stats(ctx, true); + + int index = 0; + std::vector path; + std::map values; + TraverseJsonForBuildStatsAccessor::Call( + stats, json, tokens.data(), index, path, values); + + // Expect collected key-value/type pairs + auto expect_has = [&](const std::string& key, + JSONType type, + const std::string& value_substr) { + JsonKey k{key, type}; + auto it = values.find(k); + ASSERT_NE(it, values.end()) << "Missing key: " << key; + EXPECT_EQ(it->second, value_substr); + }; + + expect_has("/id", JSONType::INT64, "34495370646"); + expect_has("/type", JSONType::STRING, "PublicEvent"); + expect_has("/actor/id", JSONType::INT64, "126890008"); + expect_has("/payload", JSONType::OBJECT, "{}"); + expect_has("/public", JSONType::BOOL, "true"); + expect_has("/created_at", JSONType::STRING, "2024-01-01T00:01:28Z"); +}