From cb2bc2b41b0aa609f3c97d05508433c88777ee48 Mon Sep 17 00:00:00 2001 From: zhagnlu Date: Tue, 4 Nov 2025 11:13:34 +0800 Subject: [PATCH] fix: fix bug for shredding json when empty but not null json (#45214) pr: #45221 Signed-off-by: luzhang Co-authored-by: luzhang --- .../src/index/json_stats/JsonKeyStats.cpp | 31 +++++++++++++++++-- .../core/src/index/json_stats/JsonKeyStats.h | 2 ++ .../test_traverse_json_for_build_stats.cpp | 29 +++++++++++++++++ 3 files changed, 59 insertions(+), 3 deletions(-) diff --git a/internal/core/src/index/json_stats/JsonKeyStats.cpp b/internal/core/src/index/json_stats/JsonKeyStats.cpp index ddb9d6453e..2d1265085f 100644 --- a/internal/core/src/index/json_stats/JsonKeyStats.cpp +++ b/internal/core/src/index/json_stats/JsonKeyStats.cpp @@ -135,14 +135,23 @@ JsonKeyStats::TraverseJsonForStats(const char* json, std::vector& path, std::map& infos) { jsmntok current = tokens[0]; - Assert(current.type != JSMN_UNDEFINED); + AssertInfo(current.type != JSMN_UNDEFINED, + "current token type is undefined for json: {}.", + json); if (current.type == JSMN_OBJECT) { if (!path.empty()) { AddKeyStatsInfo(path, JSONType::OBJECT, nullptr, infos); } int j = 1; for (int i = 0; i < current.size; i++) { - Assert(tokens[j].type == JSMN_STRING && tokens[j].size != 0); + AssertInfo(tokens[j].type == JSMN_STRING && tokens[j].size != 0, + "current token type is not string for json: {} at " + "type: {}, size: {}, value: {}", + json, + int(tokens[j].type), + tokens[j].size, + std::string(json + tokens[j].start, + tokens[j].end - tokens[j].start)); std::string key(json + tokens[j].start, tokens[j].end - tokens[j].start); path.push_back(key); @@ -221,6 +230,10 @@ JsonKeyStats::CollectSingleJsonStatsInfo( break; } + if (num_tokens == 0) { + return; + } + int index = 0; std::vector paths; TraverseJsonForStats(json_str, tokens.data(), index, paths, infos); @@ -385,7 +398,7 @@ JsonKeyStats::TraverseJsonForBuildStats( for (int i = 0; i < current.size; i++) { AssertInfo(tokens[j].type == JSMN_STRING && tokens[j].size != 0, "current token type is not string for json: {} at " - "index: {}, type: {}, size: {} value: {}", + "type: {}, size: {}, value: {}", json, int(tokens[j].type), tokens[j].size, @@ -505,6 +518,10 @@ JsonKeyStats::BuildKeyStatsForRow(const char* json_str, uint32_t row_id) { break; } + if (num_tokens == 0) { + return; + } + int index = 0; std::vector paths; std::map values; @@ -576,6 +593,14 @@ JsonKeyStats::BuildKeyStats(const std::vector& field_datas, static_cast(data->RawValue(i)) ->data() .data(); + + // some situations, such as empty json string, + // should be handled as null row + if (strlen(json_str) == 0) { + BuildKeyStatsForNullRow(); + continue; + } + BuildKeyStatsForRow(json_str, row_id); } row_id++; diff --git a/internal/core/src/index/json_stats/JsonKeyStats.h b/internal/core/src/index/json_stats/JsonKeyStats.h index 7b53c5d698..4615ace0d3 100644 --- a/internal/core/src/index/json_stats/JsonKeyStats.h +++ b/internal/core/src/index/json_stats/JsonKeyStats.h @@ -18,6 +18,7 @@ // Forward declaration of test accessor in global namespace for friend declaration class TraverseJsonForBuildStatsAccessor; +class CollectSingleJsonStatsInfoAccessor; #include #include @@ -707,6 +708,7 @@ class JsonKeyStats : public ScalarIndex { // Friend accessor for unit tests to call private methods safely. friend class ::TraverseJsonForBuildStatsAccessor; + friend class ::CollectSingleJsonStatsInfoAccessor; }; using CacheJsonKeyStatsPtr = diff --git a/internal/core/unittest/test_json_stats/test_traverse_json_for_build_stats.cpp b/internal/core/unittest/test_json_stats/test_traverse_json_for_build_stats.cpp index 1d076de694..50a7bf427b 100644 --- a/internal/core/unittest/test_json_stats/test_traverse_json_for_build_stats.cpp +++ b/internal/core/unittest/test_json_stats/test_traverse_json_for_build_stats.cpp @@ -39,6 +39,17 @@ class TraverseJsonForBuildStatsAccessor { } }; +// Friend accessor declared in JsonKeyStats to invoke private method for UT +class CollectSingleJsonStatsInfoAccessor { + public: + static void + Call(JsonKeyStats& s, + const char* json, + std::map& infos) { + s.CollectSingleJsonStatsInfo(json, infos); + } +}; + namespace { // Helper to tokenize JSON using jsmn @@ -120,3 +131,21 @@ TEST(TraverseJsonForBuildStatsTest, "https://api.github.com/repos/gegangene/scheduler"); expect_has("/msg", JSONType::STRING, "line1\nline2\t中文 / backslash \\"); } + +TEST(CollectSingleJsonStatsInfoTest, EmptyJsonStringThrows) { + const char* json = ""; + + milvus::storage::FieldDataMeta field_meta{1, 2, 3, 100, {}}; + milvus::storage::IndexMeta index_meta{3, 100, 1, 1}; + milvus::storage::StorageConfig storage_config; + storage_config.storage_type = "local"; + storage_config.root_path = "/tmp/test-collect-single-json-stats-info"; + auto cm = milvus::storage::CreateChunkManager(storage_config); + auto fs = milvus::storage::InitArrowFileSystem(storage_config); + milvus::storage::FileManagerContext ctx(field_meta, index_meta, cm, fs); + JsonKeyStats stats(ctx, true); + + std::map infos; + EXPECT_NO_THROW( + { CollectSingleJsonStatsInfoAccessor::Call(stats, json, infos); }); +}