mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
fix: fix bug for shredding json when empty json but not null (#45221)
#45157 Signed-off-by: luzhang <luzhang@zilliz.com> Co-authored-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
parent
d320ccab99
commit
653e95aaad
@ -135,14 +135,23 @@ JsonKeyStats::TraverseJsonForStats(const char* json,
|
|||||||
std::vector<std::string>& path,
|
std::vector<std::string>& path,
|
||||||
std::map<JsonKey, KeyStatsInfo>& infos) {
|
std::map<JsonKey, KeyStatsInfo>& infos) {
|
||||||
jsmntok current = tokens[0];
|
jsmntok current = tokens[0];
|
||||||
Assert(current.type != JSMN_UNDEFINED);
|
AssertInfo(current.type != JSMN_UNDEFINED,
|
||||||
|
"current token type is undefined for json: {}.",
|
||||||
|
json);
|
||||||
if (current.type == JSMN_OBJECT) {
|
if (current.type == JSMN_OBJECT) {
|
||||||
if (!path.empty()) {
|
if (!path.empty()) {
|
||||||
AddKeyStatsInfo(path, JSONType::OBJECT, nullptr, infos);
|
AddKeyStatsInfo(path, JSONType::OBJECT, nullptr, infos);
|
||||||
}
|
}
|
||||||
int j = 1;
|
int j = 1;
|
||||||
for (int i = 0; i < current.size; i++) {
|
for (int i = 0; i < current.size; i++) {
|
||||||
Assert(tokens[j].type == JSMN_STRING && tokens[j].size != 0);
|
AssertInfo(tokens[j].type == JSMN_STRING && tokens[j].size != 0,
|
||||||
|
"current token type is not string for json: {} at "
|
||||||
|
"type: {}, size: {}, value: {}",
|
||||||
|
json,
|
||||||
|
int(tokens[j].type),
|
||||||
|
tokens[j].size,
|
||||||
|
std::string(json + tokens[j].start,
|
||||||
|
tokens[j].end - tokens[j].start));
|
||||||
std::string key(json + tokens[j].start,
|
std::string key(json + tokens[j].start,
|
||||||
tokens[j].end - tokens[j].start);
|
tokens[j].end - tokens[j].start);
|
||||||
path.push_back(key);
|
path.push_back(key);
|
||||||
@ -221,6 +230,10 @@ JsonKeyStats::CollectSingleJsonStatsInfo(
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (num_tokens == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
int index = 0;
|
int index = 0;
|
||||||
std::vector<std::string> paths;
|
std::vector<std::string> paths;
|
||||||
TraverseJsonForStats(json_str, tokens.data(), index, paths, infos);
|
TraverseJsonForStats(json_str, tokens.data(), index, paths, infos);
|
||||||
@ -385,7 +398,7 @@ JsonKeyStats::TraverseJsonForBuildStats(
|
|||||||
for (int i = 0; i < current.size; i++) {
|
for (int i = 0; i < current.size; i++) {
|
||||||
AssertInfo(tokens[j].type == JSMN_STRING && tokens[j].size != 0,
|
AssertInfo(tokens[j].type == JSMN_STRING && tokens[j].size != 0,
|
||||||
"current token type is not string for json: {} at "
|
"current token type is not string for json: {} at "
|
||||||
"index: {}, type: {}, size: {} value: {}",
|
"type: {}, size: {}, value: {}",
|
||||||
json,
|
json,
|
||||||
int(tokens[j].type),
|
int(tokens[j].type),
|
||||||
tokens[j].size,
|
tokens[j].size,
|
||||||
@ -505,6 +518,10 @@ JsonKeyStats::BuildKeyStatsForRow(const char* json_str, uint32_t row_id) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (num_tokens == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
int index = 0;
|
int index = 0;
|
||||||
std::vector<std::string> paths;
|
std::vector<std::string> paths;
|
||||||
std::map<JsonKey, std::string> values;
|
std::map<JsonKey, std::string> values;
|
||||||
@ -576,6 +593,14 @@ JsonKeyStats::BuildKeyStats(const std::vector<FieldDataPtr>& field_datas,
|
|||||||
static_cast<const milvus::Json*>(data->RawValue(i))
|
static_cast<const milvus::Json*>(data->RawValue(i))
|
||||||
->data()
|
->data()
|
||||||
.data();
|
.data();
|
||||||
|
|
||||||
|
// some situations, such as empty json string,
|
||||||
|
// should be handled as null row
|
||||||
|
if (strlen(json_str) == 0) {
|
||||||
|
BuildKeyStatsForNullRow();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
BuildKeyStatsForRow(json_str, row_id);
|
BuildKeyStatsForRow(json_str, row_id);
|
||||||
}
|
}
|
||||||
row_id++;
|
row_id++;
|
||||||
|
|||||||
@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
// Forward declaration of test accessor in global namespace for friend declaration
|
// Forward declaration of test accessor in global namespace for friend declaration
|
||||||
class TraverseJsonForBuildStatsAccessor;
|
class TraverseJsonForBuildStatsAccessor;
|
||||||
|
class CollectSingleJsonStatsInfoAccessor;
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <boost/filesystem.hpp>
|
#include <boost/filesystem.hpp>
|
||||||
@ -707,6 +708,7 @@ class JsonKeyStats : public ScalarIndex<std::string> {
|
|||||||
|
|
||||||
// Friend accessor for unit tests to call private methods safely.
|
// Friend accessor for unit tests to call private methods safely.
|
||||||
friend class ::TraverseJsonForBuildStatsAccessor;
|
friend class ::TraverseJsonForBuildStatsAccessor;
|
||||||
|
friend class ::CollectSingleJsonStatsInfoAccessor;
|
||||||
};
|
};
|
||||||
|
|
||||||
using CacheJsonKeyStatsPtr =
|
using CacheJsonKeyStatsPtr =
|
||||||
|
|||||||
@ -39,6 +39,17 @@ class TraverseJsonForBuildStatsAccessor {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Friend accessor declared in JsonKeyStats to invoke private method for UT
|
||||||
|
class CollectSingleJsonStatsInfoAccessor {
|
||||||
|
public:
|
||||||
|
static void
|
||||||
|
Call(JsonKeyStats& s,
|
||||||
|
const char* json,
|
||||||
|
std::map<JsonKey, milvus::index::KeyStatsInfo>& infos) {
|
||||||
|
s.CollectSingleJsonStatsInfo(json, infos);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
// Helper to tokenize JSON using jsmn
|
// Helper to tokenize JSON using jsmn
|
||||||
@ -120,3 +131,21 @@ TEST(TraverseJsonForBuildStatsTest,
|
|||||||
"https://api.github.com/repos/gegangene/scheduler");
|
"https://api.github.com/repos/gegangene/scheduler");
|
||||||
expect_has("/msg", JSONType::STRING, "line1\nline2\t中文 / backslash \\");
|
expect_has("/msg", JSONType::STRING, "line1\nline2\t中文 / backslash \\");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Regression test for empty (but non-null) JSON input: collecting stats for
// "" must not throw and must not record any key. The previous test name said
// "Throws" although the body asserts the opposite, so it is renamed to match
// what is actually verified.
TEST(CollectSingleJsonStatsInfoTest, EmptyJsonStringDoesNotThrow) {
    const char* json = "";

    // Minimal local-storage context needed to construct a JsonKeyStats
    // instance; the ids are arbitrary placeholder values.
    milvus::storage::FieldDataMeta field_meta{1, 2, 3, 100, {}};
    milvus::storage::IndexMeta index_meta{3, 100, 1, 1};
    milvus::storage::StorageConfig storage_config;
    storage_config.storage_type = "local";
    storage_config.root_path = "/tmp/test-collect-single-json-stats-info";
    auto cm = milvus::storage::CreateChunkManager(storage_config);
    auto fs = milvus::storage::InitArrowFileSystem(storage_config);
    milvus::storage::FileManagerContext ctx(field_meta, index_meta, cm, fs);
    JsonKeyStats stats(ctx, true);

    std::map<JsonKey, milvus::index::KeyStatsInfo> infos;
    EXPECT_NO_THROW(
        { CollectSingleJsonStatsInfoAccessor::Call(stats, json, infos); });

    // An empty document is expected to take the `num_tokens == 0` early
    // return in CollectSingleJsonStatsInfo, so no key stats should have been
    // collected.
    EXPECT_TRUE(infos.empty());
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user