mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-08 01:58:34 +08:00
fix:fix unescaped bug for json stats (#44421)
#42533 Signed-off-by: luzhang <luzhang@zilliz.com> Co-authored-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
parent
5cd2d99799
commit
9b6703626d
@ -1301,30 +1301,14 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForPk(EvalCtx& context) {
|
|||||||
|
|
||||||
auto op_type = expr_->op_type_;
|
auto op_type = expr_->op_type_;
|
||||||
PkType pk = value_arg_.GetValue<IndexInnerType>();
|
PkType pk = value_arg_.GetValue<IndexInnerType>();
|
||||||
auto query_timestamp = context.get_exec_context()
|
if (op_type == proto::plan::NotEqual) {
|
||||||
->get_query_context()
|
|
||||||
->get_query_timestamp();
|
|
||||||
|
|
||||||
switch (op_type) {
|
|
||||||
case proto::plan::GreaterThan:
|
|
||||||
case proto::plan::GreaterEqual:
|
|
||||||
case proto::plan::LessThan:
|
|
||||||
case proto::plan::LessEqual:
|
|
||||||
case proto::plan::Equal:
|
|
||||||
segment_->pk_range(op_type, pk, cache_view);
|
|
||||||
break;
|
|
||||||
case proto::plan::NotEqual: {
|
|
||||||
segment_->pk_range(proto::plan::Equal, pk, cache_view);
|
segment_->pk_range(proto::plan::Equal, pk, cache_view);
|
||||||
cache_view.flip();
|
cache_view.flip();
|
||||||
break;
|
} else {
|
||||||
}
|
segment_->pk_range(op_type, pk, cache_view);
|
||||||
default:
|
|
||||||
ThrowInfo(
|
|
||||||
OpTypeInvalid,
|
|
||||||
fmt::format("unsupported operator type for unary expr: {}",
|
|
||||||
op_type));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TargetBitmap result;
|
TargetBitmap result;
|
||||||
result.append(
|
result.append(
|
||||||
*cached_index_chunk_res_, current_data_global_pos_, real_batch_size);
|
*cached_index_chunk_res_, current_data_global_pos_, real_batch_size);
|
||||||
|
|||||||
@ -442,12 +442,11 @@ JsonKeyStats::TraverseJsonForBuildStats(
|
|||||||
}
|
}
|
||||||
index = j;
|
index = j;
|
||||||
} else if (current.type == JSMN_STRING) {
|
} else if (current.type == JSMN_STRING) {
|
||||||
|
auto value =
|
||||||
|
std::string(json + current.start, current.end - current.start);
|
||||||
|
auto unescaped = UnescapeJsonString(value);
|
||||||
Assert(current.size == 0);
|
Assert(current.size == 0);
|
||||||
AddKeyStats(
|
AddKeyStats(path, JSONType::STRING, unescaped, values);
|
||||||
path,
|
|
||||||
JSONType::STRING,
|
|
||||||
std::string(json + current.start, current.end - current.start),
|
|
||||||
values);
|
|
||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -26,6 +26,8 @@
|
|||||||
#include "common/jsmn.h"
|
#include "common/jsmn.h"
|
||||||
#include "arrow/api.h"
|
#include "arrow/api.h"
|
||||||
#include "common/EasyAssert.h"
|
#include "common/EasyAssert.h"
|
||||||
|
#include <simdjson.h>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
namespace milvus::index {
|
namespace milvus::index {
|
||||||
|
|
||||||
@ -49,6 +51,47 @@ enum class JSONType {
|
|||||||
OBJECT
|
OBJECT
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Reports whether a JSON string slice (without its surrounding quotes)
// contains at least one backslash.
//
// Any JSON escape must start with a backslash, so a slice without one can be
// used verbatim and needs no decoding.
//
// Parameters:
//   s - the bytes to inspect; may be empty.
// Returns true when a backslash occurs anywhere in `s`, false otherwise.
inline bool
JsonStringHasEscape(std::string_view s) {
    // string_view::find is well-defined for an empty view whose data() is
    // null, whereas handing a null pointer to memchr is formally undefined
    // even with a zero length.
    return s.find('\\') != std::string_view::npos;
}
|
||||||
|
|
||||||
|
// Unescape a JSON-escaped string slice (without surrounding quotes)
|
||||||
|
// Returns a decoded UTF-8 std::string or throws on error
|
||||||
|
inline std::string
|
||||||
|
UnescapeJsonString(const std::string& escaped) {
|
||||||
|
if (!JsonStringHasEscape(escaped)) {
|
||||||
|
return escaped;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
simdjson::dom::parser parser;
|
||||||
|
std::string quoted;
|
||||||
|
quoted.resize(escaped.size() + 2);
|
||||||
|
quoted[0] = '"';
|
||||||
|
std::memcpy("ed[1], escaped.data(), escaped.size());
|
||||||
|
quoted[quoted.size() - 1] = '"';
|
||||||
|
simdjson::dom::element elem = parser.parse(quoted);
|
||||||
|
if (elem.type() != simdjson::dom::element_type::STRING) {
|
||||||
|
ThrowInfo(ErrorCode::UnexpectedError,
|
||||||
|
"input is not a JSON string: {}",
|
||||||
|
escaped);
|
||||||
|
}
|
||||||
|
return std::string(std::string_view(elem.get_string()));
|
||||||
|
} catch (const simdjson::simdjson_error& e) {
|
||||||
|
ThrowInfo(ErrorCode::UnexpectedError,
|
||||||
|
"Failed to unescape json string (simdjson): {}, {}",
|
||||||
|
escaped,
|
||||||
|
e.what());
|
||||||
|
} catch (const std::exception& e) {
|
||||||
|
ThrowInfo(ErrorCode::UnexpectedError,
|
||||||
|
"Failed to unescape json string: {}, {}",
|
||||||
|
escaped,
|
||||||
|
e.what());
|
||||||
|
}
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
inline std::string
|
inline std::string
|
||||||
ToString(JSONType type) {
|
ToString(JSONType type) {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
|
|||||||
@ -76,7 +76,8 @@ TEST(TraverseJsonForBuildStatsTest,
|
|||||||
{"id": 34495370646 ,"type":"PublicEvent","actor":{"id":126890008,"login":"gegangene","display_login":"gegangene","gravatar_id":"",
|
{"id": 34495370646 ,"type":"PublicEvent","actor":{"id":126890008,"login":"gegangene","display_login":"gegangene","gravatar_id":"",
|
||||||
"url":"https:\/\/api.github.com\/users\/gegangene","avatar_url":"https:\/\/avatars.githubusercontent.com\/u\/126890008?"},
|
"url":"https:\/\/api.github.com\/users\/gegangene","avatar_url":"https:\/\/avatars.githubusercontent.com\/u\/126890008?"},
|
||||||
"repo":{"id":737601171,"name":"gegangene\/scheduler","url":"https:\/\/api.github.com\/repos\/gegangene\/scheduler"},
|
"repo":{"id":737601171,"name":"gegangene\/scheduler","url":"https:\/\/api.github.com\/repos\/gegangene\/scheduler"},
|
||||||
"payload":{},"public":true,"created_at":"2024-01-01T00:01:28Z"}
|
"payload":{},"public":true,"created_at":"2024-01-01T00:01:28Z",
|
||||||
|
"msg":"line1\nline2\t\u4e2d\u6587 \/ backslash \\"}
|
||||||
)";
|
)";
|
||||||
|
|
||||||
auto tokens = Tokenize(json);
|
auto tokens = Tokenize(json);
|
||||||
@ -113,4 +114,8 @@ TEST(TraverseJsonForBuildStatsTest,
|
|||||||
expect_has("/payload", JSONType::OBJECT, "{}");
|
expect_has("/payload", JSONType::OBJECT, "{}");
|
||||||
expect_has("/public", JSONType::BOOL, "true");
|
expect_has("/public", JSONType::BOOL, "true");
|
||||||
expect_has("/created_at", JSONType::STRING, "2024-01-01T00:01:28Z");
|
expect_has("/created_at", JSONType::STRING, "2024-01-01T00:01:28Z");
|
||||||
|
expect_has("/repo/url",
|
||||||
|
JSONType::STRING,
|
||||||
|
"https://api.github.com/repos/gegangene/scheduler");
|
||||||
|
expect_has("/msg", JSONType::STRING, "line1\nline2\t中文 / backslash \\");
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user