mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
fix:fix unescaped bug for json stats (#44421)
#42533 Signed-off-by: luzhang <luzhang@zilliz.com> Co-authored-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
parent
5cd2d99799
commit
9b6703626d
@ -1301,30 +1301,14 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForPk(EvalCtx& context) {
|
||||
|
||||
auto op_type = expr_->op_type_;
|
||||
PkType pk = value_arg_.GetValue<IndexInnerType>();
|
||||
auto query_timestamp = context.get_exec_context()
|
||||
->get_query_context()
|
||||
->get_query_timestamp();
|
||||
|
||||
switch (op_type) {
|
||||
case proto::plan::GreaterThan:
|
||||
case proto::plan::GreaterEqual:
|
||||
case proto::plan::LessThan:
|
||||
case proto::plan::LessEqual:
|
||||
case proto::plan::Equal:
|
||||
segment_->pk_range(op_type, pk, cache_view);
|
||||
break;
|
||||
case proto::plan::NotEqual: {
|
||||
segment_->pk_range(proto::plan::Equal, pk, cache_view);
|
||||
cache_view.flip();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ThrowInfo(
|
||||
OpTypeInvalid,
|
||||
fmt::format("unsupported operator type for unary expr: {}",
|
||||
op_type));
|
||||
if (op_type == proto::plan::NotEqual) {
|
||||
segment_->pk_range(proto::plan::Equal, pk, cache_view);
|
||||
cache_view.flip();
|
||||
} else {
|
||||
segment_->pk_range(op_type, pk, cache_view);
|
||||
}
|
||||
}
|
||||
|
||||
TargetBitmap result;
|
||||
result.append(
|
||||
*cached_index_chunk_res_, current_data_global_pos_, real_batch_size);
|
||||
|
||||
@ -442,12 +442,11 @@ JsonKeyStats::TraverseJsonForBuildStats(
|
||||
}
|
||||
index = j;
|
||||
} else if (current.type == JSMN_STRING) {
|
||||
auto value =
|
||||
std::string(json + current.start, current.end - current.start);
|
||||
auto unescaped = UnescapeJsonString(value);
|
||||
Assert(current.size == 0);
|
||||
AddKeyStats(
|
||||
path,
|
||||
JSONType::STRING,
|
||||
std::string(json + current.start, current.end - current.start),
|
||||
values);
|
||||
AddKeyStats(path, JSONType::STRING, unescaped, values);
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
@ -26,6 +26,8 @@
|
||||
#include "common/jsmn.h"
|
||||
#include "arrow/api.h"
|
||||
#include "common/EasyAssert.h"
|
||||
#include <simdjson.h>
|
||||
#include <cstring>
|
||||
|
||||
namespace milvus::index {
|
||||
|
||||
@ -49,6 +51,47 @@ enum class JSONType {
|
||||
OBJECT
|
||||
};
|
||||
|
||||
// Reports whether `s` contains at least one backslash, i.e. whether it could
// contain any JSON escape sequence at all.
//
// The search uses std::string_view::find instead of std::memchr so that an
// empty view whose data() is null (for example a default-constructed
// std::string_view) never hands a null pointer to a C library routine.
// Embedded NUL bytes are treated as ordinary characters.
inline bool
JsonStringHasEscape(std::string_view s) {
    // Any JSON escape must start with a backslash
    return s.find('\\') != std::string_view::npos;
}
|
||||
|
||||
// Unescape a JSON-escaped string slice (without surrounding quotes)
|
||||
// Returns a decoded UTF-8 std::string or throws on error
|
||||
inline std::string
|
||||
UnescapeJsonString(const std::string& escaped) {
|
||||
if (!JsonStringHasEscape(escaped)) {
|
||||
return escaped;
|
||||
}
|
||||
try {
|
||||
simdjson::dom::parser parser;
|
||||
std::string quoted;
|
||||
quoted.resize(escaped.size() + 2);
|
||||
quoted[0] = '"';
|
||||
std::memcpy("ed[1], escaped.data(), escaped.size());
|
||||
quoted[quoted.size() - 1] = '"';
|
||||
simdjson::dom::element elem = parser.parse(quoted);
|
||||
if (elem.type() != simdjson::dom::element_type::STRING) {
|
||||
ThrowInfo(ErrorCode::UnexpectedError,
|
||||
"input is not a JSON string: {}",
|
||||
escaped);
|
||||
}
|
||||
return std::string(std::string_view(elem.get_string()));
|
||||
} catch (const simdjson::simdjson_error& e) {
|
||||
ThrowInfo(ErrorCode::UnexpectedError,
|
||||
"Failed to unescape json string (simdjson): {}, {}",
|
||||
escaped,
|
||||
e.what());
|
||||
} catch (const std::exception& e) {
|
||||
ThrowInfo(ErrorCode::UnexpectedError,
|
||||
"Failed to unescape json string: {}, {}",
|
||||
escaped,
|
||||
e.what());
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
inline std::string
|
||||
ToString(JSONType type) {
|
||||
switch (type) {
|
||||
|
||||
@ -76,7 +76,8 @@ TEST(TraverseJsonForBuildStatsTest,
|
||||
{"id": 34495370646 ,"type":"PublicEvent","actor":{"id":126890008,"login":"gegangene","display_login":"gegangene","gravatar_id":"",
|
||||
"url":"https:\/\/api.github.com\/users\/gegangene","avatar_url":"https:\/\/avatars.githubusercontent.com\/u\/126890008?"},
|
||||
"repo":{"id":737601171,"name":"gegangene\/scheduler","url":"https:\/\/api.github.com\/repos\/gegangene\/scheduler"},
|
||||
"payload":{},"public":true,"created_at":"2024-01-01T00:01:28Z"}
|
||||
"payload":{},"public":true,"created_at":"2024-01-01T00:01:28Z",
|
||||
"msg":"line1\nline2\t\u4e2d\u6587 \/ backslash \\"}
|
||||
)";
|
||||
|
||||
auto tokens = Tokenize(json);
|
||||
@ -113,4 +114,8 @@ TEST(TraverseJsonForBuildStatsTest,
|
||||
expect_has("/payload", JSONType::OBJECT, "{}");
|
||||
expect_has("/public", JSONType::BOOL, "true");
|
||||
expect_has("/created_at", JSONType::STRING, "2024-01-01T00:01:28Z");
|
||||
expect_has("/repo/url",
|
||||
JSONType::STRING,
|
||||
"https://api.github.com/repos/gegangene/scheduler");
|
||||
expect_has("/msg", JSONType::STRING, "line1\nline2\t中文 / backslash \\");
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user