Patch notes (free text before the first "diff --git" header).

Summary of what the hunks below do:
  * exec/expression/UnaryExpr.cpp
      ExecRangeVisitorImplForPk: the switch over op_type becomes an if/else.
      NotEqual evaluates pk_range(Equal, ...) and flips the bitmap view; every
      other operator is forwarded to pk_range unchanged. The query_timestamp
      local and the default branch that raised OpTypeInvalid are removed.
  * index/json_stats/JsonKeyStats.cpp
      TraverseJsonForBuildStats: a JSMN_STRING token is passed through
      UnescapeJsonString before being handed to AddKeyStats.
  * index/json_stats/utils.h
      Adds JsonStringHasEscape (backslash scan) and UnescapeJsonString (wraps
      the slice in quotes and parses it with simdjson::dom::parser).
  * unittest/test_json_stats/test_traverse_json_for_build_stats.cpp
      Adds a "msg" field containing escapes to the fixture and expectations
      for the unescaped "/repo/url" and "/msg" values.

Repairs made to this copy of the patch:
  * Line structure restored so every hunk matches its @@ line counts.
    Leading indentation is reconstructed -- verify against the target tree or
    apply with whitespace tolerance.
  * utils.h: `std::memcpy("ed[1], ...` was an entity-mangled `&quoted[1]`.
  * utils.h: the two added #include lines had lost their header names; they
    are restored as <cstring> and <simdjson.h> based on the names the new code
    uses (std::memchr/std::memcpy, simdjson::dom) -- TODO confirm.

NOTE(review): the context line `value_arg_.GetValue();` may have lost a
  template argument in the same extraction -- confirm before applying.
NOTE(review): with the default branch removed, operator types other than the
  six comparisons are no longer rejected here -- confirm pk_range handles them.
NOTE(review): the ThrowInfo inside the try block is presumably re-caught by
  the `catch (const std::exception&)` handler and re-wrapped -- verify.

diff --git a/internal/core/src/exec/expression/UnaryExpr.cpp b/internal/core/src/exec/expression/UnaryExpr.cpp
index 56e744d60d..b8d180fa7e 100644
--- a/internal/core/src/exec/expression/UnaryExpr.cpp
+++ b/internal/core/src/exec/expression/UnaryExpr.cpp
@@ -1301,30 +1301,14 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForPk(EvalCtx& context) {
         auto op_type = expr_->op_type_;
         PkType pk = value_arg_.GetValue();
 
-        auto query_timestamp = context.get_exec_context()
-                                   ->get_query_context()
-                                   ->get_query_timestamp();
-
-        switch (op_type) {
-            case proto::plan::GreaterThan:
-            case proto::plan::GreaterEqual:
-            case proto::plan::LessThan:
-            case proto::plan::LessEqual:
-            case proto::plan::Equal:
-                segment_->pk_range(op_type, pk, cache_view);
-                break;
-            case proto::plan::NotEqual: {
-                segment_->pk_range(proto::plan::Equal, pk, cache_view);
-                cache_view.flip();
-                break;
-            }
-            default:
-                ThrowInfo(
-                    OpTypeInvalid,
-                    fmt::format("unsupported operator type for unary expr: {}",
-                                op_type));
+        if (op_type == proto::plan::NotEqual) {
+            segment_->pk_range(proto::plan::Equal, pk, cache_view);
+            cache_view.flip();
+        } else {
+            segment_->pk_range(op_type, pk, cache_view);
         }
     }
+
     TargetBitmap result;
     result.append(
         *cached_index_chunk_res_, current_data_global_pos_, real_batch_size);
diff --git a/internal/core/src/index/json_stats/JsonKeyStats.cpp b/internal/core/src/index/json_stats/JsonKeyStats.cpp
index 36587c27d2..7f23ba4e39 100644
--- a/internal/core/src/index/json_stats/JsonKeyStats.cpp
+++ b/internal/core/src/index/json_stats/JsonKeyStats.cpp
@@ -442,12 +442,11 @@ JsonKeyStats::TraverseJsonForBuildStats(
             }
             index = j;
         } else if (current.type == JSMN_STRING) {
+            auto value =
+                std::string(json + current.start, current.end - current.start);
+            auto unescaped = UnescapeJsonString(value);
             Assert(current.size == 0);
-            AddKeyStats(
-                path,
-                JSONType::STRING,
-                std::string(json + current.start, current.end - current.start),
-                values);
+            AddKeyStats(path, JSONType::STRING, unescaped, values);
             index++;
         }
     }
diff --git a/internal/core/src/index/json_stats/utils.h b/internal/core/src/index/json_stats/utils.h
index a899f99ba5..edddfcf24b 100644
--- a/internal/core/src/index/json_stats/utils.h
+++ b/internal/core/src/index/json_stats/utils.h
@@ -26,6 +26,8 @@
 #include "common/jsmn.h"
 #include "arrow/api.h"
 #include "common/EasyAssert.h"
+#include <cstring>
+#include <simdjson.h>
 
 namespace milvus::index {
 
@@ -49,6 +51,47 @@ enum class JSONType {
     OBJECT
 };
 
+inline bool
+JsonStringHasEscape(std::string_view s) {
+    // Any JSON escape must start with a backslash
+    return std::memchr(s.data(), '\\', s.size()) != nullptr;
+}
+
+// Unescape a JSON-escaped string slice (without surrounding quotes)
+// Returns a decoded UTF-8 std::string or throws on error
+inline std::string
+UnescapeJsonString(const std::string& escaped) {
+    if (!JsonStringHasEscape(escaped)) {
+        return escaped;
+    }
+    try {
+        simdjson::dom::parser parser;
+        std::string quoted;
+        quoted.resize(escaped.size() + 2);
+        quoted[0] = '"';
+        std::memcpy(&quoted[1], escaped.data(), escaped.size());
+        quoted[quoted.size() - 1] = '"';
+        simdjson::dom::element elem = parser.parse(quoted);
+        if (elem.type() != simdjson::dom::element_type::STRING) {
+            ThrowInfo(ErrorCode::UnexpectedError,
+                      "input is not a JSON string: {}",
+                      escaped);
+        }
+        return std::string(std::string_view(elem.get_string()));
+    } catch (const simdjson::simdjson_error& e) {
+        ThrowInfo(ErrorCode::UnexpectedError,
+                  "Failed to unescape json string (simdjson): {}, {}",
+                  escaped,
+                  e.what());
+    } catch (const std::exception& e) {
+        ThrowInfo(ErrorCode::UnexpectedError,
+                  "Failed to unescape json string: {}, {}",
+                  escaped,
+                  e.what());
+    }
+    return {};
+}
+
 inline std::string
 ToString(JSONType type) {
     switch (type) {
diff --git a/internal/core/unittest/test_json_stats/test_traverse_json_for_build_stats.cpp b/internal/core/unittest/test_json_stats/test_traverse_json_for_build_stats.cpp
index 5d09396a44..0d200d82a0 100644
--- a/internal/core/unittest/test_json_stats/test_traverse_json_for_build_stats.cpp
+++ b/internal/core/unittest/test_json_stats/test_traverse_json_for_build_stats.cpp
@@ -76,7 +76,8 @@ TEST(TraverseJsonForBuildStatsTest,
     {"id": 34495370646 ,"type":"PublicEvent","actor":{"id":126890008,"login":"gegangene","display_login":"gegangene","gravatar_id":"",
     "url":"https:\/\/api.github.com\/users\/gegangene","avatar_url":"https:\/\/avatars.githubusercontent.com\/u\/126890008?"},
     "repo":{"id":737601171,"name":"gegangene\/scheduler","url":"https:\/\/api.github.com\/repos\/gegangene\/scheduler"},
-    "payload":{},"public":true,"created_at":"2024-01-01T00:01:28Z"}
+    "payload":{},"public":true,"created_at":"2024-01-01T00:01:28Z",
+    "msg":"line1\nline2\t\u4e2d\u6587 \/ backslash \\"}
     )";
 
     auto tokens = Tokenize(json);
@@ -113,4 +114,8 @@ TEST(TraverseJsonForBuildStatsTest,
     expect_has("/payload", JSONType::OBJECT, "{}");
     expect_has("/public", JSONType::BOOL, "true");
     expect_has("/created_at", JSONType::STRING, "2024-01-01T00:01:28Z");
+    expect_has("/repo/url",
+               JSONType::STRING,
+               "https://api.github.com/repos/gegangene/scheduler");
+    expect_has("/msg", JSONType::STRING, "line1\nline2\t中文 / backslash \\");
 }