mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
enhance: Record simdjson error (#41003)
issue: #35528 --------- Signed-off-by: sunby <sunbingyi1992@gmail.com>
This commit is contained in:
parent
c02892e9fb
commit
27ff3a42e7
@ -46,14 +46,7 @@ JsonInvertedIndex<T>::build_index_for_json(
|
||||
value_result<GetType> res = json_column->at<GetType>(nested_path_);
|
||||
auto err = res.error();
|
||||
if (err != simdjson::SUCCESS) {
|
||||
AssertInfo(
|
||||
err == simdjson::INCORRECT_TYPE ||
|
||||
err == simdjson::NO_SUCH_FIELD ||
|
||||
err == simdjson::INVALID_JSON_POINTER,
|
||||
"Failed to parse json, err: {}, json: {}, pointer: {}",
|
||||
err,
|
||||
*json_column,
|
||||
nested_path_);
|
||||
error_recorder_.Record(*json_column, nested_path_, err);
|
||||
if (err == simdjson::NO_SUCH_FIELD ||
|
||||
err == simdjson::INVALID_JSON_POINTER) {
|
||||
folly::SharedMutex::WriteHolder lock(this->mutex_);
|
||||
@ -72,6 +65,8 @@ JsonInvertedIndex<T>::build_index_for_json(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
error_recorder_.PrintErrStats();
|
||||
}
|
||||
|
||||
template class JsonInvertedIndex<bool>;
|
||||
|
||||
@ -10,6 +10,7 @@
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#pragma once
|
||||
#include <cstdint>
|
||||
#include "common/FieldDataInterface.h"
|
||||
#include "index/InvertedIndexTantivy.h"
|
||||
#include "index/ScalarIndex.h"
|
||||
@ -18,6 +19,49 @@
|
||||
#include "tantivy-binding.h"
|
||||
|
||||
namespace milvus::index {
|
||||
class JsonInvertedIndexParseErrorRecorder {
|
||||
public:
|
||||
struct ErrorInstance {
|
||||
std::string json_str;
|
||||
std::string pointer;
|
||||
};
|
||||
struct ErrorStats {
|
||||
int64_t count;
|
||||
ErrorInstance first_instance;
|
||||
};
|
||||
void
|
||||
Record(const std::string_view& json_str,
|
||||
const std::string& pointer,
|
||||
const simdjson::error_code& error_code) {
|
||||
error_map_[error_code].count++;
|
||||
if (error_map_[error_code].count == 1) {
|
||||
error_map_[error_code].first_instance = {std::string(json_str),
|
||||
pointer};
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
PrintErrStats() {
|
||||
if (error_map_.empty()) {
|
||||
LOG_INFO("No error found");
|
||||
return;
|
||||
}
|
||||
for (const auto& [error_code, stats] : error_map_) {
|
||||
LOG_INFO("Error code: {}, count: {}, first instance: {}",
|
||||
error_code,
|
||||
stats.count,
|
||||
stats.first_instance.json_str);
|
||||
}
|
||||
}
|
||||
|
||||
std::unordered_map<simdjson::error_code, ErrorStats>&
|
||||
GetErrorMap() {
|
||||
return error_map_;
|
||||
}
|
||||
|
||||
private:
|
||||
std::unordered_map<simdjson::error_code, ErrorStats> error_map_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class JsonInvertedIndex : public index::InvertedIndexTantivy<T> {
|
||||
@ -67,9 +111,15 @@ class JsonInvertedIndex : public index::InvertedIndexTantivy<T> {
|
||||
return static_cast<enum DataType>(cast_type_);
|
||||
}
|
||||
|
||||
JsonInvertedIndexParseErrorRecorder&
|
||||
GetErrorRecorder() {
|
||||
return error_recorder_;
|
||||
}
|
||||
|
||||
private:
|
||||
std::string nested_path_;
|
||||
proto::schema::DataType cast_type_;
|
||||
JsonInvertedIndexParseErrorRecorder error_recorder_;
|
||||
};
|
||||
|
||||
} // namespace milvus::index
|
||||
|
||||
@ -96,6 +96,7 @@ set(MILVUS_TEST_FILES
|
||||
test_cached_search_iterator.cpp
|
||||
test_build_inverted_index_with_single_segment.cpp
|
||||
test_random_sample.cpp
|
||||
test_json_index.cpp
|
||||
)
|
||||
|
||||
if ( INDEX_ENGINE STREQUAL "cardinal" )
|
||||
|
||||
101
internal/core/unittest/test_json_index.cpp
Normal file
101
internal/core/unittest/test_json_index.cpp
Normal file
@ -0,0 +1,101 @@
|
||||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include "common/JsonCastType.h"
|
||||
#include "common/Schema.h"
|
||||
#include "index/IndexFactory.h"
|
||||
#include "index/JsonInvertedIndex.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <cstdint>
|
||||
using namespace milvus;
|
||||
using namespace milvus::index;
|
||||
|
||||
// Builds a JSON inverted index on the path "hello" over two documents, only
// one of which contains that key, and runs the build/finish/create_reader
// sequence. The test has no explicit expectations; it passes when none of
// these calls fails.
TEST(JsonIndexTest, TestBuildNonExistJsonPath) {
    std::string json_path = "hello";
    auto schema = std::make_shared<Schema>();
    auto json_fid = schema->AddDebugField("json", DataType::JSON);
    auto file_manager_ctx = storage::FileManagerContext();
    file_manager_ctx.fieldDataMeta.field_schema.set_data_type(
        milvus::proto::schema::JSON);
    file_manager_ctx.fieldDataMeta.field_schema.set_fieldid(json_fid.get());
    file_manager_ctx.fieldDataMeta.field_id = json_fid.get();
    auto inv_index = index::IndexFactory::GetInstance().CreateJsonIndex(
        index::INVERTED_INDEX_TYPE,
        JsonCastType::DOUBLE,
        json_path,
        file_manager_ctx);
    // The index is requested with a DOUBLE cast type, so downcast to the
    // double instantiation (as TestJSONErrRecorder does for the same factory
    // call). Casting to JsonInvertedIndex<int32_t> would use the object
    // through an unrelated type.
    auto json_index = std::unique_ptr<JsonInvertedIndex<double>>(
        static_cast<JsonInvertedIndex<double>*>(inv_index.release()));

    std::vector<std::string> json_raw_data = {R"({"hello": 1})",
                                              R"({"world": 2})"};

    std::vector<milvus::Json> jsons;
    jsons.reserve(json_raw_data.size());
    for (auto& json : json_raw_data) {
        jsons.push_back(milvus::Json(simdjson::padded_string(json)));
    }

    auto json_field =
        std::make_shared<FieldData<milvus::Json>>(DataType::JSON, false);
    json_field->add_json_data(jsons);
    json_index->BuildWithFieldData({json_field});
    json_index->finish();
    json_index->create_reader();
}
|
||||
|
||||
// Builds a DOUBLE-cast JSON index on the pointer "/a" over documents whose
// "/a" value has assorted types, then checks the per-error-code counts held
// by the index's error recorder.
TEST(JsonIndexTest, TestJSONErrRecorder) {
    std::vector<std::string> json_raw_data = {
        R"(1)",
        R"({"a": true})",
        R"({"a": 1.0})",
        R"({"a": 1})",
        R"({"a": null})",
        R"({"a": [1,2,3]})",
        R"({"a": [1.0,2,3]})",
        R"({"a": {"b": 1}})",
        R"({"a": "1"})",
        R"({"a": 1, "a": 1.0})",
    };

    std::string json_path = "/a";
    auto schema = std::make_shared<Schema>();
    auto json_fid = schema->AddDebugField("json", DataType::JSON);

    auto file_manager_ctx = storage::FileManagerContext();
    file_manager_ctx.fieldDataMeta.field_schema.set_data_type(
        milvus::proto::schema::JSON);
    file_manager_ctx.fieldDataMeta.field_schema.set_fieldid(json_fid.get());
    file_manager_ctx.fieldDataMeta.field_id = json_fid.get();

    auto inv_index = index::IndexFactory::GetInstance().CreateJsonIndex(
        index::INVERTED_INDEX_TYPE,
        JsonCastType::DOUBLE,
        json_path,
        file_manager_ctx);
    auto json_index = std::unique_ptr<JsonInvertedIndex<double>>(
        static_cast<JsonInvertedIndex<double>*>(inv_index.release()));

    std::vector<milvus::Json> jsons;
    jsons.reserve(json_raw_data.size());
    for (auto& json : json_raw_data) {
        jsons.push_back(milvus::Json(simdjson::padded_string(json)));
    }

    auto json_field =
        std::make_shared<FieldData<milvus::Json>>(DataType::JSON, false);
    json_field->add_json_data(jsons);
    json_index->BuildWithFieldData({json_field});

    // Bind by const reference instead of copying the map, and look entries up
    // with at(): unlike operator[], it never inserts a zero-count entry for a
    // missing error code.
    const auto& error_map = json_index->GetErrorRecorder().GetErrorMap();
    // Unsigned literal keeps the comparison with size() sign-consistent.
    ASSERT_EQ(error_map.size(), 2u);
    EXPECT_EQ(error_map.at(simdjson::error_code::INCORRECT_TYPE).count, 6);
    EXPECT_EQ(error_map.at(simdjson::error_code::INVALID_JSON_POINTER).count,
              1);
}
|
||||
Loading…
x
Reference in New Issue
Block a user