milvus/internal/core/src/index/JsonIndexBuilder.cpp
Bingyi Sun 742d72a6c2
fix: Fix wrong null offsets for json path index (#43390)
issue: https://github.com/milvus-io/milvus/issues/43315

---------

Signed-off-by: sunby <sunbingyi1992@gmail.com>
2025-07-26 17:26:54 +08:00

148 lines
5.4 KiB
C++

// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <string>
#include <string_view>
#include "common/JsonUtils.h"
#include "index/JsonIndexBuilder.h"
#include "simdjson/error.h"
namespace milvus::index {
template <typename T>
void
ProcessJsonFieldData(
const std::vector<std::shared_ptr<FieldDataBase>>& field_datas,
const proto::schema::FieldSchema& schema,
const std::string& nested_path,
const JsonCastType& cast_type,
JsonCastFunction cast_function,
JsonDataAdder<T> data_adder,
JsonNullAdder null_adder,
JsonNonExistAdder non_exist_adder,
JsonErrorRecorder error_recorder) {
int64_t offset = 0;
using SIMDJSON_T =
std::conditional_t<std::is_same_v<T, std::string>, std::string_view, T>;
auto tokens = parse_json_pointer(nested_path);
bool is_array = cast_type.data_type() == JsonCastType::DataType::ARRAY;
for (const auto& data : field_datas) {
auto n = data->get_num_rows();
for (int64_t i = 0; i < n; i++) {
auto json_column = static_cast<const Json*>(data->RawValue(i));
if (schema.nullable() && !data->is_valid(i)) {
non_exist_adder(offset);
null_adder(offset);
data_adder(nullptr, 0, offset++);
continue;
}
auto exists = path_exists(json_column->dom_doc(), tokens);
if (!exists || !json_column->exist(nested_path)) {
error_recorder(
*json_column, nested_path, simdjson::NO_SUCH_FIELD);
non_exist_adder(offset);
data_adder(nullptr, 0, offset++);
continue;
}
folly::fbvector<T> values;
if (is_array) {
auto doc = json_column->dom_doc();
auto array_res = doc.at_pointer(nested_path).get_array();
if (array_res.error() != simdjson::SUCCESS) {
error_recorder(
*json_column, nested_path, array_res.error());
} else {
auto array_values = array_res.value();
for (auto value : array_values) {
auto val = value.template get<SIMDJSON_T>();
if (val.error() == simdjson::SUCCESS) {
values.push_back(static_cast<T>(val.value()));
}
}
}
} else {
if (cast_function.match<T>()) {
auto res = JsonCastFunction::CastJsonValue<T>(
cast_function, *json_column, nested_path);
if (res.has_value()) {
values.push_back(res.value());
}
} else {
value_result<SIMDJSON_T> res =
json_column->at<SIMDJSON_T>(nested_path);
if (res.error() != simdjson::SUCCESS) {
error_recorder(*json_column, nested_path, res.error());
} else {
values.push_back(static_cast<T>(res.value()));
}
}
}
data_adder(values.data(), values.size(), offset++);
}
}
}
template void
ProcessJsonFieldData<bool>(
const std::vector<std::shared_ptr<FieldDataBase>>& field_datas,
const proto::schema::FieldSchema& schema,
const std::string& nested_path,
const JsonCastType& cast_type,
JsonCastFunction cast_function,
JsonDataAdder<bool> data_adder,
JsonNullAdder null_adder,
JsonNonExistAdder non_exist_adder,
JsonErrorRecorder error_recorder);
template void
ProcessJsonFieldData<int64_t>(
const std::vector<std::shared_ptr<FieldDataBase>>& field_datas,
const proto::schema::FieldSchema& schema,
const std::string& nested_path,
const JsonCastType& cast_type,
JsonCastFunction cast_function,
JsonDataAdder<int64_t> data_adder,
JsonNullAdder null_adder,
JsonNonExistAdder non_exist_adder,
JsonErrorRecorder error_recorder);
template void
ProcessJsonFieldData<double>(
const std::vector<std::shared_ptr<FieldDataBase>>& field_datas,
const proto::schema::FieldSchema& schema,
const std::string& nested_path,
const JsonCastType& cast_type,
JsonCastFunction cast_function,
JsonDataAdder<double> data_adder,
JsonNullAdder null_adder,
JsonNonExistAdder non_exist_adder,
JsonErrorRecorder error_recorder);
template void
ProcessJsonFieldData<std::string>(
const std::vector<std::shared_ptr<FieldDataBase>>& field_datas,
const proto::schema::FieldSchema& schema,
const std::string& nested_path,
const JsonCastType& cast_type,
JsonCastFunction cast_function,
JsonDataAdder<std::string> data_adder,
JsonNullAdder null_adder,
JsonNonExistAdder non_exist_adder,
JsonErrorRecorder error_recorder);
} // namespace milvus::index