diff --git a/internal/core/src/common/Json.h b/internal/core/src/common/Json.h index bfb147a527..be3bccd2ea 100644 --- a/internal/core/src/common/Json.h +++ b/internal/core/src/common/Json.h @@ -202,8 +202,12 @@ class Json { bool exist(std::string_view pointer) const { auto doc = this->doc(); - auto res = doc.at_pointer(pointer); - return res.error() == simdjson::SUCCESS && !res.is_null(); + if (pointer.empty()) { + return doc.error() == simdjson::SUCCESS && !doc.is_null(); + } else { + auto res = doc.at_pointer(pointer); + return res.error() == simdjson::SUCCESS && !res.is_null(); + } } // construct JSON pointer with provided path diff --git a/internal/core/src/common/JsonCastType.cpp b/internal/core/src/common/JsonCastType.cpp index c45c2ff7cb..446acbfe5e 100644 --- a/internal/core/src/common/JsonCastType.cpp +++ b/internal/core/src/common/JsonCastType.cpp @@ -21,7 +21,10 @@ const std::unordered_map JsonCastType::DataType::DOUBLE)}, {"ARRAY_VARCHAR", JsonCastType(JsonCastType::DataType::ARRAY, - JsonCastType::DataType::VARCHAR)}}; + JsonCastType::DataType::VARCHAR)}, + {"JSON", + JsonCastType(JsonCastType::DataType::JSON, + JsonCastType::DataType::JSON)}}; const JsonCastType JsonCastType::UNKNOWN = JsonCastType( JsonCastType::DataType::UNKNOWN, JsonCastType::DataType::UNKNOWN); diff --git a/internal/core/src/common/JsonCastType.h b/internal/core/src/common/JsonCastType.h index c235f8985b..95c09f91af 100644 --- a/internal/core/src/common/JsonCastType.h +++ b/internal/core/src/common/JsonCastType.h @@ -22,7 +22,7 @@ namespace milvus { using MilvusDataType = milvus::DataType; class JsonCastType { public: - enum class DataType { UNKNOWN, BOOL, DOUBLE, VARCHAR, ARRAY }; + enum class DataType { UNKNOWN, BOOL, DOUBLE, VARCHAR, ARRAY, JSON }; static const JsonCastType UNKNOWN; diff --git a/internal/core/src/common/JsonUtils.cpp b/internal/core/src/common/JsonUtils.cpp new file mode 100644 index 0000000000..2988da24b1 --- /dev/null +++ b/internal/core/src/common/JsonUtils.cpp @@ -0,0 +1,74 @@ +#include "common/JsonUtils.h" + +namespace milvus { + +// Parse a JSON Pointer into unescaped path segments +std::vector +parse_json_pointer(const std::string& pointer) { + std::vector tokens; + if (pointer.empty()) + return tokens; // Root path (entire document) + if (pointer[0] != '/') { + throw std::invalid_argument( + "Invalid JSON Pointer: must start with '/'"); + } + size_t start = 1; + while (start < pointer.size()) { + size_t end = pointer.find('/', start); + if (end == std::string::npos) + end = pointer.size(); + std::string token = pointer.substr(start, end - start); + // Replace ~1 with / and ~0 with ~ + size_t pos = 0; + while ((pos = token.find("~1", pos)) != std::string::npos) { + token.replace(pos, 2, "/"); + pos += 1; // Avoid infinite loops on overlapping replacements + } + pos = 0; + while ((pos = token.find("~0", pos)) != std::string::npos) { + token.replace(pos, 2, "~"); + pos += 1; + } + tokens.push_back(token); + start = end + 1; + } + return tokens; +} + +// Check if a JSON Pointer path exists +bool +path_exists(const simdjson::dom::element& root, + const std::vector& tokens) { + simdjson::dom::element current = root; + for (const auto& token : tokens) { + if (current.type() == simdjson::dom::element_type::OBJECT) { + auto obj = current.get_object(); + if (obj.error()) + return false; + auto next = obj.value().at_key(token); + if (next.error()) + return false; + current = next.value(); + } else if (current.type() == simdjson::dom::element_type::ARRAY) { + if (token == "-") + return false; // "-" is invalid for existence checks + char* endptr; + long index = strtol(token.c_str(), &endptr, 10); + if (*endptr != '\0' || index < 0) + return false; // Not a valid index + auto arr = current.get_array(); + if (arr.error()) + return false; + if (static_cast(index) >= arr.value().size()) + return false; + auto next = arr.value().at(index); + if (next.error()) + return false; + current = next.value(); + } else { + return false; // Path cannot be resolved + } + } + return true; +} +} // namespace milvus diff --git a/internal/core/src/common/JsonUtils.h b/internal/core/src/common/JsonUtils.h new file mode 100644 index 0000000000..b1fa2e92e8 --- /dev/null +++ b/internal/core/src/common/JsonUtils.h @@ -0,0 +1,16 @@ +#pragma once +#include +#include +#include "simdjson/dom.h" + +namespace milvus { + +// Parse a JSON Pointer into unescaped path segments +std::vector +parse_json_pointer(const std::string& pointer); + +// Check if a JSON Pointer path exists +bool +path_exists(const simdjson::dom::element& root, + const std::vector& tokens); +} // namespace milvus \ No newline at end of file diff --git a/internal/core/src/exec/expression/ExistsExpr.cpp b/internal/core/src/exec/expression/ExistsExpr.cpp index ec907ee611..f3c55646f2 100644 --- a/internal/core/src/exec/expression/ExistsExpr.cpp +++ b/internal/core/src/exec/expression/ExistsExpr.cpp @@ -80,6 +80,15 @@ PhyExistsFilterExpr::EvalJsonExistsForIndex() { break; } + case JsonCastType::DataType::JSON: { + auto* json_flat_index = + dynamic_cast(index); + auto executor = + json_flat_index->create_executor(pointer); + cached_index_chunk_res_ = executor->IsNotNull().clone(); + break; + } + default: PanicInfo(DataTypeInvalid, "unsupported data type: {}", @@ -116,8 +125,8 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment(EvalCtx& context) { auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); int processed_cursor = 0; auto execute_sub_batch = - [&bitmap_input, & - processed_cursor ]( + [&bitmap_input, + &processed_cursor]( const milvus::Json* data, const bool* valid_data, const int32_t* offsets, @@ -125,23 +134,23 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment(EvalCtx& context) { TargetBitmapView res, TargetBitmapView valid_res, const std::string& pointer) { - bool has_bitmap_input = !bitmap_input.empty(); - for (int i = 0; i < size; ++i) { - auto offset = i; - if constexpr (filter_type == FilterType::random) { - offset = (offsets) ? offsets[i] : i; + bool has_bitmap_input = !bitmap_input.empty(); + for (int i = 0; i < size; ++i) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { + res[i] = valid_res[i] = false; + continue; + } + if (has_bitmap_input && !bitmap_input[processed_cursor + i]) { + continue; + } + res[i] = data[offset].exist(pointer); } - if (valid_data != nullptr && !valid_data[offset]) { - res[i] = valid_res[i] = false; - continue; - } - if (has_bitmap_input && !bitmap_input[processed_cursor + i]) { - continue; - } - res[i] = data[offset].exist(pointer); - } - processed_cursor += size; - }; + processed_cursor += size; + }; int64_t processed_size; if (has_offset_input_) { diff --git a/internal/core/src/exec/expression/Expr.h b/internal/core/src/exec/expression/Expr.h index 285b5f9584..cacd3c9621 100644 --- a/internal/core/src/exec/expression/Expr.h +++ b/internal/core/src/exec/expression/Expr.h @@ -29,6 +29,8 @@ #include "exec/expression/Utils.h" #include "exec/QueryContext.h" #include "expr/ITypeExpr.h" +#include "index/Index.h" +#include "index/JsonFlatIndex.h" #include "log/Log.h" #include "query/PlanProto.h" #include "segcore/SegmentSealed.h" @@ -824,14 +826,34 @@ class SegmentExpr : public Expr { // executing costs quite much time. if (cached_index_chunk_id_ != i) { Index* index_ptr = nullptr; + PinWrapper json_pw; PinWrapper pw; + // Executor for JsonFlatIndex. Must outlive index_ptr. Only used for JSON type. + std::shared_ptr< + index::JsonFlatIndexQueryExecutor> + executor; if (field_type_ == DataType::JSON) { auto pointer = milvus::Json::pointer(nested_path_); + json_pw = segment_->chunk_json_index(field_id_, pointer, i); - pw = segment_->chunk_scalar_index( - field_id_, pointer, i); - index_ptr = const_cast(pw.get()); + // check if it is a json flat index, if so, create a json flat index query executor + auto json_flat_index = + dynamic_cast( + json_pw.get()); + + if (json_flat_index) { + auto index_path = json_flat_index->GetNestedPath(); + executor = + json_flat_index + ->template create_executor( + pointer.substr(index_path.size())); + index_ptr = executor.get(); + } else { + auto json_index = + const_cast(json_pw.get()); + index_ptr = dynamic_cast(json_index); + } } else { pw = segment_->chunk_scalar_index(field_id_, i); diff --git a/internal/core/src/exec/expression/UnaryExpr.cpp b/internal/core/src/exec/expression/UnaryExpr.cpp index 587c275f43..f016c77d2b 100644 --- a/internal/core/src/exec/expression/UnaryExpr.cpp +++ b/internal/core/src/exec/expression/UnaryExpr.cpp @@ -16,6 +16,7 @@ #include "UnaryExpr.h" #include +#include "common/EasyAssert.h" #include "common/Json.h" #include "common/Types.h" #include "common/type_c.h" @@ -218,10 +219,6 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { case proto::plan::GenericValue::ValCase::kStringVal: result = ExecRangeVisitorImplForIndex(); break; - case proto::plan::GenericValue::ValCase::kArrayVal: - result = - ExecRangeVisitorImplForIndex(); - break; default: PanicInfo( DataTypeInvalid, "unknown data type: {}", val_type); @@ -321,8 +318,9 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(EvalCtx& context) { } int processed_cursor = 0; auto execute_sub_batch = - [ op_type, &processed_cursor, & - bitmap_input ]( + [op_type, + &processed_cursor, + &bitmap_input]( const milvus::ArrayView* data, const bool* valid_data, const int32_t* offsets, @@ -331,185 +329,186 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(EvalCtx& context) { TargetBitmapView valid_res, ValueType val, int index) { - switch (op_type) { - case proto::plan::GreaterThan: { - UnaryElementFuncForArray - func; - func(data, - valid_data, - size, - val, - index, - res, - valid_res, - bitmap_input, - processed_cursor, - offsets); - break; + switch (op_type) { + case proto::plan::GreaterThan: { + UnaryElementFuncForArray + func; + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + bitmap_input, + processed_cursor, + offsets); + break; + } + case proto::plan::GreaterEqual: { + UnaryElementFuncForArray + func; + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + bitmap_input, + processed_cursor, + offsets); + break; + } + case proto::plan::LessThan: { + UnaryElementFuncForArray + func; + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + bitmap_input, + processed_cursor, + offsets); + break; + } + case proto::plan::LessEqual: { + UnaryElementFuncForArray + func; + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + bitmap_input, + processed_cursor, + offsets); + break; + } + case proto::plan::Equal: { + UnaryElementFuncForArray + func; + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + bitmap_input, + processed_cursor, + offsets); + break; + } + case proto::plan::NotEqual: { + UnaryElementFuncForArray + func; + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + bitmap_input, + processed_cursor, + offsets); + break; + } + case proto::plan::PrefixMatch: { + UnaryElementFuncForArray + func; + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + bitmap_input, + processed_cursor, + offsets); + break; + } + case proto::plan::Match: { + UnaryElementFuncForArray + func; + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + bitmap_input, + processed_cursor, + offsets); + break; + } + case proto::plan::PostfixMatch: { + UnaryElementFuncForArray + func; + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + bitmap_input, + processed_cursor, + offsets); + break; + } + case proto::plan::InnerMatch: { + UnaryElementFuncForArray + func; + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + bitmap_input, + processed_cursor, + offsets); + break; + } + default: + PanicInfo( + OpTypeInvalid, + fmt::format( + "unsupported operator type for unary expr: {}", + op_type)); } - case proto::plan::GreaterEqual: { - UnaryElementFuncForArray - func; - func(data, - valid_data, - size, - val, - index, - res, - valid_res, - bitmap_input, - processed_cursor, - offsets); - break; - } - case proto::plan::LessThan: { - UnaryElementFuncForArray - func; - func(data, - valid_data, - size, - val, - index, - res, - valid_res, - bitmap_input, - processed_cursor, - offsets); - break; - } - case proto::plan::LessEqual: { - UnaryElementFuncForArray - func; - func(data, - valid_data, - size, - val, - index, - res, - valid_res, - bitmap_input, - processed_cursor, - offsets); - break; - } - case proto::plan::Equal: { - UnaryElementFuncForArray - func; - func(data, - valid_data, - size, - val, - index, - res, - valid_res, - bitmap_input, - processed_cursor, - offsets); - break; - } - case proto::plan::NotEqual: { - UnaryElementFuncForArray - func; - func(data, - valid_data, - size, - val, - index, - res, - valid_res, - bitmap_input, - processed_cursor, - offsets); - break; - } - case proto::plan::PrefixMatch: { - UnaryElementFuncForArray - func; - func(data, - valid_data, - size, - val, - index, - res, - valid_res, - bitmap_input, - processed_cursor, - offsets); - break; - } - case proto::plan::Match: { - UnaryElementFuncForArray - func; - func(data, - valid_data, - size, - val, - index, - res, - valid_res, - bitmap_input, - processed_cursor, - offsets); - break; - } - case proto::plan::PostfixMatch: { - UnaryElementFuncForArray - func; - func(data, - valid_data, - size, - val, - index, - res, - valid_res, - bitmap_input, - processed_cursor, - offsets); - break; - } - case proto::plan::InnerMatch: { - UnaryElementFuncForArray - func; - func(data, - valid_data, - size, - val, - index, - res, - valid_res, - bitmap_input, - processed_cursor, - offsets); - break; - } - default: - PanicInfo( - OpTypeInvalid, - fmt::format("unsupported operator type for unary expr: {}", - op_type)); - } - processed_cursor += size; - }; + processed_cursor += size; + }; int64_t processed_size; if (has_offset_input_) { processed_size = @@ -709,16 +708,18 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(EvalCtx& context) { } while (false) int processed_cursor = 0; - auto execute_sub_batch = - [ op_type, pointer, &processed_cursor, & - bitmap_input ]( - const milvus::Json* data, - const bool* valid_data, - const int32_t* offsets, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - ExprValueType val) { + auto execute_sub_batch = [op_type, + pointer, + &processed_cursor, + &bitmap_input]( + const milvus::Json* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + ExprValueType val) { bool has_bitmap_input = !bitmap_input.empty(); switch (op_type) { case proto::plan::GreaterThan: { @@ -1647,16 +1648,17 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(EvalCtx& context) { auto expr_type = expr_->op_type_; size_t processed_cursor = 0; - auto execute_sub_batch = - [ expr_type, &processed_cursor, & - bitmap_input ]( - const T* data, - const bool* valid_data, - const int32_t* offsets, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - IndexInnerType val) { + auto execute_sub_batch = [expr_type, + &processed_cursor, + &bitmap_input]( + const T* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + IndexInnerType val) { switch (expr_type) { case proto::plan::GreaterThan: { UnaryElementFunc func; diff --git a/internal/core/src/index/IndexFactory.cpp b/internal/core/src/index/IndexFactory.cpp index 7ce06bc9ee..1982d82674 100644 --- a/internal/core/src/index/IndexFactory.cpp +++ b/internal/core/src/index/IndexFactory.cpp @@ -17,11 +17,13 @@ #include "index/IndexFactory.h" #include #include +#include #include "common/EasyAssert.h" #include "common/FieldDataInterface.h" #include "common/JsonCastType.h" #include "common/Types.h" #include "index/Index.h" +#include "index/JsonFlatIndex.h" #include "index/VectorMemIndex.h" #include "index/Utils.h" #include "index/Meta.h" @@ -408,6 +410,9 @@ IndexFactory::CreateJsonIndex( nested_path, file_manager_context, JsonCastFunction::FromString(json_cast_function)); + case JsonCastType::DataType::JSON: + return std::make_unique(file_manager_context, + nested_path); default: PanicInfo(DataTypeInvalid, "Invalid data type:{}", cast_dtype); } diff --git a/internal/core/src/index/InvertedIndexTantivy.cpp b/internal/core/src/index/InvertedIndexTantivy.cpp index 173191be4c..69bafcd58d 100644 --- a/internal/core/src/index/InvertedIndexTantivy.cpp +++ b/internal/core/src/index/InvertedIndexTantivy.cpp @@ -61,18 +61,21 @@ InvertedIndexTantivy::InitForBuildIndex() { d_type_, path_.c_str(), tantivy_index_version_, - inverted_index_single_segment_); + inverted_index_single_segment_, + user_specified_doc_id_); } template InvertedIndexTantivy::InvertedIndexTantivy( uint32_t tantivy_index_version, const storage::FileManagerContext& ctx, - bool inverted_index_single_segment) + bool inverted_index_single_segment, + bool user_specified_doc_id) : ScalarIndex(INVERTED_INDEX_TYPE), schema_(ctx.fieldDataMeta.field_schema), tantivy_index_version_(tantivy_index_version), - inverted_index_single_segment_(inverted_index_single_segment) { + inverted_index_single_segment_(inverted_index_single_segment), + user_specified_doc_id_(user_specified_doc_id) { mem_file_manager_ = std::make_shared(ctx); disk_file_manager_ = std::make_shared(ctx); // push init wrapper to load process diff --git a/internal/core/src/index/InvertedIndexTantivy.h b/internal/core/src/index/InvertedIndexTantivy.h index 1e0a6d24a2..a101fb0172 100644 --- a/internal/core/src/index/InvertedIndexTantivy.h +++ b/internal/core/src/index/InvertedIndexTantivy.h @@ -49,6 +49,10 @@ get_tantivy_data_type(proto::schema::DataType data_type) { return TantivyDataType::Keyword; } + case proto::schema::DataType::JSON: { + return TantivyDataType::JSON; + } + default: PanicInfo(ErrorCode::NotImplemented, fmt::format("not implemented data type: {}", data_type)); @@ -72,7 +76,8 @@ class InvertedIndexTantivy : public ScalarIndex { // Default, we build tantivy index with version 7 (newest version now). explicit InvertedIndexTantivy(uint32_t tantivy_index_version, const storage::FileManagerContext& ctx, - bool inverted_index_single_segment = false); + bool inverted_index_single_segment = false, + bool user_specified_doc_id = true); ~InvertedIndexTantivy(); @@ -183,7 +188,7 @@ class InvertedIndexTantivy : public ScalarIndex { return Count(); } - const TargetBitmap + virtual const TargetBitmap PrefixMatch(const std::string_view prefix); const TargetBitmap @@ -253,7 +258,7 @@ class InvertedIndexTantivy : public ScalarIndex { const std::vector>& field_datas) { PanicInfo(ErrorCode::NotImplemented, "build_index_for_json not implemented"); - }; + } protected: std::shared_ptr wrapper_; @@ -285,6 +290,10 @@ class InvertedIndexTantivy : public ScalarIndex { // building node to build specific type of tantivy index. bool inverted_index_single_segment_{false}; + // `user_specified_doc_id_` is used to control whether to use user specified doc id. + // If `user_specified_doc_id_` is true, the doc id is specified by the user, otherwise, the doc id is generated by the index. + bool user_specified_doc_id_{true}; + // `tantivy_index_version_` is used to control which kind of tantivy index should be used. // There could be the case where milvus version of read node is lower than the version of index builder node(and read node // may not be upgraded to a higher version in a predictable time), so we are using a lower version of tantivy to read index diff --git a/internal/core/src/index/JsonFlatIndex.cpp b/internal/core/src/index/JsonFlatIndex.cpp new file mode 100644 index 0000000000..445be22dfd --- /dev/null +++ b/internal/core/src/index/JsonFlatIndex.cpp @@ -0,0 +1,59 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + +#include "index/JsonFlatIndex.h" +#include "common/Types.h" +#include "index/InvertedIndexUtil.h" +#include "log/Log.h" +#include "simdjson/builtin.h" +#include "simdjson/padded_string.h" +#include "common/JsonUtils.h" +namespace milvus::index { + +void +JsonFlatIndex::build_index_for_json( + const std::vector>& field_datas) { + int64_t offset = 0; + auto tokens = parse_json_pointer(nested_path_); + for (const auto& data : field_datas) { + auto n = data->get_num_rows(); + for (int i = 0; i < n; i++) { + if (schema_.nullable() && !data->is_valid(i)) { + null_offset_.push_back(offset); + wrapper_->add_json_array_data(nullptr, 0, offset++); + continue; + } + auto json = static_cast(data->RawValue(i)); + auto exists = path_exists(json->dom_doc(), tokens); + if (!exists || !json->exist(nested_path_)) { + null_offset_.push_back(offset); + wrapper_->add_json_array_data(nullptr, 0, offset++); + continue; + } + + if (nested_path_ == "") { + wrapper_->add_json_data(json, 1, offset++); + } else { + auto doc = json->doc(); + auto res = doc.at_pointer(nested_path_); + auto err = res.error(); + if (err != simdjson::SUCCESS) { + wrapper_->add_json_array_data(nullptr, 0, offset++); + } else { + auto str = simdjson::to_json_string(res.value()).value(); + Json subpath_json = Json(simdjson::padded_string(str)); + wrapper_->add_json_data(&subpath_json, 1, offset++); + } + } + } + } +} +} // namespace milvus::index diff --git a/internal/core/src/index/JsonFlatIndex.h b/internal/core/src/index/JsonFlatIndex.h new file mode 100644 index 0000000000..cfc377ae9e --- /dev/null +++ b/internal/core/src/index/JsonFlatIndex.h @@ -0,0 +1,227 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + +#pragma once +#include +#include +#include "common/EasyAssert.h" +#include "common/JsonCastType.h" +#include "common/Types.h" +#include "index/Index.h" +#include "index/InvertedIndexTantivy.h" +#include "index/InvertedIndexUtil.h" +#include "index/ScalarIndex.h" +#include "log/Log.h" +namespace milvus::index { + +// JsonFlatIndexQueryExecutor is used to execute queries on a specified json path, and can be constructed by JsonFlatIndex +template +class JsonFlatIndexQueryExecutor : public InvertedIndexTantivy { + public: + JsonFlatIndexQueryExecutor(std::string& json_path, + std::shared_ptr wrapper) { + json_path_ = json_path; + this->wrapper_ = wrapper; + } + + const TargetBitmap + In(size_t n, const T* values) override { + TargetBitmap bitset(this->Count()); + for (size_t i = 0; i < n; ++i) { + this->wrapper_->json_term_query(json_path_, values[i], &bitset); + } + return bitset; + } + + const TargetBitmap + IsNull() override { + TargetBitmap bitset(this->Count()); + this->wrapper_->json_exist_query(json_path_, &bitset); + bitset.flip(); + return bitset; + } + + const TargetBitmap + IsNotNull() override { + TargetBitmap bitset(this->Count()); + this->wrapper_->json_exist_query(json_path_, &bitset); + return bitset; + } + + const TargetBitmap + InApplyFilter( + size_t n, + const T* values, + const std::function& filter) override { + TargetBitmap bitset(this->Count()); + for (size_t i = 0; i < n; ++i) { + this->wrapper_->json_term_query(json_path_, values[i], &bitset); + apply_hits_with_filter(bitset, filter); + } + return bitset; + } + + virtual void + InApplyCallback( + size_t n, + const T* values, + const std::function& callback) override { + TargetBitmap bitset(this->Count()); + for (size_t i = 0; i < n; ++i) { + this->wrapper_->json_term_query(json_path_, values[i], &bitset); + apply_hits_with_callback(bitset, callback); + } + } + + const TargetBitmap + NotIn(size_t n, const T* values) override { + TargetBitmap bitset(this->Count()); + for (size_t i = 0; i < n; ++i) { + this->wrapper_->json_term_query(json_path_, values[i], &bitset); + } + + bitset.flip(); + + // TODO: optimize this + auto null_bitset = IsNotNull(); + bitset &= null_bitset; + + return bitset; + } + + const TargetBitmap + Range(T value, OpType op) override { + LOG_INFO("[executor] JsonFlatIndexQueryExecutor Range"); + TargetBitmap bitset(this->Count()); + switch (op) { + case OpType::LessThan: { + this->wrapper_->json_range_query( + json_path_, T(), value, true, false, false, false, &bitset); + } break; + case OpType::LessEqual: { + this->wrapper_->json_range_query( + json_path_, T(), value, true, false, true, false, &bitset); + } break; + case OpType::GreaterThan: { + this->wrapper_->json_range_query( + json_path_, value, T(), false, true, false, false, &bitset); + } break; + case OpType::GreaterEqual: { + this->wrapper_->json_range_query( + json_path_, value, T(), false, true, true, false, &bitset); + } break; + default: + PanicInfo(OpTypeInvalid, + fmt::format("Invalid OperatorType: {}", op)); + } + return bitset; + } + + const TargetBitmap + Query(const DatasetPtr& dataset) override { + return InvertedIndexTantivy::Query(dataset); + } + + const TargetBitmap + Range(T lower_bound_value, + bool lb_inclusive, + T upper_bound_value, + bool ub_inclusive) override { + TargetBitmap bitset(this->Count()); + this->wrapper_->json_range_query(json_path_, + lower_bound_value, + upper_bound_value, + false, + false, + lb_inclusive, + ub_inclusive, + &bitset); + return bitset; + } + + const TargetBitmap + PrefixMatch(const std::string_view prefix) override { + TargetBitmap bitset(this->Count()); + this->wrapper_->json_prefix_query( + json_path_, std::string(prefix), &bitset); + return bitset; + } + + const TargetBitmap + RegexQuery(const std::string& pattern) override { + TargetBitmap bitset(this->Count()); + this->wrapper_->json_regex_query(json_path_, pattern, &bitset); + return bitset; + } + + private: + std::string json_path_; +}; + +// JsonFlatIndex is not bound to any specific type, +// we need to reuse InvertedIndexTantivy's Build and Load implementation, so we specify the template parameter as std::string +// JsonFlatIndex should not be used to execute queries, use JsonFlatIndexQueryExecutor instead +class JsonFlatIndex : public InvertedIndexTantivy { + public: + JsonFlatIndex() : InvertedIndexTantivy() { + } + + explicit JsonFlatIndex(const storage::FileManagerContext& ctx, + const std::string& nested_path) + : InvertedIndexTantivy( + TANTIVY_INDEX_LATEST_VERSION, ctx, false, false), + nested_path_(nested_path) { + } + + void + build_index_for_json(const std::vector>& + field_datas) override; + + template + std::shared_ptr> + create_executor(std::string json_path) const { + // json path should be in the format of /a/b/c, we need to convert it to tantivy path like a.b.c + std::replace(json_path.begin(), json_path.end(), '/', '.'); + if (!json_path.empty()) { + json_path = json_path.substr(1); + } + + LOG_INFO("Create JsonFlatIndexQueryExecutor with json_path: {}", + json_path); + + return std::make_shared>(json_path, + this->wrapper_); + } + + JsonCastType + GetCastType() const override { + return JsonCastType::FromString("JSON"); + } + + std::string + GetNestedPath() const { + return nested_path_; + } + + void + finish() { + this->wrapper_->finish(); + } + + void + create_reader() { + this->wrapper_->create_reader(); + } + + private: + std::string nested_path_; +}; +} // namespace milvus::index \ No newline at end of file diff --git a/internal/core/src/index/JsonInvertedIndex.cpp b/internal/core/src/index/JsonInvertedIndex.cpp index 233a854f09..15892611b2 100644 --- a/internal/core/src/index/JsonInvertedIndex.cpp +++ b/internal/core/src/index/JsonInvertedIndex.cpp @@ -19,79 +19,11 @@ #include "common/Types.h" #include "folly/FBVector.h" #include "log/Log.h" +#include "common/JsonUtils.h" #include "simdjson/error.h" namespace milvus::index { -// Parse a JSON Pointer into unescaped path segments -std::vector -parse_json_pointer(const std::string& pointer) { - std::vector tokens; - if (pointer.empty()) - return tokens; // Root path (entire document) - if (pointer[0] != '/') { - throw std::invalid_argument( - "Invalid JSON Pointer: must start with '/'"); - } - size_t start = 1; - while (start < pointer.size()) { - size_t end = pointer.find('/', start); - if (end == std::string::npos) - end = pointer.size(); - std::string token = pointer.substr(start, end - start); - // Replace ~1 with / and ~0 with ~ - size_t pos = 0; - while ((pos = token.find("~1", pos)) != std::string::npos) { - token.replace(pos, 2, "/"); - pos += 1; // Avoid infinite loops on overlapping replacements - } - pos = 0; - while ((pos = token.find("~0", pos)) != std::string::npos) { - token.replace(pos, 2, "~"); - pos += 1; - } - tokens.push_back(token); - start = end + 1; - } - return tokens; -} - -// Check if a JSON Pointer path exists -bool -path_exists(const simdjson::dom::element& root, - const std::vector& tokens) { - simdjson::dom::element current = root; - for (const auto& token : tokens) { - if (current.type() == simdjson::dom::element_type::OBJECT) { - auto obj = current.get_object(); - if (obj.error()) - return false; - auto next = obj.value().at_key(token); - if (next.error()) - return false; - current = next.value(); - } else if (current.type() == simdjson::dom::element_type::ARRAY) { - if (token == "-") - return false; // "-" is invalid for existence checks - char* endptr; - long index = strtol(token.c_str(), &endptr, 10); - if (*endptr != '\0' || index < 0) - return false; // Not a valid index - auto arr = current.get_array(); - if (arr.error()) - return false; - if (static_cast(index) >= arr.value().size()) - return false; - auto next = arr.value().at(index); - if (next.error()) - return false; - current = next.value(); - } else { - return false; // Path cannot be resolved - } - } - return true; -} template void JsonInvertedIndex::build_index_for_json( @@ -120,9 +52,7 @@ JsonInvertedIndex::build_index_for_json( } auto exists = path_exists(json_column->dom_doc(), tokens); - if (!exists || - nested_path_ != "" && - json_column->doc().at_pointer(nested_path_).is_null()) { + if (!exists || !json_column->exist(nested_path_)) { error_recorder_.Record( *json_column, nested_path_, simdjson::NO_SUCH_FIELD); this->null_offset_.push_back(offset); diff --git a/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp b/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp index 04c2bfc740..e2a45eded0 100644 --- a/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp +++ b/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp @@ -44,6 +44,7 @@ #include "google/protobuf/message_lite.h" #include "index/Index.h" #include "index/IndexFactory.h" +#include "index/JsonFlatIndex.h" #include "index/VectorMemIndex.h" #include "mmap/ChunkedColumn.h" #include "mmap/Types.h" @@ -163,11 +164,12 @@ ChunkedSegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) { if (field_meta.get_data_type() == DataType::JSON) { auto path = info.index_params.at(JSON_PATH); - JSONIndexKey key; - key.nested_path = path; - key.field_id = field_id; - json_indexings_[key] = - std::move(const_cast(info).index); + JsonIndex index; + index.nested_path = path; + index.field_id = field_id; + index.index = std::move(const_cast(info).index); + index.cast_type = index.index->GetCastType(); + json_indices.push_back(std::move(index)); return; } diff --git a/internal/core/src/segcore/SegmentInterface.h b/internal/core/src/segcore/SegmentInterface.h index 906f9e23c1..a1afd01459 100644 --- a/internal/core/src/segcore/SegmentInterface.h +++ b/internal/core/src/segcore/SegmentInterface.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,8 @@ #include "common/QueryResult.h" #include "common/QueryInfo.h" #include "mmap/ChunkedColumnInterface.h" +#include "index/Index.h" +#include "index/JsonFlatIndex.h" #include "query/Plan.h" #include "pb/segcore.pb.h" #include "index/SkipIndex.h" @@ -245,6 +248,17 @@ class SegmentInternalInterface : public SegmentInterface { return PinWrapper*>(pw, ptr); } + // We should not expose this interface directly, but access the index through chunk_scalar_index. + // However, chunk_scalar_index requires specifying a template parameter, which makes it impossible to return JsonFlatIndex. + // A better approach would be to have chunk_scalar_index return a pointer to a base class, + // and then use dynamic_cast to convert it. But this would cause a lot of code changes, so for now, we will do it this way. + PinWrapper + chunk_json_index(FieldId field_id, + std::string& json_path, + int64_t chunk_id) const { + return chunk_index_impl(field_id, json_path, chunk_id); + } + // union(segment_id, field_id) as unique id virtual std::string GetUniqueFieldId(int64_t field_id) const { @@ -491,7 +505,7 @@ class SegmentInternalInterface : public SegmentInterface { public: virtual PinWrapper chunk_index_impl(FieldId field_id, - std::string path, + const std::string& path, int64_t chunk_id) const { PanicInfo(ErrorCode::NotImplemented, "not implemented"); }; diff --git a/internal/core/src/segcore/SegmentSealed.h b/internal/core/src/segcore/SegmentSealed.h index 9adbd47f01..7d418673a8 100644 --- a/internal/core/src/segcore/SegmentSealed.h +++ b/internal/core/src/segcore/SegmentSealed.h @@ -12,11 +12,15 @@ #pragma once #include +#include #include +#include "common/JsonCastType.h" #include "common/LoadInfo.h" #include "common/Types.h" #include "index/Index.h" +#include "index/JsonInvertedIndex.h" +#include "index/JsonFlatIndex.h" #include "pb/segcore.pb.h" #include "segcore/InsertRecord.h" #include "segcore/SegmentInterface.h" @@ -53,14 +57,38 @@ class SegmentSealed : public SegmentInternalInterface { virtual index::IndexBase* GetJsonIndex(FieldId field_id, std::string path) const override { - JSONIndexKey key; - key.field_id = field_id; - key.nested_path = path; - auto index = json_indexings_.find(key); - if (index == json_indexings_.end()) { - return nullptr; + int path_len_diff = std::numeric_limits::max(); + index::IndexBase* best_match = nullptr; + std::string_view path_view = path; + for (const auto& index : json_indices) { + if (index.field_id != field_id) { + continue; + } + switch (index.cast_type.data_type()) { + case JsonCastType::DataType::JSON: + if (path_view.length() < index.nested_path.length()) { + continue; + } + if (path_view.substr(0, index.nested_path.length()) == + index.nested_path) { + int current_len_diff = + path_view.length() - index.nested_path.length(); + if (current_len_diff < path_len_diff) { + path_len_diff = current_len_diff; + best_match = index.index.get(); + } + if (path_len_diff == 0) { + return best_match; + } + } + break; + default: + if (index.nested_path == path) { + return index.index.get(); + } + } } - return index->second.get(); + return best_match; } virtual void @@ -73,19 +101,6 @@ class SegmentSealed : public SegmentInternalInterface { return SegmentType::Sealed; } - PinWrapper - chunk_index_impl(FieldId field_id, - std::string path, - int64_t chunk_id) const override { - JSONIndexKey key; - key.field_id = field_id; - key.nested_path = path; - AssertInfo(json_indexings_.find(key) != json_indexings_.end(), - "Cannot find json index with path: " + path); - return PinWrapper( - json_indexings_.at(key).get()); - } - virtual bool HasIndex(FieldId field_id) const override = 0; bool @@ -94,43 +109,48 @@ class SegmentSealed : public SegmentInternalInterface { DataType data_type, bool any_type = false, bool is_json_contain = false) const override { - JSONIndexKey key; - key.field_id = field_id; - key.nested_path = path; - auto index = json_indexings_.find(key); - if (index == json_indexings_.end()) { - return false; - } - if (any_type) { - return true; - } - return index->second->IsDataTypeSupported(data_type, is_json_contain); + auto it = std::find_if( + json_indices.begin(), + json_indices.end(), + [field_id, path, data_type, any_type, is_json_contain]( + const JsonIndex& index) { + if (index.field_id != field_id) { + return false; + } + if (index.cast_type.data_type() == + JsonCastType::DataType::JSON) { + // for json flat index, path should be a subpath of nested_path + return path.substr(0, index.nested_path.length()) == + index.nested_path; + } + if (any_type) { + return true; + } + return index.nested_path == path && + index.index->IsDataTypeSupported(data_type, + is_json_contain); + }); + return it != json_indices.end(); } protected: - struct JSONIndexKey { + virtual PinWrapper + chunk_index_impl(FieldId field_id, int64_t chunk_id) const override = 0; + + PinWrapper + chunk_index_impl(FieldId field_id, + const std::string& path, + int64_t chunk_id) const override { + return GetJsonIndex(field_id, path); + } + struct JsonIndex { FieldId field_id; std::string nested_path; - bool - operator==(const JSONIndexKey& other) const { - return field_id == other.field_id && - nested_path == other.nested_path; - } + JsonCastType cast_type{JsonCastType::UNKNOWN}; + index::IndexBasePtr index; }; - struct hash_helper { - size_t - operator()(const JSONIndexKey& k) const { - std::hash h1; - std::hash h2; - size_t hash_result = 0; - boost::hash_combine(hash_result, h1(k.field_id.get())); - boost::hash_combine(hash_result, h2(k.nested_path)); - return hash_result; - } - }; - std::unordered_map - json_indexings_; + std::vector json_indices; }; using SegmentSealedSPtr = std::shared_ptr; diff --git a/internal/core/src/storage/Util.cpp b/internal/core/src/storage/Util.cpp index 49c76a08fd..4d10b99129 100644 --- a/internal/core/src/storage/Util.cpp +++ b/internal/core/src/storage/Util.cpp @@ -18,6 +18,7 @@ #include #include "arrow/array/builder_binary.h" +#include "arrow/array/builder_nested.h" #include "arrow/scalar.h" #include "arrow/type_fwd.h" #include "fmt/format.h" diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/Cargo.lock b/internal/core/thirdparty/tantivy/tantivy-binding/Cargo.lock index 37f111c5f8..37b220d1af 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/Cargo.lock +++ b/internal/core/thirdparty/tantivy/tantivy-binding/Cargo.lock @@ -25,10 +25,10 @@ checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", "const-random", - "getrandom 0.2.15", + "getrandom 0.2.16", "once_cell", "version_check", - "zerocopy", + "zerocopy 0.7.35", ] [[package]] @@ -119,9 +119,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.95" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" [[package]] name = "arc-swap" @@ -143,13 +143,13 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.86" +version = "0.1.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d" +checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -234,9 +234,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.8.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" +checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" [[package]] name = "bitpacking" @@ -258,9 +258,9 @@ dependencies = [ [[package]] name = "bon" -version = "3.3.2" +version = "3.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe7acc34ff59877422326db7d6f2d845a582b16396b6b08194942bf34c6528ab" +checksum = "ced38439e7a86a4761f7f7d5ded5ff009135939ecb464a24452eaa4c1696af7d" dependencies = [ "bon-macros", "rustversion", @@ -268,9 +268,9 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.3.2" +version = "3.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4159dd617a7fbc9be6a692fe69dc2954f8e6bb6bb5e4d7578467441390d77fd0" +checksum = "0ce61d2d3844c6b8d31b2353d9f66cf5e632b3e9549583fe3cac2f4f6136725e" dependencies = [ "darling", "ident_case", @@ -278,7 +278,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -294,9 +294,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "4.0.2" +version = "4.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74fa05ad7d803d413eb8380983b092cbbaf9a85f151b871360e7b00cd7060b37" +checksum = "a334ef7c9e23abf0ce748e8cd309037da93e606ad52eb372e4ce327a0dcfbdfd" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -316,9 +316,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" [[package]] name = "cast" @@ -356,9 +356,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.13" +version = "1.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7777341816418c02e033934a09f20dc0ccaf65a5201ef8a450ae0105a573fda" +checksum = "8e3a13707ac958681c13b39b458c073d0d9bc8a22cb1b2f4c8e55eb72c13f362" dependencies = [ "jobserver", "libc", @@ -481,7 +481,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.16", "once_cell", "tiny-keccak", ] @@ -558,9 +558,9 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.14" +version = "0.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" dependencies = [ "crossbeam-utils", ] @@ -610,18 +610,18 @@ dependencies = [ [[package]] name = "csv-core" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" dependencies = [ "memchr", ] [[package]] name = "darling" -version = "0.20.10" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" dependencies = [ "darling_core", "darling_macro", @@ -629,27 +629,27 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.20.10" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" dependencies = [ "fnv", "ident_case", "proc-macro2", "quote", "strsim 0.11.1", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "darling_macro" -version = "0.20.10" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -668,9 +668,9 @@ dependencies = [ [[package]] name = "deranged" -version = "0.3.11" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" dependencies = [ "powerfmt", "serde", @@ -694,7 +694,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -704,7 +704,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -715,7 +715,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -726,9 +726,9 @@ checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" [[package]] name = "either" -version = "1.13.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "encoding" @@ -824,28 +824,28 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.11.6" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcaee3d8e3cfc3fd92428d477bc97fc29ec8716d180c0d74c643bb26166660e0" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" dependencies = [ "anstream", "anstyle", "env_filter", - "humantime", + "jiff", "log", ] [[package]] name = "equivalent" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.10" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +checksum = "976dd42dc7e85965fe702eb8164f21f450704bdde31faefd6471dba214cb594e" dependencies = [ "libc", "windows-sys 0.59.0", @@ -864,9 +864,9 @@ dependencies = [ [[package]] name = "event-listener-strategy" -version = "0.5.3" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c3e4e0dd3673c1139bf041f3008816d9cf2946bbfac2945c09e523b8d7b05b2" +checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" dependencies = [ "event-listener", "pin-project-lite", @@ -909,9 +909,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.35" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" +checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" dependencies = [ "crc32fast", "miniz_oxide", @@ -925,9 +925,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "foldhash" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" [[package]] name = "foreign-types" @@ -969,7 +969,7 @@ version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2eeb4ed9e12f43b7fa0baae3f9cdda28352770132ef2e09a23760c29cae8bd47" dependencies = [ - "rustix", + "rustix 0.38.44", "windows-sys 0.48.0", ] @@ -979,7 +979,7 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7e180ac76c23b45e767bd7ae9579bc0bb458618c4bc71835926e098e61d15f8" dependencies = [ - "rustix", + "rustix 0.38.44", "windows-sys 0.52.0", ] @@ -1045,7 +1045,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -1089,9 +1089,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "libc", @@ -1100,14 +1100,14 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0" dependencies = [ "cfg-if", "libc", - "wasi 0.13.3+wasi-0.2.2", - "windows-targets 0.52.6", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", ] [[package]] @@ -1124,9 +1124,9 @@ checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" [[package]] name = "h2" -version = "0.4.7" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" +checksum = "75249d144030531f8dee69fe9cea04d3edf809a017ae445e2abdff6629e86633" dependencies = [ "atomic-waker", "bytes", @@ -1134,7 +1134,7 @@ dependencies = [ "futures-core", "futures-sink", "http", - "indexmap 2.7.1", + "indexmap 2.9.0", "slab", "tokio", "tokio-util", @@ -1213,9 +1213,9 @@ checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" [[package]] name = "http" -version = "1.2.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" +checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" dependencies = [ "bytes", "fnv", @@ -1234,12 +1234,12 @@ dependencies = [ [[package]] name = "http-body-util" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", - "futures-util", + "futures-core", "http", "http-body", "pin-project-lite", @@ -1247,15 +1247,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2d708df4e7140240a16cd6ab0ab65c972d7433ab77819ea693fde9c43811e2a" - -[[package]] -name = "humantime" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" [[package]] name = "hyper" @@ -1312,9 +1306,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" +checksum = "497bbc33a26fdd4af9ed9c70d63f61cf56a938375fbb32df34db9b1cd6d643f2" dependencies = [ "bytes", "futures-channel", @@ -1322,6 +1316,7 @@ dependencies = [ "http", "http-body", "hyper", + "libc", "pin-project-lite", "socket2", "tokio", @@ -1430,9 +1425,9 @@ dependencies = [ [[package]] name = "icu_locid_transform_data" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" +checksum = "7515e6d781098bf9f7205ab3fc7e9709d34554ae0b21ddbcb5febfa4bc7df11d" [[package]] name = "icu_normalizer" @@ -1454,9 +1449,9 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" +checksum = "c5e8338228bdc8ab83303f16b797e177953730f601a96c25d10cb3ab0daa0cb7" [[package]] name = "icu_properties" @@ -1475,9 +1470,9 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" +checksum = "85fb8799753b75aee8d2a21d7c14d9f38921b54b3dbda10f5a3c7a7b82dba5e2" [[package]] name = "icu_provider" @@ -1532,7 +1527,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -1620,9 +1615,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.7.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", "hashbrown 0.15.2", @@ -1681,9 +1676,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.14" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "jieba-rs" @@ -1701,11 +1696,36 @@ dependencies = [ ] [[package]] -name = "jobserver" -version = "0.1.32" +name = "jiff" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +checksum = "5a064218214dc6a10fbae5ec5fa888d80c45d611aba169222fc272072bf7aef6" dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", +] + +[[package]] +name = "jiff-static" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "199b7932d97e325aff3a7030e141eafe7f2c6268e1d1b24859b753a627f45254" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.100", +] + +[[package]] +name = "jobserver" +version = "0.1.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" +dependencies = [ + "getrandom 0.3.2", "libc", ] @@ -1742,15 +1762,15 @@ checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" [[package]] name = "libc" -version = "0.2.169" +version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "libm" -version = "0.2.11" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" +checksum = "c9627da5196e5d8ed0b0495e61e518847578da83483c37288316d9b2e03a7f72" [[package]] name = "libredox" @@ -1758,16 +1778,16 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "libc", "redox_syscall", ] [[package]] name = "lindera" -version = "0.40.1" +version = "0.40.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e4788d40d68746f81d50faba969dca5d9226ab527a9da90a7708dcbd1203f69" +checksum = "5fc96440caa8bb9e832f1fc737ca807d603117e598ebb7d00c96d9558e8d7a30" dependencies = [ "anyhow", "bincode", @@ -1795,9 +1815,9 @@ dependencies = [ [[package]] name = "lindera-cc-cedict" -version = "0.40.1" +version = "0.40.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9d2239d15eb62732f2912fee43585598e61dfe62600df13a8c54ae1c4b1ac71" +checksum = "97718bf9918752bd31d9de979a1a900768b54ab741f6b0e52c22ac22205a72ec" dependencies = [ "bincode", "byteorder", @@ -1808,9 +1828,9 @@ dependencies = [ [[package]] name = "lindera-dictionary" -version = "0.40.1" +version = "0.40.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc5a12c962d4acf12d4bca442fd6420ca7be5e78163167443dd95503ab8b3ee0" +checksum = "396b5be33424f4843e6dd8f2c98baebb1697cac3c0807846a07d9c65968fb849" dependencies = [ "anyhow", "bincode", @@ -1827,15 +1847,15 @@ dependencies = [ "reqwest", "serde", "tar", - "thiserror 2.0.11", + "thiserror 2.0.12", "yada", ] [[package]] name = "lindera-ipadic" -version = "0.40.1" +version = "0.40.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c64227c9bd30152825df8693300b11fc370e93dd3b8f833d202b7a1709625104" +checksum = "495e16ea16f066739b4d4920dac11e9a46dfd12fdfb6062b41c06d8d41776059" dependencies = [ "bincode", "byteorder", @@ -1846,9 +1866,9 @@ dependencies = [ [[package]] name = "lindera-ipadic-neologd" -version = "0.40.1" +version = "0.40.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c36f3516601e9b4bee37cc85126928fb19b70c0bc15210775d93019d07ebecda" +checksum = "e4eea658adf10e0ee13059599c708b5ffac3058fc78984f792e1a703a1fbb1a8" dependencies = [ "bincode", "byteorder", @@ -1859,9 +1879,9 @@ dependencies = [ [[package]] name = "lindera-ko-dic" -version = "0.40.1" +version = "0.40.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479bb34668a96bee04d9d39107c8e7b52a09f1b115c255aa4ccd5962b1fa5a86" +checksum = "c1cb7bbef870ea429532b05c37f246e2a7c47e349309ccc526e1323af3c78490" dependencies = [ "bincode", "byteorder", @@ -1872,9 +1892,9 @@ dependencies = [ [[package]] name = "lindera-unidic" -version = "0.40.1" +version = "0.40.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f95b856c9a016df9f713ffc34fdf5d5e8e4353779fe1f114e95cfa2e04fb22" +checksum = "b074127b5117e24bf2023c4ec569e1055254113a6c243bac92261e043a94eb11" dependencies = [ "bincode", "byteorder", @@ -2664,10 +2684,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" [[package]] -name = "litemap" -version = "0.7.4" +name = "linux-raw-sys" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" + +[[package]] +name = "litemap" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856" [[package]] name = "lock_api" @@ -2681,9 +2707,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.25" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "lru" @@ -2772,9 +2798,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.8.3" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924" +checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" dependencies = [ "adler2", ] @@ -2798,9 +2824,9 @@ checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" [[package]] name = "native-tls" -version = "0.2.13" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dab59f8e050d5df8e4dd87d9206fb6f65a483e20ac9fda365ade4fab353196c" +checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" dependencies = [ "libc", "log", @@ -2924,15 +2950,15 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.20.3" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "oneshot" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79d72a7c0f743d2ebb0a2ad1d219db75fdc799092ed3a884c9144c42a31225bd" +checksum = "b4ce411919553d3f9fa53a0880544cda985a112117a0444d5ff1e870a893d6ea" [[package]] name = "oorandom" @@ -2942,11 +2968,11 @@ checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" [[package]] name = "openssl" -version = "0.10.70" +version = "0.10.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61cfb4e166a8bb8c9b55c500bc2308550148ece889be90f609377e58140f42c6" +checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "cfg-if", "foreign-types", "libc", @@ -2963,7 +2989,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -2974,9 +3000,9 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "openssl-sys" -version = "0.9.105" +version = "0.9.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b22d5b84be05a8d6947c7cb71f7c849aa0f112acd4bf51c2a7c1c988ac0a9dc" +checksum = "8288979acd84749c744a9014b4382d42b8f7b2592847b5afb2ed29e5d16ede07" dependencies = [ "cc", "libc", @@ -3083,9 +3109,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "plotters" @@ -3115,6 +3141,21 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "portable-atomic" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + [[package]] name = "potential_utf" version = "0.1.2" @@ -3133,41 +3174,47 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" [[package]] name = "ppv-lite86" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ - "zerocopy", + "zerocopy 0.8.24", ] [[package]] name = "prettyplease" -version = "0.2.29" +version = "0.2.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6924ced06e1f7dfe3fa48d57b9f74f55d8915f5036121bef647ef4b204895fac" +checksum = "664ec5419c51e34154eec046ebcba56312d5a2fc3b09a06da188e1ad21afadf6" dependencies = [ "proc-macro2", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "proc-macro2" -version = "1.0.93" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + [[package]] name = "rand" version = "0.3.23" @@ -3233,7 +3280,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.16", ] [[package]] @@ -3277,11 +3324,11 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.8" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" +checksum = "d2f103c6d277498fbceb16e84d317e2a400f160f46904d5f5410848c829511a3" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", ] [[package]] @@ -3321,9 +3368,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "reqwest" -version = "0.12.12" +version = "0.12.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da" +checksum = "d19c46a6fdd48bc4dab94b6103fccc55d34c67cc0ad04653aad4ea2a07cd7bbb" dependencies = [ "base64 0.22.1", "bytes", @@ -3365,15 +3412,14 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.8" +version = "0.17.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.15", + "getrandom 0.2.16", "libc", - "spin", "untrusted", "windows-sys 0.52.0", ] @@ -3411,18 +3457,31 @@ version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + +[[package]] +name = "rustix" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d97817398dd4bb2e6da002002db259209759911da105da92bec29ccb12cf58bf" +dependencies = [ + "bitflags 2.9.0", + "errno", + "libc", + "linux-raw-sys 0.9.4", "windows-sys 0.59.0", ] [[package]] name = "rustls" -version = "0.23.22" +version = "0.23.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb9263ab4eb695e42321db096e3b8fbd715a59b154d5c88d82db2175b681ba7" +checksum = "df51b5869f3a441595eac5e8ff14d486ff285f7b8c0df8770e49c3b56351f0f0" dependencies = [ "once_cell", "rustls-pki-types", @@ -3448,9 +3507,9 @@ checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c" [[package]] name = "rustls-webpki" -version = "0.102.8" +version = "0.103.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +checksum = "fef8b8769aaccf73098557a87cd1816b4f9c7c16811c9c77142aa695c16f2c03" dependencies = [ "ring", "rustls-pki-types", @@ -3459,15 +3518,15 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" +checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" [[package]] name = "ryu" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "same-file" @@ -3499,7 +3558,7 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "core-foundation", "core-foundation-sys", "libc", @@ -3544,7 +3603,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3577,7 +3636,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.7.1", + "indexmap 2.9.0", "itoa", "ryu", "serde", @@ -3625,26 +3684,20 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.13.2" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" [[package]] name = "socket2" -version = "0.5.8" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" +checksum = "4f5fd57c80058a56cf5c777ab8a126398ece8e442983605d280a44ce79d0edef" dependencies = [ "libc", "windows-sys 0.52.0", ] -[[package]] -name = "spin" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" - [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -3688,7 +3741,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3710,9 +3763,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.98" +version = "2.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" dependencies = [ "proc-macro2", "quote", @@ -3736,7 +3789,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3745,7 +3798,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "core-foundation", "system-configuration-sys", ] @@ -3862,7 +3915,7 @@ dependencies = [ "tantivy-stacker 0.3.0", "tantivy-tokenizer-api 0.3.0", "tempfile", - "thiserror 2.0.11", + "thiserror 2.0.12", "time", "tokio", "uuid", @@ -4067,9 +4120,9 @@ dependencies = [ [[package]] name = "tar" -version = "0.4.43" +version = "0.4.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c65998313f8e17d0d553d28f91a0df93e4dbbbf770279c7bc21ca0f09ea1a1f6" +checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" dependencies = [ "filetime", "libc", @@ -4078,15 +4131,14 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.16.0" +version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38c246215d7d24f48ae091a2902398798e05d978b24315d6efbc00ede9a8bb91" +checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf" dependencies = [ - "cfg-if", "fastrand", - "getrandom 0.3.1", + "getrandom 0.3.2", "once_cell", - "rustix", + "rustix 1.0.5", "windows-sys 0.59.0", ] @@ -4101,9 +4153,9 @@ dependencies = [ [[package]] name = "textwrap" -version = "0.16.1" +version = "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" +checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057" [[package]] name = "thiserror" @@ -4116,11 +4168,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.11" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" dependencies = [ - "thiserror-impl 2.0.11", + "thiserror-impl 2.0.12", ] [[package]] @@ -4131,25 +4183,25 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "thiserror-impl" -version = "2.0.11" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "time" -version = "0.3.37" +version = "0.3.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21" +checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" dependencies = [ "deranged", "itoa", @@ -4162,15 +4214,15 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.2" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" +checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" [[package]] name = "time-macros" -version = "0.2.19" +version = "0.2.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de" +checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" dependencies = [ "num-conv", "time-core", @@ -4217,9 +4269,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "022db8904dfa342efe721985167e9fcd16c29b226db4397ed752a761cfce81e8" +checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" dependencies = [ "tinyvec_macros", ] @@ -4232,9 +4284,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.43.0" +version = "1.44.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e" +checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48" dependencies = [ "backtrace", "bytes", @@ -4254,7 +4306,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -4269,9 +4321,9 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.26.1" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37" +checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" dependencies = [ "rustls", "tokio", @@ -4279,9 +4331,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.13" +version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" +checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df" dependencies = [ "bytes", "futures-core", @@ -4359,9 +4411,9 @@ checksum = "6b12e05d9e06373163a9bb6bb8c263c261b396643a99445fe6b9811fd376581b" [[package]] name = "unicode-ident" -version = "1.0.16" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "unicode-normalization" @@ -4427,11 +4479,11 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.13.1" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced87ca4be083373936a67f8de945faa23b6b42384bd5b64434850802c6dccd0" +checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" dependencies = [ - "getrandom 0.3.1", + "getrandom 0.3.2", "serde", ] @@ -4474,9 +4526,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasi" -version = "0.13.3+wasi-0.2.2" +version = "0.14.2+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" dependencies = [ "wit-bindgen-rt", ] @@ -4503,7 +4555,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", "wasm-bindgen-shared", ] @@ -4538,7 +4590,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4604,33 +4656,38 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] -name = "windows-registry" -version = "0.2.0" +name = "windows-link" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" + +[[package]] +name = "windows-registry" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4286ad90ddb45071efd1a66dfa43eb02dd0dfbae1545ad6cc3c51cf34d7e8ba3" dependencies = [ "windows-result", "windows-strings", - "windows-targets 0.52.6", + "windows-targets 0.53.0", ] [[package]] name = "windows-result" -version = "0.2.0" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252" dependencies = [ - "windows-targets 0.52.6", + "windows-link", ] [[package]] name = "windows-strings" -version = "0.1.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319" dependencies = [ - "windows-result", - "windows-targets 0.52.6", + "windows-link", ] [[package]] @@ -4684,13 +4741,29 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1e4c7e8ceaaf9cb7d7507c974735728ab453b67ef8f18febdd7c11fe59dca8b" +dependencies = [ + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -4703,6 +4776,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -4715,6 +4794,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -4727,12 +4812,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -4745,6 +4842,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -4757,6 +4860,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -4769,6 +4878,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -4782,12 +4897,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] -name = "wit-bindgen-rt" -version = "0.33.0" +name = "windows_x86_64_msvc" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", ] [[package]] @@ -4810,13 +4931,12 @@ checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" [[package]] name = "xattr" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e105d177a3871454f754b33bb0ee637ecaaac997446375fd3e5d43a2ed00c909" +checksum = "0d65cbf2f12c15564212d48f4e3dfb87923d25d611f2aed18f4cb23f0413d89e" dependencies = [ "libc", - "linux-raw-sys", - "rustix", + "rustix 1.0.5", ] [[package]] @@ -4857,7 +4977,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", "synstructure", ] @@ -4869,7 +4989,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", "synstructure", ] @@ -4879,8 +4999,16 @@ version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ - "byteorder", - "zerocopy-derive", + "zerocopy-derive 0.7.35", +] + +[[package]] +name = "zerocopy" +version = "0.8.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2586fea28e186957ef732a5f8b3be2da217d65c5969d4b1e17f973ebbe876879" +dependencies = [ + "zerocopy-derive 0.8.24", ] [[package]] @@ -4891,27 +5019,38 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a996a8f63c5c4448cd959ac1bab0aaa3306ccfd060472f85943ee0750f0169be" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.100", ] [[package]] name = "zerofrom" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", "synstructure", ] @@ -4960,7 +5099,7 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -4971,7 +5110,7 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h b/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h index e0c44439dd..359dfaf61c 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h +++ b/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h @@ -12,6 +12,7 @@ enum class TantivyDataType : uint8_t { I64, F64, Bool, + JSON, }; struct RustArray { @@ -202,6 +203,72 @@ RustResult tantivy_prefix_query_keyword(void *ptr, const char *prefix, void *bit RustResult tantivy_regex_query(void *ptr, const char *pattern, void *bitset); +RustResult tantivy_json_term_query_i64(void *ptr, + const char *json_path, + int64_t term, + void *bitset); + +RustResult tantivy_json_term_query_f64(void *ptr, const char *json_path, double term, void *bitset); + +RustResult tantivy_json_term_query_bool(void *ptr, const char *json_path, bool term, void *bitset); + +RustResult tantivy_json_term_query_keyword(void *ptr, + const char *json_path, + const char *term, + void *bitset); + +RustResult tantivy_json_exist_query(void *ptr, const char *json_path, void *bitset); + +RustResult tantivy_json_range_query_i64(void *ptr, + const char *json_path, + int64_t lower_bound, + int64_t higher_bound, + bool lb_unbounded, + bool up_unbounded, + bool lb_inclusive, + bool ub_inclusive, + void *bitset); + +RustResult tantivy_json_range_query_f64(void *ptr, + const char *json_path, + double lower_bound, + double higher_bound, + bool lb_unbounded, + bool up_unbounded, + bool lb_inclusive, + bool ub_inclusive, + void *bitset); + +RustResult tantivy_json_range_query_bool(void *ptr, + const char *json_path, + bool lower_bound, + bool higher_bound, + bool lb_unbounded, + bool up_unbounded, + bool lb_inclusive, + bool ub_inclusive, + void *bitset); + +RustResult tantivy_json_range_query_keyword(void *ptr, + const char *json_path, + const char *lower_bound, + const char *higher_bound, + bool lb_unbounded, + bool up_unbounded, + bool lb_inclusive, + bool ub_inclusive, + void *bitset); + +RustResult tantivy_json_regex_query(void *ptr, + const char *json_path, + const char *pattern, + void *bitset); + +RustResult tantivy_json_prefix_query(void *ptr, + const char *json_path, + const char *prefix, + void *bitset); + RustResult tantivy_match_query(void *ptr, const char *query, void *bitset); RustResult tantivy_phrase_match_query(void *ptr, const char *query, uint32_t slop, void *bitset); @@ -303,6 +370,13 @@ RustResult tantivy_index_add_json_key_stats_data_by_batch(void *ptr, const uintptr_t *json_offsets_len, uintptr_t len); +RustResult tantivy_index_add_json(void *ptr, const char *s, int64_t offset); + +RustResult tantivy_index_add_array_json(void *ptr, + const char *const *array, + uintptr_t len, + int64_t offset); + RustResult tantivy_index_add_array_int8s(void *ptr, const int8_t *array, uintptr_t len, diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/data_type.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/data_type.rs index 72b43a7565..b74f4d8e11 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/data_type.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/data_type.rs @@ -8,4 +8,5 @@ pub enum TantivyDataType { I64, F64, Bool, + JSON, } diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader.rs index 3391c4dea2..41f9b2cb32 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader.rs @@ -2,7 +2,8 @@ use std::ffi::c_void; use std::ops::Bound; use std::sync::Arc; -use tantivy::query::{Query, RangeQuery, RegexQuery, TermQuery}; +use tantivy::fastfield::FastValue; +use tantivy::query::{ExistsQuery, Query, RangeQuery, RegexQuery, TermQuery}; use tantivy::schema::{Field, IndexRecordOption}; use tantivy::{Index, IndexReader, ReloadPolicy, Term}; @@ -338,6 +339,146 @@ impl IndexReaderWrapper { let q = RegexQuery::from_pattern(&pattern, self.field)?; self.search(&q, bitset) } + + // JSON related query methods + // These methods support querying JSON fields with different data types + + pub fn json_term_query_i64( + &self, + json_path: &str, + term: i64, + bitset: *mut c_void, + ) -> Result<()> { + let mut json_term = Term::from_field_json_path(self.field, json_path, false); + json_term.append_type_and_fast_value(term); + let q = TermQuery::new(json_term, IndexRecordOption::Basic); + self.search(&q, bitset) + } + + pub fn json_term_query_f64( + &self, + json_path: &str, + term: f64, + bitset: *mut c_void, + ) -> Result<()> { + let mut json_term = Term::from_field_json_path(self.field, json_path, false); + json_term.append_type_and_fast_value(term); + let q = TermQuery::new(json_term, IndexRecordOption::Basic); + self.search(&q, bitset) + } + + pub fn json_term_query_bool( + &self, + json_path: &str, + term: bool, + bitset: *mut c_void, + ) -> Result<()> { + let mut json_term = Term::from_field_json_path(self.field, json_path, false); + json_term.append_type_and_fast_value(term); + let q = TermQuery::new(json_term, IndexRecordOption::Basic); + self.search(&q, bitset) + } + + pub fn json_term_query_keyword( + &self, + json_path: &str, + term: &str, + bitset: *mut c_void, + ) -> Result<()> { + let mut json_term = Term::from_field_json_path(self.field, json_path, false); + json_term.append_type_and_str(term); + let q = TermQuery::new(json_term, IndexRecordOption::Basic); + self.search(&q, bitset) + } + + pub fn json_exist_query(&self, json_path: &str, bitset: *mut c_void) -> Result<()> { + let full_json_path = if json_path == "" { + self.field_name.clone() + } else { + format!("{}.{}", self.field_name, json_path) + }; + let q = ExistsQuery::new(full_json_path, true); + self.search(&q, bitset) + } + + pub fn json_range_query( + &self, + json_path: &str, + lower_bound: T, + higher_bound: T, + lb_unbounded: bool, + up_unbounded: bool, + lb_inclusive: bool, + ub_inclusive: bool, + bitset: *mut c_void, + ) -> Result<()> { + let lb = if lb_unbounded { + Bound::Unbounded + } else { + let mut term = Term::from_field_json_path(self.field, json_path, false); + term.append_type_and_fast_value::(lower_bound); + make_bounds(term, lb_inclusive) + }; + let ub = if up_unbounded { + Bound::Unbounded + } else { + let mut term = Term::from_field_json_path(self.field, json_path, false); + term.append_type_and_fast_value::(higher_bound); + make_bounds(term, ub_inclusive) + }; + let q = RangeQuery::new(lb, ub); + self.search(&q, bitset) + } + + pub fn json_range_query_keyword( + &self, + json_path: &str, + lower_bound: &str, + higher_bound: &str, + lb_unbounded: bool, + up_unbounded: bool, + lb_inclusive: bool, + ub_inclusive: bool, + bitset: *mut c_void, + ) -> Result<()> { + let lb = if lb_unbounded { + Bound::Unbounded + } else { + let mut term = Term::from_field_json_path(self.field, json_path, false); + term.append_type_and_str(lower_bound); + make_bounds(term, lb_inclusive) + }; + let ub = if up_unbounded { + Bound::Unbounded + } else { + let mut term = Term::from_field_json_path(self.field, json_path, false); + term.append_type_and_str(higher_bound); + make_bounds(term, ub_inclusive) + }; + let q = RangeQuery::new(lb, ub); + self.search(&q, bitset) + } + + pub fn json_regex_query( + &self, + json_path: &str, + pattern: &str, + bitset: *mut c_void, + ) -> Result<()> { + let q = RegexQuery::from_pattern_with_json_path(pattern, self.field, json_path)?; + self.search(&q, bitset) + } + + pub fn json_prefix_query( + &self, + json_path: &str, + prefix: &str, + bitset: *mut c_void, + ) -> Result<()> { + let escaped = regex::escape(prefix); + let pattern = format!("{}(.|\n)*", escaped); + self.json_regex_query(json_path, &pattern, bitset) + } } #[cfg(test)] diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader_c.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader_c.rs index 6ad74f93e0..05c8a05e6b 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader_c.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader_c.rs @@ -301,3 +301,216 @@ pub extern "C" fn tantivy_regex_query( let pattern = cstr_to_str!(pattern); unsafe { (*real).regex_query(pattern, bitset).into() } } + +// -------------------------json query-------------------- +#[no_mangle] +pub extern "C" fn tantivy_json_term_query_i64( + ptr: *mut c_void, + json_path: *const c_char, + term: i64, + bitset: *mut c_void, +) -> RustResult { + let real = ptr as *mut IndexReaderWrapper; + let json_path = cstr_to_str!(json_path); + unsafe { (*real).json_term_query_i64(json_path, term, bitset).into() } +} + +#[no_mangle] +pub extern "C" fn tantivy_json_term_query_f64( + ptr: *mut c_void, + json_path: *const c_char, + term: f64, + bitset: *mut c_void, +) -> RustResult { + let real = ptr as *mut IndexReaderWrapper; + let json_path = cstr_to_str!(json_path); + unsafe { (*real).json_term_query_f64(json_path, term, bitset).into() } +} + +#[no_mangle] +pub extern "C" fn tantivy_json_term_query_bool( + ptr: *mut c_void, + json_path: *const c_char, + term: bool, + bitset: *mut c_void, +) -> RustResult { + let real = ptr as *mut IndexReaderWrapper; + let json_path = cstr_to_str!(json_path); + unsafe { (*real).json_term_query_bool(json_path, term, bitset).into() } +} + +#[no_mangle] +pub extern "C" fn tantivy_json_term_query_keyword( + ptr: *mut c_void, + json_path: *const c_char, + term: *const c_char, + bitset: *mut c_void, +) -> RustResult { + let real = ptr as *mut IndexReaderWrapper; + let json_path = cstr_to_str!(json_path); + let term = cstr_to_str!(term); + unsafe { + (*real) + .json_term_query_keyword(json_path, term, bitset) + .into() + } +} + +#[no_mangle] +pub extern "C" fn tantivy_json_exist_query( + ptr: *mut c_void, + json_path: *const c_char, + bitset: *mut c_void, +) -> RustResult { + let real = ptr as *mut IndexReaderWrapper; + let json_path = cstr_to_str!(json_path); + unsafe { (*real).json_exist_query(json_path, bitset).into() } +} + +#[no_mangle] +pub extern "C" fn tantivy_json_range_query_i64( + ptr: *mut c_void, + json_path: *const c_char, + lower_bound: i64, + higher_bound: i64, + lb_unbounded: bool, + up_unbounded: bool, + lb_inclusive: bool, + ub_inclusive: bool, + bitset: *mut c_void, +) -> RustResult { + let real = ptr as *mut IndexReaderWrapper; + let json_path = cstr_to_str!(json_path); + unsafe { + (*real) + .json_range_query( + json_path, + lower_bound, + higher_bound, + lb_unbounded, + up_unbounded, + lb_inclusive, + ub_inclusive, + bitset, + ) + .into() + } +} + +#[no_mangle] +pub extern "C" fn tantivy_json_range_query_f64( + ptr: *mut c_void, + json_path: *const c_char, + lower_bound: f64, + higher_bound: f64, + lb_unbounded: bool, + up_unbounded: bool, + lb_inclusive: bool, + ub_inclusive: bool, + bitset: *mut c_void, +) -> RustResult { + let real = ptr as *mut IndexReaderWrapper; + let json_path = cstr_to_str!(json_path); + unsafe { + (*real) + .json_range_query( + json_path, + lower_bound, + higher_bound, + lb_unbounded, + up_unbounded, + lb_inclusive, + ub_inclusive, + bitset, + ) + .into() + } +} + +#[no_mangle] +pub extern "C" fn tantivy_json_range_query_bool( + ptr: *mut c_void, + json_path: *const c_char, + lower_bound: bool, + higher_bound: bool, + lb_unbounded: bool, + up_unbounded: bool, + lb_inclusive: bool, + ub_inclusive: bool, + bitset: *mut c_void, +) -> RustResult { + let real = ptr as *mut IndexReaderWrapper; + let json_path = cstr_to_str!(json_path); + unsafe { + (*real) + .json_range_query( + json_path, + lower_bound, + higher_bound, + lb_unbounded, + up_unbounded, + lb_inclusive, + ub_inclusive, + bitset, + ) + .into() + } +} + +#[no_mangle] +pub extern "C" fn tantivy_json_range_query_keyword( + ptr: *mut c_void, + json_path: *const c_char, + lower_bound: *const c_char, + higher_bound: *const c_char, + lb_unbounded: bool, + up_unbounded: bool, + lb_inclusive: bool, + ub_inclusive: bool, + bitset: *mut c_void, +) -> RustResult { + let real = ptr as *mut IndexReaderWrapper; + let json_path = cstr_to_str!(json_path); + let lower_bound = cstr_to_str!(lower_bound); + let higher_bound = cstr_to_str!(higher_bound); + unsafe { + (*real) + .json_range_query_keyword( + json_path, + lower_bound, + higher_bound, + lb_unbounded, + up_unbounded, + lb_inclusive, + ub_inclusive, + bitset, + ) + .into() + } +} + +#[no_mangle] +pub extern "C" fn tantivy_json_regex_query( + ptr: *mut c_void, + json_path: *const c_char, + pattern: *const c_char, + bitset: *mut c_void, +) -> RustResult { + let real = ptr as *mut IndexReaderWrapper; + let json_path = cstr_to_str!(json_path); + let pattern = cstr_to_str!(pattern); + unsafe { (*real).json_regex_query(json_path, pattern, bitset).into() } +} + +#[no_mangle] +pub extern "C" fn tantivy_json_prefix_query( + ptr: *mut c_void, + json_path: *const c_char, + prefix: *const c_char, + bitset: *mut c_void, +) -> RustResult { + let real = ptr as *mut IndexReaderWrapper; + let json_path = cstr_to_str!(json_path); + let prefix = cstr_to_str!(prefix); + unsafe { (*real).json_prefix_query(json_path, prefix, bitset).into() } +} diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs index 319a39daae..2d18456529 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs @@ -1,6 +1,14 @@ use index_writer_v5::TantivyDocumentV5; use index_writer_v7::TantivyDocumentV7; use libc::c_char; +use log::info; +use tantivy::schema::{ + Field, IndexRecordOption, OwnedValue, Schema, SchemaBuilder, TextFieldIndexing, TextOptions, + FAST, INDEXED, STRING, +}; +use tantivy::{ + doc, tokenizer, Document, Index, IndexWriter, SingleSegmentIndexWriter, TantivyDocument, +}; use crate::data_type::TantivyDataType; @@ -103,6 +111,30 @@ impl IndexWriterWrapper { } } + pub fn add_json(&mut self, data: &str, offset: Option) -> Result<()> { + match self { + IndexWriterWrapper::V5(_) => { + return Err(TantivyBindingError::InternalError( + "add json with tantivy index version 5 is not supported from tantivy with version 7" + .into(), + )); + } + IndexWriterWrapper::V7(writer) => writer.add_json(data, offset.unwrap() as u32), + } + } + + pub fn add_array_json(&mut self, datas: &[*const c_char], offset: Option) -> Result<()> { + match self { + IndexWriterWrapper::V5(_) => { + return Err(TantivyBindingError::InternalError( + "add array json with tantivy index version 5 is not supported from tantivy with version 7" + .into(), + )); + } + IndexWriterWrapper::V7(writer) => writer.add_array_json(datas, offset.unwrap() as u32), + } + } + pub fn add_array_keywords( &mut self, datas: &[*const c_char], diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs index e1fd208cc9..26d4e961c3 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs @@ -445,6 +445,31 @@ pub extern "C" fn tantivy_index_add_json_key_stats_data_by_batch( } } +#[no_mangle] +pub extern "C" fn tantivy_index_add_json( + ptr: *mut c_void, + s: *const c_char, + offset: i64, +) -> RustResult { + let real = ptr as *mut IndexWriterWrapper; + let s = cstr_to_str!(s); + unsafe { (*real).add_json(s, Some(offset)).into() } +} + +#[no_mangle] +pub extern "C" fn tantivy_index_add_array_json( + ptr: *mut c_void, + array: *const *const c_char, + len: usize, + offset: i64, +) -> RustResult { + let real = ptr as *mut IndexWriterWrapper; + unsafe { + let arr = convert_to_rust_slice!(array, len); + (*real).add_array_json(arr, Some(offset)).into() + } +} + // --------------------------------------------- array ------------------------------------------ #[no_mangle] diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_v5/index_writer.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_v5/index_writer.rs index 50edb9b8f6..69fce11be2 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_v5/index_writer.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_v5/index_writer.rs @@ -7,8 +7,11 @@ use libc::c_char; use log::info; use tantivy_5::schema::{ Field, IndexRecordOption, Schema, SchemaBuilder, TextFieldIndexing, TextOptions, FAST, INDEXED, + STRING, +}; +use tantivy_5::{ + doc, Document as TantivyDocument, Index, IndexWriter, SingleSegmentIndexWriter, UserOperation, }; -use tantivy_5::{doc, Document as TantivyDocument, Index, IndexWriter, SingleSegmentIndexWriter, UserOperation}; use crate::data_type::TantivyDataType; @@ -44,6 +47,7 @@ pub(crate) fn schema_builder_add_field( TantivyDataType::Text => { panic!("text should be indexed with analyzer"); } + TantivyDataType::JSON => schema_builder.add_json_field(&field_name, STRING | FAST), } } @@ -81,6 +85,13 @@ impl TantivyValue for bool { } } +impl TantivyValue for serde_json::Value { + #[inline] + fn add_to_document(&self, field: u32, document: &mut TantivyDocument) { + document.add_field_value(Field::from_field_id(field), self.clone()); + } +} + impl IndexWriterWrapperImpl { pub fn new( field_name: &str, @@ -174,6 +185,25 @@ impl IndexWriterWrapperImpl { self.add_document(document, offset) } + pub fn add_json(&mut self, data: &str, offset: Option) -> Result<()> { + let j = serde_json::from_str::(data)?; + let mut document = TantivyDocument::default(); + j.add_to_document(self.field.field_id(), &mut document); + + self.add_document(document, offset) + } + + pub fn add_array_json(&mut self, datas: &[*const c_char], offset: Option) -> Result<()> { + let mut document = TantivyDocument::default(); + for element in datas { + let data = unsafe { CStr::from_ptr(*element) }; + let j = serde_json::from_str::(data.to_str()?)?; + j.add_to_document(self.field.field_id(), &mut document); + } + + self.add_document(document, offset) + } + pub fn add_array_keywords( &mut self, datas: &[*const c_char], diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_v7/index_writer.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_v7/index_writer.rs index f1c1fdbae3..c102a863e1 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_v7/index_writer.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_v7/index_writer.rs @@ -7,7 +7,7 @@ use log::info; use tantivy::indexer::UserOperation; use tantivy::schema::{ Field, IndexRecordOption, NumericOptions, Schema, SchemaBuilder, TextFieldIndexing, - TextOptions, FAST, + TextOptions, FAST, STRING, }; use tantivy::{doc, Index, IndexWriter, TantivyDocument}; @@ -47,6 +47,7 @@ pub(crate) fn schema_builder_add_field( TantivyDataType::Text => { panic!("text should be indexed with analyzer"); } + TantivyDataType::JSON => schema_builder.add_json_field(&field_name, STRING | FAST), } } @@ -84,6 +85,13 @@ impl TantivyValue for bool { } } +impl TantivyValue for serde_json::Value { + #[inline] + fn add_to_document(&self, field: u32, document: &mut TantivyDocument) { + document.add_field_value(Field::from_field_id(field), self); + } +} + pub struct IndexWriterWrapperImpl { pub(crate) field: Field, pub(crate) index_writer: IndexWriter, @@ -174,6 +182,25 @@ impl IndexWriterWrapperImpl { self.add_document(document, offset) } + pub fn add_json(&mut self, data: &str, offset: u32) -> Result<()> { + let j = serde_json::from_str::(data)?; + let mut document = TantivyDocument::default(); + j.add_to_document(self.field.field_id(), &mut document); + + self.add_document(document, offset) + } + + pub fn add_array_json(&mut self, datas: &[*const c_char], offset: u32) -> Result<()> { + let mut document = TantivyDocument::default(); + for element in datas { + let data = unsafe { CStr::from_ptr(*element) }; + let j = serde_json::from_str::(data.to_str()?)?; + j.add_to_document(self.field.field_id(), &mut document); + } + + self.add_document(document, offset) + } + pub fn add_json_key_stats( &mut self, keys: &[*const c_char], diff --git a/internal/core/thirdparty/tantivy/tantivy-wrapper.h b/internal/core/thirdparty/tantivy/tantivy-wrapper.h index dbba8fca2b..38cfd9de4b 100644 --- a/internal/core/thirdparty/tantivy/tantivy-wrapper.h +++ b/internal/core/thirdparty/tantivy/tantivy-wrapper.h @@ -8,6 +8,7 @@ #include #include "common/EasyAssert.h" +#include "common/Json.h" #include "tantivy-binding.h" #include "rust-binding.h" #include "rust-array.h" @@ -85,10 +86,10 @@ struct TantivyIndexWrapper { const char* path, uint32_t tantivy_index_version, bool inverted_single_semgnent = false, + bool enable_user_specified_doc_id = true, uintptr_t num_threads = DEFAULT_NUM_THREADS, uintptr_t overall_memory_budget_in_bytes = - DEFAULT_OVERALL_MEMORY_BUDGET_IN_BYTES, - bool enable_user_specified_doc_id = true) { + DEFAULT_OVERALL_MEMORY_BUDGET_IN_BYTES) { RustResultWrapper res; if (inverted_single_semgnent) { AssertInfo(tantivy_index_version == 5, @@ -309,6 +310,34 @@ struct TantivyIndexWrapper { res.result_->error); } + void + add_json_data(const Json* array, uintptr_t len, int64_t offset_begin) { + assert(!finished_); + for (uintptr_t i = 0; i < len; i++) { + auto res = RustResultWrapper(tantivy_index_add_json( + writer_, array[i].data().data(), offset_begin + i)); + AssertInfo(res.result_->success, + "failed to add json: {}", + res.result_->error); + } + } + + void + add_json_array_data(const Json* array, + uintptr_t len, + int64_t offset_begin) { + assert(!finished_); + std::vector views; + for (uintptr_t i = 0; i < len; i++) { + views.push_back(array[i].c_str()); + } + auto res = RustResultWrapper(tantivy_index_add_array_json( + writer_, views.data(), len, offset_begin)); + AssertInfo(res.result_->success, + "failed to add multi json: {}", + res.result_->error); + } + template void add_array_data(const T* array, uintptr_t len, int64_t offset) { @@ -879,6 +908,170 @@ struct TantivyIndexWrapper { "TantivyIndexWrapper.phrase_match_query: invalid result type"); } + // json query + template + void + json_term_query(const std::string& json_path, T term, void* bitset) { + auto array = [&]() { + if constexpr (std::is_same_v) { + return tantivy_json_term_query_bool( + reader_, json_path.c_str(), term, bitset); + } + + if constexpr (std::is_integral_v) { + auto res = tantivy_json_term_query_i64( + reader_, json_path.c_str(), term, bitset); + AssertInfo(res.success, + "TantivyIndexWrapper.json_term_query: {}", + res.error); + return tantivy_json_term_query_f64( + reader_, json_path.c_str(), term, bitset); + } + + if constexpr (std::is_floating_point_v) { + // if term can be cast to int64 without precision loss, use int64 query first + if (std::floor(term) == term) { + auto res = tantivy_json_term_query_i64( + reader_, json_path.c_str(), term, bitset); + AssertInfo(res.success, + "TantivyIndexWrapper.json_term_query: {}", + res.error); + } + return tantivy_json_term_query_f64( + reader_, json_path.c_str(), term, bitset); + } + + if constexpr (std::is_same_v) { + return tantivy_json_term_query_keyword( + reader_, json_path.c_str(), term.c_str(), bitset); + } + + throw fmt::format( + "InvertedIndex.json_term_query: unsupported data type: {}", + typeid(T).name()); + return RustResult(); + }(); + auto res = RustResultWrapper(array); + AssertInfo(res.result_->success, + "TantivyIndexWrapper.json_term_query: {}", + res.result_->error); + AssertInfo(res.result_->value.tag == Value::Tag::None, + "TantivyIndexWrapper.json_term_query: invalid result type"); + } + + void + json_exist_query(const std::string& json_path, void* bitset) { + auto array = + tantivy_json_exist_query(reader_, json_path.c_str(), bitset); + auto res = RustResultWrapper(array); + AssertInfo(res.result_->success, + "TantivyIndexWrapper.json_exist_query: {}", + res.result_->error); + AssertInfo(res.result_->value.tag == Value::Tag::None, + "TantivyIndexWrapper.json_exist_query: invalid result type"); + } + + template + void + json_range_query(const std::string& json_path, + T lower_bound, + T upper_bound, + bool lb_unbounded, + bool ub_unbounded, + bool lb_inclusive, + bool ub_inclusive, + void* bitset) { + auto array = [&]() { + if constexpr (std::is_same_v) { + return tantivy_json_range_query_bool(reader_, + json_path.c_str(), + lower_bound, + upper_bound, + lb_unbounded, + ub_unbounded, + lb_inclusive, + ub_inclusive, + bitset); + } + + if constexpr (std::is_integral_v) { + return tantivy_json_range_query_i64(reader_, + json_path.c_str(), + lower_bound, + upper_bound, + lb_unbounded, + ub_unbounded, + lb_inclusive, + ub_inclusive, + bitset); + } + + if constexpr (std::is_floating_point_v) { + return tantivy_json_range_query_f64(reader_, + json_path.c_str(), + lower_bound, + upper_bound, + lb_unbounded, + ub_unbounded, + lb_inclusive, + ub_inclusive, + bitset); + } + + if constexpr (std::is_same_v) { + return tantivy_json_range_query_keyword(reader_, + json_path.c_str(), + lower_bound.c_str(), + upper_bound.c_str(), + lb_unbounded, + ub_unbounded, + lb_inclusive, + ub_inclusive, + bitset); + } + + throw fmt::format( + "InvertedIndex.json_range_query: unsupported data type: {}", + typeid(T).name()); + return RustResult(); + }(); + auto res = RustResultWrapper(array); + AssertInfo(res.result_->success, + "TantivyIndexWrapper.json_range_query: {}", + res.result_->error); + AssertInfo(res.result_->value.tag == Value::Tag::None, + "TantivyIndexWrapper.json_range_query: invalid result type"); + } + + void + json_regex_query(const std::string& json_path, + const std::string& pattern, + void* bitset) { + auto array = tantivy_json_regex_query( + reader_, json_path.c_str(), pattern.c_str(), bitset); + auto res = RustResultWrapper(array); + AssertInfo(res.result_->success, + "TantivyIndexWrapper.json_regex_query: {}", + res.result_->error); + AssertInfo(res.result_->value.tag == Value::Tag::None, + "TantivyIndexWrapper.json_regex_query: invalid result type"); + } + + void + json_prefix_query(const std::string& json_path, + const std::string& prefix, + void* bitset) { + auto array = tantivy_json_prefix_query( + reader_, json_path.c_str(), prefix.c_str(), bitset); + auto res = RustResultWrapper(array); + AssertInfo(res.result_->success, + "TantivyIndexWrapper.json_prefix_query: {}", + res.result_->error); + AssertInfo( + res.result_->value.tag == Value::Tag::None, + "TantivyIndexWrapper.json_prefix_query: invalid result type"); + } + public: inline IndexWriter get_writer() { diff --git a/internal/core/unittest/CMakeLists.txt b/internal/core/unittest/CMakeLists.txt index 5768856468..f858e89d52 100644 --- a/internal/core/unittest/CMakeLists.txt +++ b/internal/core/unittest/CMakeLists.txt @@ -104,6 +104,7 @@ set(MILVUS_TEST_FILES test_group_chunk_translator.cpp test_chunked_segment_storage_v2.cpp test_thread_pool.cpp + test_json_flat_index.cpp ) if ( INDEX_ENGINE STREQUAL "cardinal" ) diff --git a/internal/core/unittest/test_json_flat_index.cpp b/internal/core/unittest/test_json_flat_index.cpp new file mode 100644 index 0000000000..022513e00d --- /dev/null +++ b/internal/core/unittest/test_json_flat_index.cpp @@ -0,0 +1,685 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + +#include +#include +#include +#include +#include + +#include "common/Consts.h" +#include "common/Tracer.h" +#include "expr/ITypeExpr.h" +#include "index/JsonFlatIndex.h" +#include "pb/plan.pb.h" +#include "plan/PlanNode.h" +#include "query/ExecPlanNodeVisitor.h" +#include "segcore/ChunkedSegmentSealedImpl.h" +#include "segcore/SegmentSealed.h" +#include "storage/RemoteChunkManagerSingleton.h" +#include "storage/Util.h" +#include "storage/InsertData.h" +#include "indexbuilder/IndexFactory.h" +#include "index/IndexFactory.h" +#include "test_utils/indexbuilder_test_utils.h" +#include "index/Meta.h" +#include "index/Index.h" +#include "common/Json.h" +#include "simdjson/padded_string.h" +#include "common/FieldData.h" +#include "test_utils/storage_test_utils.h" + +using namespace milvus; + +namespace milvus::test { +auto +generate_field_meta(int64_t collection_id = 1, + int64_t partition_id = 2, + int64_t segment_id = 3, + int64_t field_id = 101, + DataType data_type = DataType::NONE, + DataType element_type = DataType::NONE, + bool nullable = false) -> storage::FieldDataMeta { + auto meta = storage::FieldDataMeta{ + .collection_id = collection_id, + .partition_id = partition_id, + .segment_id = segment_id, + .field_id = field_id, + }; + meta.field_schema.set_data_type( + static_cast(data_type)); + meta.field_schema.set_element_type( + static_cast(element_type)); + meta.field_schema.set_nullable(nullable); + return meta; +} + +auto +generate_index_meta(int64_t segment_id = 3, + int64_t field_id = 101, + int64_t index_build_id = 1000, + int64_t index_version = 10000) -> storage::IndexMeta { + return storage::IndexMeta{ + .segment_id = segment_id, + .field_id = field_id, + .build_id = index_build_id, + .index_version = index_version, + }; +} + +auto +generate_local_storage_config(const std::string& root_path) + -> storage::StorageConfig { + auto ret = storage::StorageConfig{}; + ret.storage_type = "local"; + ret.root_path = root_path; + return ret; +} + +struct ChunkManagerWrapper { + ChunkManagerWrapper(storage::ChunkManagerPtr cm) : cm_(cm) { + } + + ~ChunkManagerWrapper() { + for (const auto& file : written_) { + cm_->Remove(file); + } + + boost::filesystem::remove_all(cm_->GetRootPath()); + } + + void + Write(const std::string& filepath, void* buf, uint64_t len) { + written_.insert(filepath); + cm_->Write(filepath, buf, len); + } + + const storage::ChunkManagerPtr cm_; + std::unordered_set written_; +}; + +class JsonFlatIndexTest : public ::testing::Test { + protected: + void + SetUp() override { + int64_t collection_id = 1; + int64_t partition_id = 2; + int64_t segment_id = 3; + int64_t field_id = 101; + int64_t index_build_id = 4000; + int64_t index_version = 4000; + + field_meta_ = test::generate_field_meta( + collection_id, partition_id, segment_id, field_id, DataType::JSON); + index_meta_ = test::generate_index_meta( + segment_id, field_id, index_build_id, index_version); + + std::string root_path = "/tmp/test-json-flat-index/"; + auto storage_config = test::generate_local_storage_config(root_path); + cm_ = storage::CreateChunkManager(storage_config); + + json_data_ = { + R"({"profile": {"name": {"first": "Alice", "last": "Smith", "preferred_name": "Al"}, "team": {"name": "Engineering", "supervisor": {"name": "Bob"}}, "is_active": true, "employee_id": 1001, "skills": ["cpp", "rust", "python"], "scores": [95, 88, 92]}})", + R"({"profile": {"name": {"first": "Bob", "last": "Johnson", "preferred_name": null}, "team": {"name": "Product", "supervisor": {"name": "Charlie"}}, "is_active": false, "employee_id": 1002, "skills": ["java", "python"], "scores": [85, 90]}})", + R"({"profile": {"name": {"first": "Charlie", "last": "Williams"}, "team": {"name": "Design", "supervisor": {"name": "Alice"}}, "is_active": true, "employee_id": 1003, "skills": ["python", "javascript"], "scores": [87, 91, 89]}})"}; + + // Create field data with JSON values + auto field_data = storage::CreateFieldData(DataType::JSON); + std::vector json_vec; + for (const auto& json_str : json_data_) { + json_vec.push_back(Json(simdjson::padded_string(json_str))); + } + field_data->FillFieldData(json_vec.data(), json_vec.size()); + + auto payload_reader = + std::make_shared(field_data); + storage::InsertData insert_data(payload_reader); + insert_data.SetFieldDataMeta(field_meta_); + insert_data.SetTimestamps(0, 100); + + auto serialized_bytes = insert_data.Serialize(storage::Remote); + + auto get_binlog_path = [=](int64_t log_id) { + return fmt::format("{}/{}/{}/{}/{}", + collection_id, + partition_id, + segment_id, + field_id, + log_id); + }; + + log_path_ = get_binlog_path(0); + + cm_w_ = std::make_unique(cm_); + cm_w_->Write( + log_path_, serialized_bytes.data(), serialized_bytes.size()); + + ctx_ = std::make_unique( + field_meta_, index_meta_, cm_); + + // Build index + Config config; + config["index_type"] = milvus::index::INVERTED_INDEX_TYPE; + config["insert_files"] = std::vector{log_path_}; + { + auto index = std::make_shared(*ctx_, ""); + index->Build(config); + + auto create_index_result = index->Upload(); + auto memSize = create_index_result->GetMemSize(); + auto serializedSize = create_index_result->GetSerializedSize(); + ASSERT_GT(memSize, 0); + ASSERT_GT(serializedSize, 0); + index_files_ = create_index_result->GetIndexFiles(); + } + + // Load index + index::CreateIndexInfo index_info{}; + index_info.index_type = milvus::index::INVERTED_INDEX_TYPE; + index_info.field_type = DataType::JSON; + + Config load_config; + load_config["index_files"] = index_files_; + + ctx_->set_for_loading_index(true); + json_index_ = std::make_shared(*ctx_, ""); + json_index_->Load(milvus::tracer::TraceContext{}, load_config); + + auto cnt = json_index_->Count(); + ASSERT_EQ(cnt, json_data_.size()); + } + + void + TearDown() override { + cm_w_.reset(); + boost::filesystem::remove_all("/tmp/test-json-flat-index/"); + } + + storage::FieldDataMeta field_meta_; + storage::IndexMeta index_meta_; + storage::ChunkManagerPtr cm_; + std::unique_ptr cm_w_; + std::unique_ptr ctx_; + std::string log_path_; + std::vector json_data_; + std::vector index_files_; + std::shared_ptr json_index_; +}; + +TEST_F(JsonFlatIndexTest, TestInQuery) { + auto json_flat_index = + dynamic_cast(json_index_.get()); + ASSERT_NE(json_flat_index, nullptr); + + std::string json_path = "/profile/name/first"; + auto executor = json_flat_index->create_executor(json_path); + + std::vector names = {"Alice", "Bob"}; + auto result = executor->In(names.size(), names.data()); + ASSERT_EQ(result.size(), json_data_.size()); + ASSERT_TRUE(result[0]); // Alice + ASSERT_TRUE(result[1]); // Bob + ASSERT_FALSE(result[2]); // Charlie +} + +TEST_F(JsonFlatIndexTest, TestIsNullQuery) { + auto json_flat_index = + dynamic_cast(json_index_.get()); + ASSERT_NE(json_flat_index, nullptr); + + std::string json_path = "/profile/name/preferred_name"; + auto executor = json_flat_index->create_executor(json_path); + auto result = executor->IsNull(); + ASSERT_EQ(result.size(), json_data_.size()); + ASSERT_FALSE(result[0]); // Al + ASSERT_TRUE(result[1]); // null + ASSERT_TRUE(result[2]); // not exist +} + +TEST_F(JsonFlatIndexTest, TestIsNotNullQuery) { + auto json_flat_index = + dynamic_cast(json_index_.get()); + ASSERT_NE(json_flat_index, nullptr); + + std::string json_path = "/profile/name/preferred_name"; + auto executor = json_flat_index->create_executor(json_path); + auto result = executor->IsNotNull(); + ASSERT_EQ(result.size(), json_data_.size()); + ASSERT_TRUE(result[0]); // Al + ASSERT_FALSE(result[1]); // null + ASSERT_FALSE(result[2]); // not exist +} + +TEST_F(JsonFlatIndexTest, TestNotInQuery) { + auto json_flat_index = + dynamic_cast(json_index_.get()); + ASSERT_NE(json_flat_index, nullptr); + + std::string json_path = "/profile/team/name"; + auto executor = json_flat_index->create_executor(json_path); + std::vector teams = {"Engineering", "Product"}; + auto result = executor->NotIn(teams.size(), teams.data()); + ASSERT_EQ(result.size(), json_data_.size()); + ASSERT_FALSE(result[0]); // Engineering + ASSERT_FALSE(result[1]); // Product + ASSERT_TRUE(result[2]); // Design +} + +TEST_F(JsonFlatIndexTest, TestRangeQuery) { + auto json_flat_index = + dynamic_cast(json_index_.get()); + ASSERT_NE(json_flat_index, nullptr); + + std::string json_path = "/profile/name/first"; + auto executor = json_flat_index->create_executor(json_path); + + // Test LessThan + auto result = executor->Range(std::string("Charlie"), OpType::LessThan); + ASSERT_EQ(result.size(), json_data_.size()); + ASSERT_TRUE(result[0]); // Alice < Charlie + ASSERT_TRUE(result[1]); // Bob < Charlie + ASSERT_FALSE(result[2]); // Charlie = Charlie + + // Test Range between bounds + auto range_result = executor->Range(std::string("Alice"), + true, // lower bound inclusive + std::string("Bob"), + true); // upper bound inclusive + ASSERT_EQ(range_result.size(), json_data_.size()); + ASSERT_TRUE(range_result[0]); // Alice in [Alice, Bob] + ASSERT_TRUE(range_result[1]); // Bob in [Alice, Bob] + ASSERT_FALSE(range_result[2]); // Charlie not in [Alice, Bob] +} + +TEST_F(JsonFlatIndexTest, TestPrefixMatchQuery) { + auto json_flat_index = + dynamic_cast(json_index_.get()); + ASSERT_NE(json_flat_index, nullptr); + + std::string json_path = "/profile/name/first"; + auto executor = json_flat_index->create_executor(json_path); + auto result = executor->PrefixMatch("A"); + ASSERT_EQ(result.size(), json_data_.size()); + ASSERT_TRUE(result[0]); // Alice starts with A + ASSERT_FALSE(result[1]); // Bob doesn't start with A + ASSERT_FALSE(result[2]); // Charlie doesn't start with A +} + +TEST_F(JsonFlatIndexTest, TestRegexQuery) { + auto json_flat_index = + dynamic_cast(json_index_.get()); + ASSERT_NE(json_flat_index, nullptr); + + std::string json_path = "/profile/name/first"; + auto executor = json_flat_index->create_executor(json_path); + auto result = executor->RegexQuery("[AB].*ice"); + ASSERT_EQ(result.size(), json_data_.size()); + ASSERT_TRUE(result[0]); // Alice matches [AB].*ice + ASSERT_FALSE(result[1]); // Bob doesn't match [AB].*ice + ASSERT_FALSE(result[2]); // Charlie doesn't match [AB].*ice + + // Test another regex pattern + auto result2 = executor->RegexQuery("B.b"); + ASSERT_EQ(result2.size(), json_data_.size()); + ASSERT_FALSE(result2[0]); // Alice doesn't match B.b + ASSERT_TRUE(result2[1]); // Bob matches B.b + ASSERT_FALSE(result2[2]); // Charlie doesn't match B.b +} + +TEST_F(JsonFlatIndexTest, TestPatternMatchQuery) { + auto json_flat_index = + dynamic_cast(json_index_.get()); + ASSERT_NE(json_flat_index, nullptr); + + std::string json_path = "/profile/name/first"; + auto executor = json_flat_index->create_executor(json_path); + auto result = executor->PatternMatch("A%e", proto::plan::Match); + ASSERT_EQ(result.size(), json_data_.size()); + ASSERT_TRUE(result[0]); // Alice matches A%e + ASSERT_FALSE(result[1]); // Bob doesn't match A%e + ASSERT_FALSE(result[2]); // Charlie doesn't match A%e +} + +TEST_F(JsonFlatIndexTest, TestBooleanInQuery) { + auto json_flat_index = + dynamic_cast(json_index_.get()); + ASSERT_NE(json_flat_index, nullptr); + + std::string json_path = "/profile/is_active"; + auto executor = json_flat_index->create_executor(json_path); + bool values[] = {true}; + auto result = executor->In(1, values); + ASSERT_EQ(result.size(), json_data_.size()); + ASSERT_TRUE(result[0]); // Alice is active + ASSERT_FALSE(result[1]); // Bob is not active + ASSERT_TRUE(result[2]); // Charlie is active +} + +TEST_F(JsonFlatIndexTest, TestBooleanNotInQuery) { + auto json_flat_index = + dynamic_cast(json_index_.get()); + ASSERT_NE(json_flat_index, nullptr); + + std::string json_path = "/profile/is_active"; + auto executor = json_flat_index->create_executor(json_path); + bool values[] = {false}; + auto result = executor->NotIn(1, values); + ASSERT_EQ(result.size(), json_data_.size()); + ASSERT_TRUE(result[0]); // Alice is not in [false] + ASSERT_FALSE(result[1]); // Bob is in [false] + ASSERT_TRUE(result[2]); // Charlie is not in [false] +} + +TEST_F(JsonFlatIndexTest, TestInt64InQuery) { + auto json_flat_index = + dynamic_cast(json_index_.get()); + ASSERT_NE(json_flat_index, nullptr); + + std::string json_path = "/profile/employee_id"; + auto executor = json_flat_index->create_executor(json_path); + int64_t values[] = {1001, 1002}; + auto result = executor->In(2, values); + ASSERT_EQ(result.size(), json_data_.size()); + ASSERT_TRUE(result[0]); // Alice's id is 1001 + ASSERT_TRUE(result[1]); // Bob's id is 1002 + ASSERT_FALSE(result[2]); // Charlie's id is 1003 +} + +TEST_F(JsonFlatIndexTest, TestInt64NotInQuery) { + auto json_flat_index = + dynamic_cast(json_index_.get()); + ASSERT_NE(json_flat_index, nullptr); + + std::string json_path = "/profile/employee_id"; + auto executor = json_flat_index->create_executor(json_path); + int64_t values[] = {1003}; + auto result = executor->NotIn(1, values); + ASSERT_EQ(result.size(), json_data_.size()); + ASSERT_TRUE(result[0]); // Alice's id is not 1003 + ASSERT_TRUE(result[1]); // Bob's id is not 1003 + ASSERT_FALSE(result[2]); // Charlie's id is 1003 +} + +TEST_F(JsonFlatIndexTest, TestInt64RangeQuery) { + auto json_flat_index = + dynamic_cast(json_index_.get()); + ASSERT_NE(json_flat_index, nullptr); + + std::string json_path = "/profile/employee_id"; + auto executor = json_flat_index->create_executor(json_path); + + // Test LessThan + auto result = executor->Range(int64_t(1002), OpType::LessThan); + ASSERT_EQ(result.size(), json_data_.size()); + ASSERT_TRUE(result[0]); // 1001 < 1002 + ASSERT_FALSE(result[1]); // 1002 = 1002 + ASSERT_FALSE(result[2]); // 1003 > 1002 + + // Test Range between bounds + auto range_result = executor->Range(int64_t(1001), // lower bound + true, // lower bound inclusive + int64_t(1002), // upper bound + true); // upper bound inclusive + ASSERT_EQ(range_result.size(), json_data_.size()); + ASSERT_TRUE(range_result[0]); // 1001 in [1001, 1002] + ASSERT_TRUE(range_result[1]); // 1002 in [1001, 1002] + ASSERT_FALSE(range_result[2]); // 1003 not in [1001, 1002] + + // Test GreaterEqual + auto ge_result = executor->Range(int64_t(1002), OpType::GreaterEqual); + ASSERT_EQ(ge_result.size(), json_data_.size()); + ASSERT_FALSE(ge_result[0]); // 1001 < 1002 + ASSERT_TRUE(ge_result[1]); // 1002 >= 1002 + ASSERT_TRUE(ge_result[2]); // 1003 >= 1002 +} + +TEST_F(JsonFlatIndexTest, TestArrayStringInQuery) { + auto json_flat_index = + dynamic_cast(json_index_.get()); + ASSERT_NE(json_flat_index, nullptr); + + std::string json_path = "/profile/skills"; + auto executor = json_flat_index->create_executor(json_path); + std::string values[] = {"cpp", "python"}; + + // Test for cpp + auto result_cpp = executor->In(1, &values[0]); + ASSERT_EQ(result_cpp.size(), json_data_.size()); + ASSERT_TRUE(result_cpp[0]); // Alice has cpp + ASSERT_FALSE(result_cpp[1]); // Bob doesn't have cpp + ASSERT_FALSE(result_cpp[2]); // Charlie doesn't have cpp + + // Test for python + auto result_python = executor->In(1, &values[1]); + ASSERT_EQ(result_python.size(), json_data_.size()); + ASSERT_TRUE(result_python[0]); // Alice has python + ASSERT_TRUE(result_python[1]); // Bob has python + ASSERT_TRUE(result_python[2]); // Charlie has python +} + +TEST_F(JsonFlatIndexTest, TestArrayNumberInQuery) { + auto json_flat_index = + dynamic_cast(json_index_.get()); + ASSERT_NE(json_flat_index, nullptr); + + std::string json_path = "/profile/scores"; + auto executor = json_flat_index->create_executor(json_path); + int64_t values[] = {95, 90}; + + // Test for score 95 + auto result_95 = executor->In(1, &values[0]); + ASSERT_EQ(result_95.size(), json_data_.size()); + ASSERT_TRUE(result_95[0]); // Alice has score 95 + ASSERT_FALSE(result_95[1]); // Bob doesn't have score 95 + ASSERT_FALSE(result_95[2]); // Charlie doesn't have score 95 + + // Test for score 90 + auto result_90 = executor->In(1, &values[1]); + ASSERT_EQ(result_90.size(), json_data_.size()); + ASSERT_FALSE(result_90[0]); // Alice doesn't have score 90 + ASSERT_TRUE(result_90[1]); // Bob has score 90 + ASSERT_FALSE(result_90[2]); // Charlie doesn't have score 90 +} + +TEST_F(JsonFlatIndexTest, TestArrayNumberRangeQuery) { + auto json_flat_index = + dynamic_cast(json_index_.get()); + ASSERT_NE(json_flat_index, nullptr); + + std::string json_path = "/profile/scores"; + auto executor = json_flat_index->create_executor(json_path); + + // Test scores greater than 90 + auto result = executor->Range(int64_t(90), OpType::GreaterThan); + ASSERT_EQ(result.size(), json_data_.size()); + ASSERT_TRUE(result[0]); // Alice has scores > 90 (92, 95) + ASSERT_FALSE(result[1]); // Bob doesn't have scores > 90 + ASSERT_TRUE(result[2]); // Charlie has score 91 > 90 + + // Test scores in range [90, 92] + auto range_result = executor->Range(int64_t(90), // lower bound + true, // lower bound inclusive + int64_t(92), // upper bound + true); // upper bound inclusive + ASSERT_EQ(range_result.size(), json_data_.size()); + ASSERT_TRUE(range_result[0]); // Alice has score 92 + ASSERT_TRUE(range_result[1]); // Bob has score 90 + ASSERT_TRUE(range_result[2]); // Charlie has score 91 +} + +TEST_F(JsonFlatIndexTest, TestInApply) { + auto json_flat_index = + dynamic_cast(json_index_.get()); + ASSERT_NE(json_flat_index, nullptr); + + std::string json_path = "/profile/name/first"; + auto executor = json_flat_index->create_executor(json_path); + + std::string values[] = {"Alice", "Bob"}; + auto result = + executor->InApplyFilter(2, values, [](size_t offset) { return true; }); + ASSERT_EQ(result.size(), json_data_.size()); + ASSERT_TRUE(result[0]); // Alice + ASSERT_TRUE(result[1]); // Bob + ASSERT_FALSE(result[2]); // Charlie +} + +TEST_F(JsonFlatIndexTest, TestInApplyCallback) { + auto json_flat_index = + dynamic_cast(json_index_.get()); + ASSERT_NE(json_flat_index, nullptr); + + std::string json_path = "/profile/name/first"; + auto executor = json_flat_index->create_executor(json_path); + std::string values[] = {"Alice", "Bob"}; + executor->InApplyCallback(2, values, [](size_t offset) { + ASSERT_TRUE(offset == 0 || offset == 1); + }); +} + +TEST_F(JsonFlatIndexTest, TestQuery) { + auto json_flat_index = + dynamic_cast(json_index_.get()); + ASSERT_NE(json_flat_index, nullptr); + + std::string json_path = "/profile/employee_id"; + auto executor = json_flat_index->create_executor(json_path); + + auto dataset = std::make_unique(); + dataset->Set(milvus::index::OPERATOR_TYPE, + proto::plan::OpType::GreaterThan); + dataset->Set(milvus::index::RANGE_VALUE, 1001); + auto result = executor->Query(std::move(dataset)); + ASSERT_EQ(result.size(), json_data_.size()); + ASSERT_FALSE(result[0]); // Alice + ASSERT_TRUE(result[1]); // Bob + ASSERT_TRUE(result[2]); // Charlie +} + +class JsonFlatIndexExprTest : public ::testing::Test { + protected: + void + SetUp() override { + json_data_ = { + R"({"a": 1.0})", + R"({"a": "abc"})", + R"({"a": 3.0})", + R"({"a": true})", + R"({"a": {"b": 1}})", + R"({"a": []})", + R"({"a": ["a", "b"]})", + R"({"a": null})", // exists null + R"(1)", + R"("abc")", + R"(1.0)", + R"(true)", + R"([1, 2, 3])", + R"({"a": 1, "b": 2})", + R"({})", + R"(null)", + }; + + auto json_index_path = ""; + + auto schema = std::make_shared(); + auto vec_fid = schema->AddDebugField( + "fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2); + auto i64_fid = schema->AddDebugField("age64", DataType::INT64); + json_fid_ = schema->AddDebugField("json", DataType::JSON, true); + schema->set_primary_field_id(i64_fid); + + segment_ = segcore::CreateSealedSegment(schema); + segcore::LoadIndexInfo load_index_info; + + auto file_manager_ctx = storage::FileManagerContext(); + file_manager_ctx.fieldDataMeta.field_schema.set_data_type( + milvus::proto::schema::JSON); + file_manager_ctx.fieldDataMeta.field_schema.set_fieldid( + json_fid_.get()); + file_manager_ctx.fieldDataMeta.field_schema.set_nullable(true); + auto index = index::IndexFactory::GetInstance().CreateJsonIndex( + index::INVERTED_INDEX_TYPE, + JsonCastType::FromString("JSON"), + json_index_path, + file_manager_ctx); + + json_index_ = std::unique_ptr( + static_cast(index.release())); + + auto json_field = + std::make_shared>(DataType::JSON, true); + std::vector jsons; + for (auto& json_str : json_data_) { + jsons.push_back(milvus::Json(simdjson::padded_string(json_str))); + } + json_field->add_json_data(jsons); + auto json_valid_data = json_field->ValidData(); + json_valid_data[0] = 0xFF; + json_valid_data[1] = 0xFF; + + json_index_->BuildWithFieldData({json_field}); + json_index_->finish(); + json_index_->create_reader(); + + load_index_info.field_id = json_fid_.get(); + load_index_info.field_type = DataType::JSON; + load_index_info.index = std::move(json_index_); + load_index_info.index_params = {{JSON_PATH, json_index_path}}; + segment_->LoadIndex(load_index_info); + auto cm = milvus::storage::RemoteChunkManagerSingleton::GetInstance() + .GetRemoteChunkManager(); + auto load_info = PrepareSingleFieldInsertBinlog( + 1, 1, 1, json_fid_.get(), {json_field}, cm); + segment_->LoadFieldData(load_info); + } + + void + TearDown() override { + } + + FieldId json_fid_; + std::vector json_data_; + std::unique_ptr json_index_; + segcore::SegmentSealedUPtr segment_; +}; + +TEST_F(JsonFlatIndexExprTest, TestUnaryExpr) { + proto::plan::GenericValue value; + value.set_int64_val(1); + auto expr = std::make_shared( + expr::ColumnInfo(json_fid_, DataType::JSON, {""}), + proto::plan::OpType::GreaterEqual, + value, + std::vector()); + auto plan = + std::make_shared(DEFAULT_PLANNODE_ID, expr); + auto final = query::ExecuteQueryExpr( + plan, segment_.get(), json_data_.size(), MAX_TIMESTAMP); + EXPECT_EQ(final.count(), 3); + EXPECT_TRUE(final[8]); + EXPECT_TRUE(final[10]); + EXPECT_TRUE(final[12]); +} + +TEST_F(JsonFlatIndexExprTest, TestExistsExpr) { + auto expr = std::make_shared( + expr::ColumnInfo(json_fid_, DataType::JSON, {""})); + auto plan = + std::make_shared(DEFAULT_PLANNODE_ID, expr); + auto final = query::ExecuteQueryExpr( + plan, segment_.get(), json_data_.size(), MAX_TIMESTAMP); + EXPECT_EQ(final.count(), 12); + EXPECT_FALSE(final[5]); + EXPECT_FALSE(final[7]); + EXPECT_FALSE(final[14]); + EXPECT_FALSE(final[15]); +} +} // namespace milvus::test diff --git a/internal/datacoord/index_meta.go b/internal/datacoord/index_meta.go index c7578cb548..c234d21b1a 100644 --- a/internal/datacoord/index_meta.go +++ b/internal/datacoord/index_meta.go @@ -248,21 +248,17 @@ func (m *indexMeta) updateIndexTasksMetrics() { log.Ctx(m.ctx).Info("update index metric", zap.Int("collectionNum", len(taskMetrics))) } -func checkJsonParams(index *model.Index, req *indexpb.CreateIndexRequest) bool { - castType1, err := getIndexParam(index.IndexParams, common.JSONCastTypeKey) - if err != nil { +func checkIdenticalJson(index *model.Index, req *indexpb.CreateIndexRequest) bool { + // Skip error handling since json path existence is guaranteed in CreateIndex + jsonPath1, _ := getIndexParam(index.IndexParams, common.JSONPathKey) + jsonPath2, _ := getIndexParam(req.GetIndexParams(), common.JSONPathKey) + + if jsonPath1 != jsonPath2 { return false } - castType2, err := getIndexParam(req.GetIndexParams(), common.JSONCastTypeKey) - if err != nil || castType1 != castType2 { - return false - } - jsonPath1, err := getIndexParam(index.IndexParams, common.JSONPathKey) - if err != nil { - return false - } - jsonPath2, err := getIndexParam(req.GetIndexParams(), common.JSONPathKey) - return err == nil && jsonPath1 == jsonPath2 + castType1, _ := getIndexParam(index.IndexParams, common.JSONCastTypeKey) + castType2, _ := getIndexParam(req.GetIndexParams(), common.JSONCastTypeKey) + return castType1 == castType2 } func checkParams(fieldIndex *model.Index, req *indexpb.CreateIndexRequest) bool { @@ -370,7 +366,8 @@ func (m *indexMeta) canCreateIndex(req *indexpb.CreateIndexRequest, isJson bool) continue } if req.IndexName == index.IndexName { - if req.FieldID == index.FieldID && checkParams(index, req) && (!isJson || checkJsonParams(index, req)) { + if req.FieldID == index.FieldID && checkParams(index, req) && + /*only check json params when it is json index*/ (!isJson || checkIdenticalJson(index, req)) { return index.IndexID, nil } errMsg := "at most one distinct index is allowed per field" @@ -383,16 +380,12 @@ func (m *indexMeta) canCreateIndex(req *indexpb.CreateIndexRequest, isJson bool) } if req.FieldID == index.FieldID { if isJson { - // if it is json index, check if json paths are same - jsonPath1, err := getIndexParam(index.IndexParams, common.JSONPathKey) - if err != nil { - return 0, err - } - jsonPath2, err := getIndexParam(req.GetIndexParams(), common.JSONPathKey) - if err != nil { - return 0, err - } + // Skip error handling since json path existence is guaranteed in CreateIndex + jsonPath1, _ := getIndexParam(index.IndexParams, common.JSONPathKey) + jsonPath2, _ := getIndexParam(req.GetIndexParams(), common.JSONPathKey) + if jsonPath1 != jsonPath2 { + // if json path is not same, create index is allowed continue } } diff --git a/internal/datacoord/index_service.go b/internal/datacoord/index_service.go index 36bbc4e1e0..7a9c3ab3c9 100644 --- a/internal/datacoord/index_service.go +++ b/internal/datacoord/index_service.go @@ -161,16 +161,24 @@ func (s *Server) CreateIndex(ctx context.Context, req *indexpb.CreateIndexReques } if isJson { + // check json_path and json_cast_type exist jsonPath, err := getIndexParam(req.GetIndexParams(), common.JSONPathKey) if err != nil { - log.Error("get json path from index params failed", zap.Error(err)) + log.Warn("get json path failed", zap.Error(err)) return merr.Status(err), nil } + _, err = getIndexParam(req.GetIndexParams(), common.JSONCastTypeKey) + if err != nil { + log.Warn("get json cast type failed", zap.Error(err)) + return merr.Status(err), nil + } + nestedPath, err := s.parseAndVerifyNestedPath(jsonPath, schema, req.GetFieldID()) if err != nil { log.Error("parse nested path failed", zap.Error(err)) return merr.Status(err), nil } + // set nested path as json path setIndexParam(req.GetIndexParams(), common.JSONPathKey, nestedPath) } @@ -183,19 +191,17 @@ func (s *Server) CreateIndex(ctx context.Context, req *indexpb.CreateIndexReques } defaultIndexName := fieldName if isJson { - jsonPath, err := getIndexParam(req.GetIndexParams(), common.JSONPathKey) - if err != nil { - return merr.Status(err), nil - } - + // ignore error, because it's already checked in getIndexParam before + jsonPath, _ := getIndexParam(req.GetIndexParams(), common.JSONPathKey) + // filter indexes by json path, the length of indexes should not be larger than 1 + // this is guaranteed by CanCreateIndex indexes = lo.Filter(indexes, func(index *model.Index, i int) bool { - path, err := getIndexParam(index.IndexParams, common.JSONPathKey) - return err == nil && path == jsonPath + path, _ := getIndexParam(index.IndexParams, common.JSONPathKey) + return path == jsonPath }) defaultIndexName += jsonPath } - if len(indexes) == 0 { req.IndexName = defaultIndexName } else if len(indexes) == 1 { diff --git a/internal/datacoord/index_service_test.go b/internal/datacoord/index_service_test.go index 1c66fe4baa..410b9418f7 100644 --- a/internal/datacoord/index_service_test.go +++ b/internal/datacoord/index_service_test.go @@ -19,6 +19,7 @@ package datacoord import ( "context" "math" + "strconv" "testing" "time" @@ -2822,4 +2823,31 @@ func TestJsonIndex(t *testing.T) { } resp, err = s.CreateIndex(context.Background(), req) assert.NoError(t, merr.CheckRPCCall(resp, err)) + + // test json flat index + req = &indexpb.CreateIndexRequest{ + FieldID: 0, + IndexName: "h", + IndexParams: []*commonpb.KeyValuePair{{Key: common.JSONCastTypeKey, Value: strconv.Itoa(int(schemapb.DataType_JSON))}, {Key: common.JSONPathKey, Value: "json[\"a\"][\"b\"]"}}, + } + resp, err = s.CreateIndex(context.Background(), req) + assert.NoError(t, merr.CheckRPCCall(resp, err)) + + // test json flat index with dynamic field + req = &indexpb.CreateIndexRequest{ + FieldID: 2, + IndexName: "i", + IndexParams: []*commonpb.KeyValuePair{{Key: common.JSONCastTypeKey, Value: strconv.Itoa(int(schemapb.DataType_JSON))}, {Key: common.JSONPathKey, Value: "dynamic"}}, + } + resp, err = s.CreateIndex(context.Background(), req) + assert.NoError(t, merr.CheckRPCCall(resp, err)) + + // duplicated json flat index + req = &indexpb.CreateIndexRequest{ + FieldID: 0, + IndexName: "a", + IndexParams: []*commonpb.KeyValuePair{{Key: common.JSONCastTypeKey, Value: strconv.Itoa(int(schemapb.DataType_JSON))}, {Key: common.JSONPathKey, Value: "json[\"a\"]"}}, + } + resp, err = s.CreateIndex(context.Background(), req) + assert.Error(t, merr.CheckRPCCall(resp, err)) }