mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
feat: support json index (#36750)
https://github.com/milvus-io/milvus/issues/35528 This PR adds JSON index support for JSON and dynamic fields. For now, the index is only used by unary queries such as 'a["b"] > 1'; more filter types will be supported later. Basic usage: ``` collection.create_index("json_field", {"index_type": "INVERTED", "params": {"json_cast_type": DataType.STRING, "json_path": 'json_field["a"]["b"]'}}) ``` There are some limitations when using this index: 1. If a record does not have the JSON path you specify, the record is ignored and no error is raised. 2. If the value at the JSON path cannot be cast to the type you specify, the record is ignored and no error is raised. 3. A specific JSON path can have only one JSON index. 4. If you try to create more than one JSON index on a single JSON field, the SDK (pymilvus<=2.4.7) may return immediately because of its internal implementation. This will be fixed in a later version. --------- Signed-off-by: sunby <sunbingyi1992@gmail.com>
This commit is contained in:
parent
5d89838ad9
commit
b59555057d
@ -77,3 +77,6 @@ const int64_t DEFAULT_BITMAP_INDEX_BUILD_MODE_BOUND = 500;
|
|||||||
const int64_t DEFAULT_HYBRID_INDEX_BITMAP_CARDINALITY_LIMIT = 100;
|
const int64_t DEFAULT_HYBRID_INDEX_BITMAP_CARDINALITY_LIMIT = 100;
|
||||||
|
|
||||||
const size_t MARISA_NULL_KEY_ID = -1;
|
const size_t MARISA_NULL_KEY_ID = -1;
|
||||||
|
|
||||||
|
const std::string JSON_CAST_TYPE = "json_cast_type";
|
||||||
|
const std::string JSON_PATH = "json_path";
|
||||||
@ -641,6 +641,20 @@ class FieldDataJsonImpl : public FieldDataImpl<Json, true> {
|
|||||||
}
|
}
|
||||||
length_ += n;
|
length_ += n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// only for test
|
||||||
|
void
|
||||||
|
add_json_data(const std::vector<Json>& json) {
|
||||||
|
std::lock_guard lck(tell_mutex_);
|
||||||
|
if (length_ + json.size() > get_num_rows()) {
|
||||||
|
resize_field_data(length_ + json.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < json.size(); ++i) {
|
||||||
|
data_[length_ + i] = json[i];
|
||||||
|
}
|
||||||
|
length_ += json.size();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class FieldDataSparseVectorImpl
|
class FieldDataSparseVectorImpl
|
||||||
|
|||||||
@ -163,6 +163,11 @@ class FieldMeta {
|
|||||||
return IsVectorDataType(type_);
|
return IsVectorDataType(type_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports whether this field's data type is DataType::JSON.
bool
|
||||||
|
is_json() const {
|
||||||
|
return type_ == DataType::JSON;
|
||||||
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
is_string() const {
|
is_string() const {
|
||||||
return IsStringDataType(type_);
|
return IsStringDataType(type_);
|
||||||
|
|||||||
@ -453,6 +453,7 @@ class PhyBinaryArithOpEvalRangeExpr : public SegmentExpr {
|
|||||||
name,
|
name,
|
||||||
segment,
|
segment,
|
||||||
expr->column_.field_id_,
|
expr->column_.field_id_,
|
||||||
|
expr->column_.nested_path_,
|
||||||
active_count,
|
active_count,
|
||||||
batch_size),
|
batch_size),
|
||||||
expr_(expr) {
|
expr_(expr) {
|
||||||
|
|||||||
@ -229,6 +229,7 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
|
|||||||
name,
|
name,
|
||||||
segment,
|
segment,
|
||||||
expr->column_.field_id_,
|
expr->column_.field_id_,
|
||||||
|
expr->column_.nested_path_,
|
||||||
active_count,
|
active_count,
|
||||||
batch_size),
|
batch_size),
|
||||||
expr_(expr) {
|
expr_(expr) {
|
||||||
|
|||||||
@ -47,6 +47,7 @@ class PhyExistsFilterExpr : public SegmentExpr {
|
|||||||
name,
|
name,
|
||||||
segment,
|
segment,
|
||||||
expr->column_.field_id_,
|
expr->column_.field_id_,
|
||||||
|
expr->column_.nested_path_,
|
||||||
active_count,
|
active_count,
|
||||||
batch_size),
|
batch_size),
|
||||||
expr_(expr) {
|
expr_(expr) {
|
||||||
|
|||||||
@ -20,12 +20,15 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
#include "common/FieldDataInterface.h"
|
||||||
|
#include "common/Json.h"
|
||||||
#include "common/Types.h"
|
#include "common/Types.h"
|
||||||
#include "exec/expression/EvalCtx.h"
|
#include "exec/expression/EvalCtx.h"
|
||||||
#include "exec/expression/VectorFunction.h"
|
#include "exec/expression/VectorFunction.h"
|
||||||
#include "exec/expression/Utils.h"
|
#include "exec/expression/Utils.h"
|
||||||
#include "exec/QueryContext.h"
|
#include "exec/QueryContext.h"
|
||||||
#include "expr/ITypeExpr.h"
|
#include "expr/ITypeExpr.h"
|
||||||
|
#include "log/Log.h"
|
||||||
#include "query/PlanProto.h"
|
#include "query/PlanProto.h"
|
||||||
|
|
||||||
namespace milvus {
|
namespace milvus {
|
||||||
@ -109,12 +112,15 @@ class SegmentExpr : public Expr {
|
|||||||
SegmentExpr(const std::vector<ExprPtr>&& input,
|
SegmentExpr(const std::vector<ExprPtr>&& input,
|
||||||
const std::string& name,
|
const std::string& name,
|
||||||
const segcore::SegmentInternalInterface* segment,
|
const segcore::SegmentInternalInterface* segment,
|
||||||
const FieldId& field_id,
|
const FieldId field_id,
|
||||||
|
const std::vector<std::string> nested_path,
|
||||||
int64_t active_count,
|
int64_t active_count,
|
||||||
int64_t batch_size)
|
int64_t batch_size)
|
||||||
: Expr(DataType::BOOL, std::move(input), name),
|
: Expr(DataType::BOOL, std::move(input), name),
|
||||||
segment_(segment),
|
segment_(segment),
|
||||||
field_id_(field_id),
|
field_id_(field_id),
|
||||||
|
nested_path_(nested_path),
|
||||||
|
|
||||||
active_count_(active_count),
|
active_count_(active_count),
|
||||||
batch_size_(batch_size) {
|
batch_size_(batch_size) {
|
||||||
size_per_chunk_ = segment_->size_per_chunk();
|
size_per_chunk_ = segment_->size_per_chunk();
|
||||||
@ -129,6 +135,7 @@ class SegmentExpr : public Expr {
|
|||||||
InitSegmentExpr() {
|
InitSegmentExpr() {
|
||||||
auto& schema = segment_->get_schema();
|
auto& schema = segment_->get_schema();
|
||||||
auto& field_meta = schema[field_id_];
|
auto& field_meta = schema[field_id_];
|
||||||
|
field_type_ = field_meta.get_data_type();
|
||||||
|
|
||||||
if (schema.get_primary_field_id().has_value() &&
|
if (schema.get_primary_field_id().has_value() &&
|
||||||
schema.get_primary_field_id().value() == field_id_ &&
|
schema.get_primary_field_id().value() == field_id_ &&
|
||||||
@ -137,9 +144,16 @@ class SegmentExpr : public Expr {
|
|||||||
pk_type_ = field_meta.get_data_type();
|
pk_type_ = field_meta.get_data_type();
|
||||||
}
|
}
|
||||||
|
|
||||||
is_index_mode_ = segment_->HasIndex(field_id_);
|
if (field_meta.get_data_type() == DataType::JSON) {
|
||||||
if (is_index_mode_) {
|
auto pointer = milvus::Json::pointer(nested_path_);
|
||||||
num_index_chunk_ = segment_->num_chunk_index(field_id_);
|
if (is_index_mode_ = segment_->HasIndex(field_id_, pointer)) {
|
||||||
|
num_index_chunk_ = 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
is_index_mode_ = segment_->HasIndex(field_id_);
|
||||||
|
if (is_index_mode_) {
|
||||||
|
num_index_chunk_ = segment_->num_chunk_index(field_id_);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// if index not include raw data, also need load data
|
// if index not include raw data, also need load data
|
||||||
if (segment_->HasFieldData(field_id_)) {
|
if (segment_->HasFieldData(field_id_)) {
|
||||||
@ -767,9 +781,21 @@ class SegmentExpr : public Expr {
|
|||||||
// It avoids indexing execute for every batch because indexing
|
// It avoids indexing execute for every batch because indexing
|
||||||
// executing costs quite much time.
|
// executing costs quite much time.
|
||||||
if (cached_index_chunk_id_ != i) {
|
if (cached_index_chunk_id_ != i) {
|
||||||
const Index& index =
|
Index* index_ptr = nullptr;
|
||||||
segment_->chunk_scalar_index<IndexInnerType>(field_id_, i);
|
|
||||||
auto* index_ptr = const_cast<Index*>(&index);
|
if (field_type_ == DataType::JSON) {
|
||||||
|
auto pointer = milvus::Json::pointer(nested_path_);
|
||||||
|
|
||||||
|
const Index& index =
|
||||||
|
segment_->chunk_scalar_index<IndexInnerType>(
|
||||||
|
field_id_, pointer, i);
|
||||||
|
index_ptr = const_cast<Index*>(&index);
|
||||||
|
} else {
|
||||||
|
const Index& index =
|
||||||
|
segment_->chunk_scalar_index<IndexInnerType>(field_id_,
|
||||||
|
i);
|
||||||
|
index_ptr = const_cast<Index*>(&index);
|
||||||
|
}
|
||||||
cached_index_chunk_res_ = std::move(func(index_ptr, values...));
|
cached_index_chunk_res_ = std::move(func(index_ptr, values...));
|
||||||
auto valid_result = index_ptr->IsNotNull();
|
auto valid_result = index_ptr->IsNotNull();
|
||||||
cached_index_chunk_valid_res_ = std::move(valid_result);
|
cached_index_chunk_valid_res_ = std::move(valid_result);
|
||||||
@ -1067,6 +1093,9 @@ class SegmentExpr : public Expr {
|
|||||||
DataType pk_type_;
|
DataType pk_type_;
|
||||||
int64_t batch_size_;
|
int64_t batch_size_;
|
||||||
|
|
||||||
|
std::vector<std::string> nested_path_;
|
||||||
|
DataType field_type_;
|
||||||
|
|
||||||
bool is_index_mode_{false};
|
bool is_index_mode_{false};
|
||||||
bool is_data_mode_{false};
|
bool is_data_mode_{false};
|
||||||
// sometimes need to skip index and using raw data
|
// sometimes need to skip index and using raw data
|
||||||
|
|||||||
@ -40,6 +40,7 @@ class PhyJsonContainsFilterExpr : public SegmentExpr {
|
|||||||
name,
|
name,
|
||||||
segment,
|
segment,
|
||||||
expr->column_.field_id_,
|
expr->column_.field_id_,
|
||||||
|
expr->column_.nested_path_,
|
||||||
active_count,
|
active_count,
|
||||||
batch_size),
|
batch_size),
|
||||||
expr_(expr) {
|
expr_(expr) {
|
||||||
|
|||||||
@ -40,6 +40,7 @@ class PhyNullExpr : public SegmentExpr {
|
|||||||
name,
|
name,
|
||||||
segment,
|
segment,
|
||||||
expr->column_.field_id_,
|
expr->column_.field_id_,
|
||||||
|
expr->column_.nested_path_,
|
||||||
active_count,
|
active_count,
|
||||||
batch_size),
|
batch_size),
|
||||||
expr_(expr) {
|
expr_(expr) {
|
||||||
|
|||||||
@ -61,6 +61,7 @@ class PhyTermFilterExpr : public SegmentExpr {
|
|||||||
name,
|
name,
|
||||||
segment,
|
segment,
|
||||||
expr->column_.field_id_,
|
expr->column_.field_id_,
|
||||||
|
expr->column_.nested_path_,
|
||||||
active_count,
|
active_count,
|
||||||
batch_size),
|
batch_size),
|
||||||
expr_(expr),
|
expr_(expr),
|
||||||
|
|||||||
@ -17,6 +17,9 @@
|
|||||||
#include "UnaryExpr.h"
|
#include "UnaryExpr.h"
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include "common/Json.h"
|
#include "common/Json.h"
|
||||||
|
#include "common/Types.h"
|
||||||
|
#include "common/type_c.h"
|
||||||
|
#include "log/Log.h"
|
||||||
|
|
||||||
namespace milvus {
|
namespace milvus {
|
||||||
namespace exec {
|
namespace exec {
|
||||||
@ -191,26 +194,50 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
}
|
}
|
||||||
case DataType::JSON: {
|
case DataType::JSON: {
|
||||||
auto val_type = expr_->val_.val_case();
|
auto val_type = expr_->val_.val_case();
|
||||||
switch (val_type) {
|
if (CanUseIndexForJson() && !has_offset_input_) {
|
||||||
case proto::plan::GenericValue::ValCase::kBoolVal:
|
switch (val_type) {
|
||||||
result = ExecRangeVisitorImplJson<bool>(input);
|
case proto::plan::GenericValue::ValCase::kBoolVal:
|
||||||
break;
|
result = ExecRangeVisitorImplForIndex<bool>();
|
||||||
case proto::plan::GenericValue::ValCase::kInt64Val:
|
break;
|
||||||
result = ExecRangeVisitorImplJson<int64_t>(input);
|
case proto::plan::GenericValue::ValCase::kInt64Val:
|
||||||
break;
|
result = ExecRangeVisitorImplForIndex<int64_t>();
|
||||||
case proto::plan::GenericValue::ValCase::kFloatVal:
|
break;
|
||||||
result = ExecRangeVisitorImplJson<double>(input);
|
case proto::plan::GenericValue::ValCase::kFloatVal:
|
||||||
break;
|
result = ExecRangeVisitorImplForIndex<double>();
|
||||||
case proto::plan::GenericValue::ValCase::kStringVal:
|
break;
|
||||||
result = ExecRangeVisitorImplJson<std::string>(input);
|
case proto::plan::GenericValue::ValCase::kStringVal:
|
||||||
break;
|
result = ExecRangeVisitorImplForIndex<std::string>();
|
||||||
case proto::plan::GenericValue::ValCase::kArrayVal:
|
break;
|
||||||
result =
|
case proto::plan::GenericValue::ValCase::kArrayVal:
|
||||||
ExecRangeVisitorImplJson<proto::plan::Array>(input);
|
result =
|
||||||
break;
|
ExecRangeVisitorImplForIndex<proto::plan::Array>();
|
||||||
default:
|
break;
|
||||||
PanicInfo(
|
default:
|
||||||
DataTypeInvalid, "unknown data type: {}", val_type);
|
PanicInfo(
|
||||||
|
DataTypeInvalid, "unknown data type: {}", val_type);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
switch (val_type) {
|
||||||
|
case proto::plan::GenericValue::ValCase::kBoolVal:
|
||||||
|
result = ExecRangeVisitorImplJson<bool>(input);
|
||||||
|
break;
|
||||||
|
case proto::plan::GenericValue::ValCase::kInt64Val:
|
||||||
|
result = ExecRangeVisitorImplJson<int64_t>(input);
|
||||||
|
break;
|
||||||
|
case proto::plan::GenericValue::ValCase::kFloatVal:
|
||||||
|
result = ExecRangeVisitorImplJson<double>(input);
|
||||||
|
break;
|
||||||
|
case proto::plan::GenericValue::ValCase::kStringVal:
|
||||||
|
result = ExecRangeVisitorImplJson<std::string>(input);
|
||||||
|
break;
|
||||||
|
case proto::plan::GenericValue::ValCase::kArrayVal:
|
||||||
|
result =
|
||||||
|
ExecRangeVisitorImplJson<proto::plan::Array>(input);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
PanicInfo(
|
||||||
|
DataTypeInvalid, "unknown data type: {}", val_type);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -1086,6 +1113,13 @@ PhyUnaryRangeFilterExpr::CanUseIndex() {
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
PhyUnaryRangeFilterExpr::CanUseIndexForJson() {
|
||||||
|
use_index_ = segment_->HasIndex(
|
||||||
|
field_id_, milvus::Json::pointer(expr_->column_.nested_path_));
|
||||||
|
return use_index_;
|
||||||
|
}
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyUnaryRangeFilterExpr::ExecTextMatch() {
|
PhyUnaryRangeFilterExpr::ExecTextMatch() {
|
||||||
using Index = index::TextMatchIndex;
|
using Index = index::TextMatchIndex;
|
||||||
|
|||||||
@ -320,6 +320,7 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
|
|||||||
name,
|
name,
|
||||||
segment,
|
segment,
|
||||||
expr->column_.field_id_,
|
expr->column_.field_id_,
|
||||||
|
expr->column_.nested_path_,
|
||||||
active_count,
|
active_count,
|
||||||
batch_size),
|
batch_size),
|
||||||
expr_(expr) {
|
expr_(expr) {
|
||||||
@ -379,6 +380,9 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
|
|||||||
bool
|
bool
|
||||||
CanUseIndexForArray();
|
CanUseIndexForArray();
|
||||||
|
|
||||||
|
bool
|
||||||
|
CanUseIndexForJson();
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecTextMatch();
|
ExecTextMatch();
|
||||||
|
|
||||||
|
|||||||
@ -15,11 +15,15 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
#include "index/IndexFactory.h"
|
#include "index/IndexFactory.h"
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <memory>
|
||||||
#include "common/EasyAssert.h"
|
#include "common/EasyAssert.h"
|
||||||
|
#include "common/FieldDataInterface.h"
|
||||||
#include "common/Types.h"
|
#include "common/Types.h"
|
||||||
#include "index/VectorMemIndex.h"
|
#include "index/VectorMemIndex.h"
|
||||||
#include "index/Utils.h"
|
#include "index/Utils.h"
|
||||||
#include "index/Meta.h"
|
#include "index/Meta.h"
|
||||||
|
#include "index/JsonInvertedIndex.h"
|
||||||
#include "knowhere/utils.h"
|
#include "knowhere/utils.h"
|
||||||
|
|
||||||
#include "index/VectorDiskIndex.h"
|
#include "index/VectorDiskIndex.h"
|
||||||
@ -29,6 +33,8 @@
|
|||||||
#include "index/InvertedIndexTantivy.h"
|
#include "index/InvertedIndexTantivy.h"
|
||||||
#include "index/HybridScalarIndex.h"
|
#include "index/HybridScalarIndex.h"
|
||||||
#include "knowhere/comp/knowhere_check.h"
|
#include "knowhere/comp/knowhere_check.h"
|
||||||
|
#include "log/Log.h"
|
||||||
|
#include "pb/schema.pb.h"
|
||||||
|
|
||||||
namespace milvus::index {
|
namespace milvus::index {
|
||||||
|
|
||||||
@ -375,6 +381,45 @@ IndexFactory::CreateComplexScalarIndex(
|
|||||||
PanicInfo(Unsupported, "Complex index not supported now");
|
PanicInfo(Unsupported, "Complex index not supported now");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Creates an inverted index over a single JSON path.
//
// index_type            must be INVERTED_INDEX_TYPE (asserted).
// cast_dtype            type the JSON values are indexed as; all integer
//                       widths share the int64_t index, FLOAT/DOUBLE share
//                       the double index, STRING/VARCHAR share the
//                       std::string index.
// nested_path           JSON path forwarded to the index.
// file_manager_context  storage context forwarded to the index.
//
// Panics with DataTypeInvalid for any other cast type.
IndexBasePtr
IndexFactory::CreateJsonIndex(
    IndexType index_type,
    DataType cast_dtype,
    const std::string& nested_path,
    const storage::FileManagerContext& file_manager_context) {
    AssertInfo(index_type == INVERTED_INDEX_TYPE,
               "Invalid index type for json index");

    IndexBasePtr json_index;
    switch (cast_dtype) {
        case DataType::BOOL: {
            json_index = std::make_unique<index::JsonInvertedIndex<bool>>(
                proto::schema::DataType::Bool,
                nested_path,
                file_manager_context);
            break;
        }
        case DataType::INT8:
        case DataType::INT16:
        case DataType::INT32:
        case DataType::INT64: {
            json_index = std::make_unique<index::JsonInvertedIndex<int64_t>>(
                proto::schema::DataType::Int64,
                nested_path,
                file_manager_context);
            break;
        }
        case DataType::FLOAT:
        case DataType::DOUBLE: {
            json_index = std::make_unique<index::JsonInvertedIndex<double>>(
                proto::schema::DataType::Double,
                nested_path,
                file_manager_context);
            break;
        }
        case DataType::STRING:
        case DataType::VARCHAR: {
            json_index =
                std::make_unique<index::JsonInvertedIndex<std::string>>(
                    proto::schema::DataType::String,
                    nested_path,
                    file_manager_context);
            break;
        }
        default: {
            PanicInfo(DataTypeInvalid, "Invalid data type:{}", cast_dtype);
        }
    }
    return json_index;
}
|
||||||
|
|
||||||
IndexBasePtr
|
IndexBasePtr
|
||||||
IndexFactory::CreateScalarIndex(
|
IndexFactory::CreateScalarIndex(
|
||||||
const CreateIndexInfo& create_index_info,
|
const CreateIndexInfo& create_index_info,
|
||||||
@ -397,8 +442,10 @@ IndexFactory::CreateScalarIndex(
|
|||||||
file_manager_context);
|
file_manager_context);
|
||||||
}
|
}
|
||||||
case DataType::JSON: {
|
case DataType::JSON: {
|
||||||
return CreateComplexScalarIndex(create_index_info.index_type,
|
return CreateJsonIndex(create_index_info.index_type,
|
||||||
file_manager_context);
|
create_index_info.json_cast_type,
|
||||||
|
create_index_info.json_path,
|
||||||
|
file_manager_context);
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
PanicInfo(DataTypeInvalid, "Invalid data type:{}", data_type);
|
PanicInfo(DataTypeInvalid, "Invalid data type:{}", data_type);
|
||||||
|
|||||||
@ -21,6 +21,7 @@
|
|||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <shared_mutex>
|
#include <shared_mutex>
|
||||||
|
|
||||||
|
#include "common/Types.h"
|
||||||
#include "common/type_c.h"
|
#include "common/type_c.h"
|
||||||
#include "index/Index.h"
|
#include "index/Index.h"
|
||||||
#include "index/ScalarIndex.h"
|
#include "index/ScalarIndex.h"
|
||||||
@ -103,6 +104,13 @@ class IndexFactory {
|
|||||||
const storage::FileManagerContext& file_manager_context =
|
const storage::FileManagerContext& file_manager_context =
|
||||||
storage::FileManagerContext());
|
storage::FileManagerContext());
|
||||||
|
|
||||||
|
IndexBasePtr
|
||||||
|
CreateJsonIndex(IndexType index_type,
|
||||||
|
DataType cast_dtype,
|
||||||
|
const std::string& nested_path,
|
||||||
|
const storage::FileManagerContext& file_manager_context =
|
||||||
|
storage::FileManagerContext());
|
||||||
|
|
||||||
IndexBasePtr
|
IndexBasePtr
|
||||||
CreateScalarIndex(const CreateIndexInfo& create_index_info,
|
CreateScalarIndex(const CreateIndexInfo& create_index_info,
|
||||||
const storage::FileManagerContext& file_manager_context =
|
const storage::FileManagerContext& file_manager_context =
|
||||||
|
|||||||
@ -27,6 +27,8 @@ struct CreateIndexInfo {
|
|||||||
std::string field_name;
|
std::string field_name;
|
||||||
int64_t dim;
|
int64_t dim;
|
||||||
int32_t scalar_index_engine_version;
|
int32_t scalar_index_engine_version;
|
||||||
|
DataType json_cast_type;
|
||||||
|
std::string json_path;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace milvus::index
|
} // namespace milvus::index
|
||||||
|
|||||||
@ -29,36 +29,6 @@
|
|||||||
namespace milvus::index {
|
namespace milvus::index {
|
||||||
constexpr const char* TMP_INVERTED_INDEX_PREFIX = "/tmp/milvus/inverted-index/";
|
constexpr const char* TMP_INVERTED_INDEX_PREFIX = "/tmp/milvus/inverted-index/";
|
||||||
|
|
||||||
inline TantivyDataType
|
|
||||||
get_tantivy_data_type(proto::schema::DataType data_type) {
|
|
||||||
switch (data_type) {
|
|
||||||
case proto::schema::DataType::Bool: {
|
|
||||||
return TantivyDataType::Bool;
|
|
||||||
}
|
|
||||||
|
|
||||||
case proto::schema::DataType::Int8:
|
|
||||||
case proto::schema::DataType::Int16:
|
|
||||||
case proto::schema::DataType::Int32:
|
|
||||||
case proto::schema::DataType::Int64: {
|
|
||||||
return TantivyDataType::I64;
|
|
||||||
}
|
|
||||||
|
|
||||||
case proto::schema::DataType::Float:
|
|
||||||
case proto::schema::DataType::Double: {
|
|
||||||
return TantivyDataType::F64;
|
|
||||||
}
|
|
||||||
|
|
||||||
case proto::schema::DataType::String:
|
|
||||||
case proto::schema::DataType::VarChar: {
|
|
||||||
return TantivyDataType::Keyword;
|
|
||||||
}
|
|
||||||
|
|
||||||
default:
|
|
||||||
PanicInfo(ErrorCode::NotImplemented,
|
|
||||||
fmt::format("not implemented data type: {}", data_type));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline TantivyDataType
|
inline TantivyDataType
|
||||||
get_tantivy_data_type(const proto::schema::FieldSchema& schema) {
|
get_tantivy_data_type(const proto::schema::FieldSchema& schema) {
|
||||||
switch (schema.data_type()) {
|
switch (schema.data_type()) {
|
||||||
@ -311,7 +281,6 @@ template <typename T>
|
|||||||
const TargetBitmap
|
const TargetBitmap
|
||||||
InvertedIndexTantivy<T>::Range(T value, OpType op) {
|
InvertedIndexTantivy<T>::Range(T value, OpType op) {
|
||||||
TargetBitmap bitset(Count());
|
TargetBitmap bitset(Count());
|
||||||
|
|
||||||
switch (op) {
|
switch (op) {
|
||||||
case OpType::LessThan: {
|
case OpType::LessThan: {
|
||||||
auto array = wrapper_->upper_bound_range_query(value, false);
|
auto array = wrapper_->upper_bound_range_query(value, false);
|
||||||
@ -530,6 +499,11 @@ InvertedIndexTantivy<T>::BuildWithFieldData(
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case proto::schema::DataType::JSON: {
|
||||||
|
build_index_for_json(field_datas);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
PanicInfo(ErrorCode::NotImplemented,
|
PanicInfo(ErrorCode::NotImplemented,
|
||||||
fmt::format("Inverted index not supported on {}",
|
fmt::format("Inverted index not supported on {}",
|
||||||
|
|||||||
@ -24,6 +24,36 @@
|
|||||||
|
|
||||||
namespace milvus::index {
|
namespace milvus::index {
|
||||||
|
|
||||||
|
// Maps a schema data type onto the Tantivy data type used to index it.
// Integer widths collapse to I64, Float/Double to F64, String/VarChar to
// Keyword; any other type panics with ErrorCode::NotImplemented.
inline TantivyDataType
get_tantivy_data_type(proto::schema::DataType data_type) {
    using SchemaType = proto::schema::DataType;

    switch (data_type) {
        case SchemaType::Bool:
            return TantivyDataType::Bool;

        case SchemaType::Int8:
        case SchemaType::Int16:
        case SchemaType::Int32:
        case SchemaType::Int64:
            return TantivyDataType::I64;

        case SchemaType::Float:
        case SchemaType::Double:
            return TantivyDataType::F64;

        case SchemaType::String:
        case SchemaType::VarChar:
            return TantivyDataType::Keyword;

        default:
            PanicInfo(ErrorCode::NotImplemented,
                      fmt::format("not implemented data type: {}", data_type));
    }
}
|
||||||
|
|
||||||
using TantivyIndexWrapper = milvus::tantivy::TantivyIndexWrapper;
|
using TantivyIndexWrapper = milvus::tantivy::TantivyIndexWrapper;
|
||||||
using RustArrayWrapper = milvus::tantivy::RustArrayWrapper;
|
using RustArrayWrapper = milvus::tantivy::RustArrayWrapper;
|
||||||
|
|
||||||
@ -176,10 +206,10 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
|
|||||||
const TargetBitmap
|
const TargetBitmap
|
||||||
RegexQuery(const std::string& regex_pattern) override;
|
RegexQuery(const std::string& regex_pattern) override;
|
||||||
|
|
||||||
protected:
|
|
||||||
void
|
void
|
||||||
BuildWithFieldData(const std::vector<FieldDataPtr>& datas) override;
|
BuildWithFieldData(const std::vector<FieldDataPtr>& datas) override;
|
||||||
|
|
||||||
|
protected:
|
||||||
void
|
void
|
||||||
finish();
|
finish();
|
||||||
|
|
||||||
@ -187,6 +217,13 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
|
|||||||
build_index_for_array(
|
build_index_for_array(
|
||||||
const std::vector<std::shared_ptr<FieldDataBase>>& field_datas);
|
const std::vector<std::shared_ptr<FieldDataBase>>& field_datas);
|
||||||
|
|
||||||
|
virtual void
|
||||||
|
build_index_for_json(
|
||||||
|
const std::vector<std::shared_ptr<FieldDataBase>>& field_datas) {
|
||||||
|
PanicInfo(ErrorCode::NotImplemented,
|
||||||
|
"build_index_for_json not implemented");
|
||||||
|
};
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::shared_ptr<TantivyIndexWrapper> wrapper_;
|
std::shared_ptr<TantivyIndexWrapper> wrapper_;
|
||||||
TantivyDataType d_type_;
|
TantivyDataType d_type_;
|
||||||
|
|||||||
67
internal/core/src/index/JsonInvertedIndex.cpp
Normal file
67
internal/core/src/index/JsonInvertedIndex.cpp
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||||
|
// with the License. You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
|
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||||
|
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||||
|
|
||||||
|
#include "index/JsonInvertedIndex.h"
|
||||||
|
#include <string>
|
||||||
|
#include <string_view>
|
||||||
|
#include <type_traits>
|
||||||
|
#include "common/EasyAssert.h"
|
||||||
|
#include "common/FieldDataInterface.h"
|
||||||
|
#include "common/Json.h"
|
||||||
|
#include "common/Types.h"
|
||||||
|
#include "log/Log.h"
|
||||||
|
#include "simdjson/error.h"
|
||||||
|
|
||||||
|
namespace milvus::index {
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void
|
||||||
|
JsonInvertedIndex<T>::build_index_for_json(
|
||||||
|
const std::vector<std::shared_ptr<FieldDataBase>>& field_datas) {
|
||||||
|
using GetType =
|
||||||
|
std::conditional_t<std::is_same_v<std::string, T>, std::string_view, T>;
|
||||||
|
int64_t offset = 0;
|
||||||
|
LOG_INFO("Start to build json inverted index for field: {}", nested_path_);
|
||||||
|
for (const auto& data : field_datas) {
|
||||||
|
auto n = data->get_num_rows();
|
||||||
|
for (int64_t i = 0; i < n; i++) {
|
||||||
|
auto json_column = static_cast<const Json*>(data->RawValue(i));
|
||||||
|
if (this->schema_.nullable() && !data->is_valid(i)) {
|
||||||
|
this->null_offset.push_back(i);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
value_result<GetType> res = json_column->at<GetType>(nested_path_);
|
||||||
|
auto err = res.error();
|
||||||
|
if (err != simdjson::SUCCESS) {
|
||||||
|
AssertInfo(err == simdjson::INCORRECT_TYPE ||
|
||||||
|
err == simdjson::NO_SUCH_FIELD,
|
||||||
|
"Failed to parse json, err: {}",
|
||||||
|
err);
|
||||||
|
offset++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if constexpr (std::is_same_v<GetType, std::string_view>) {
|
||||||
|
auto value = std::string(res.value());
|
||||||
|
this->wrapper_->template add_data(&value, 1, offset++);
|
||||||
|
} else {
|
||||||
|
auto value = res.value();
|
||||||
|
this->wrapper_->template add_data(&value, 1, offset++);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template class JsonInvertedIndex<bool>;
|
||||||
|
template class JsonInvertedIndex<int64_t>;
|
||||||
|
template class JsonInvertedIndex<double>;
|
||||||
|
template class JsonInvertedIndex<std::string>;
|
||||||
|
|
||||||
|
} // namespace milvus::index
|
||||||
67
internal/core/src/index/JsonInvertedIndex.h
Normal file
67
internal/core/src/index/JsonInvertedIndex.h
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||||
|
// with the License. You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
|
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||||
|
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#include "common/FieldDataInterface.h"
|
||||||
|
#include "index/InvertedIndexTantivy.h"
|
||||||
|
#include "storage/FileManager.h"
|
||||||
|
#include "boost/filesystem.hpp"
|
||||||
|
#include "tantivy-binding.h"
|
||||||
|
|
||||||
|
namespace milvus::index {
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
class JsonInvertedIndex : public index::InvertedIndexTantivy<T> {
|
||||||
|
public:
|
||||||
|
JsonInvertedIndex(const proto::schema::DataType cast_type,
|
||||||
|
const std::string& nested_path,
|
||||||
|
const storage::FileManagerContext& ctx)
|
||||||
|
: nested_path_(nested_path) {
|
||||||
|
this->schema_ = ctx.fieldDataMeta.field_schema;
|
||||||
|
this->mem_file_manager_ =
|
||||||
|
std::make_shared<storage::MemFileManagerImpl>(ctx);
|
||||||
|
this->disk_file_manager_ =
|
||||||
|
std::make_shared<storage::DiskFileManagerImpl>(ctx);
|
||||||
|
|
||||||
|
if (ctx.for_loading_index) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
auto prefix = this->disk_file_manager_->GetTextIndexIdentifier();
|
||||||
|
constexpr const char* TMP_INVERTED_INDEX_PREFIX =
|
||||||
|
"/tmp/milvus/inverted-index/";
|
||||||
|
this->path_ = std::string(TMP_INVERTED_INDEX_PREFIX) + prefix;
|
||||||
|
|
||||||
|
this->d_type_ = index::get_tantivy_data_type(cast_type);
|
||||||
|
boost::filesystem::create_directories(this->path_);
|
||||||
|
std::string field_name = std::to_string(
|
||||||
|
this->disk_file_manager_->GetFieldDataMeta().field_id);
|
||||||
|
this->wrapper_ = std::make_shared<index::TantivyIndexWrapper>(
|
||||||
|
field_name.c_str(), this->d_type_, this->path_.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
build_index_for_json(const std::vector<std::shared_ptr<FieldDataBase>>&
|
||||||
|
field_datas) override;
|
||||||
|
|
||||||
|
void
|
||||||
|
finish() {
|
||||||
|
this->wrapper_->finish();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
create_reader() {
|
||||||
|
this->wrapper_->create_reader();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::string nested_path_;
|
||||||
|
};
|
||||||
|
} // namespace milvus::index
|
||||||
@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
#include "common/EasyAssert.h"
|
#include "common/EasyAssert.h"
|
||||||
#include "indexbuilder/IndexCreatorBase.h"
|
#include "indexbuilder/IndexCreatorBase.h"
|
||||||
|
#include "index/JsonInvertedIndex.h"
|
||||||
#include "indexbuilder/ScalarIndexCreator.h"
|
#include "indexbuilder/ScalarIndexCreator.h"
|
||||||
#include "indexbuilder/VecIndexCreator.h"
|
#include "indexbuilder/VecIndexCreator.h"
|
||||||
#include "indexbuilder/type_c.h"
|
#include "indexbuilder/type_c.h"
|
||||||
@ -60,6 +61,7 @@ class IndexFactory {
|
|||||||
case DataType::VARCHAR:
|
case DataType::VARCHAR:
|
||||||
case DataType::STRING:
|
case DataType::STRING:
|
||||||
case DataType::ARRAY:
|
case DataType::ARRAY:
|
||||||
|
case DataType::JSON:
|
||||||
return CreateScalarIndex(type, config, context);
|
return CreateScalarIndex(type, config, context);
|
||||||
|
|
||||||
case DataType::VECTOR_FLOAT:
|
case DataType::VECTOR_FLOAT:
|
||||||
@ -69,6 +71,7 @@ class IndexFactory {
|
|||||||
case DataType::VECTOR_SPARSE_FLOAT:
|
case DataType::VECTOR_SPARSE_FLOAT:
|
||||||
case DataType::VECTOR_INT8:
|
case DataType::VECTOR_INT8:
|
||||||
return std::make_unique<VecIndexCreator>(type, config, context);
|
return std::make_unique<VecIndexCreator>(type, config, context);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
PanicInfo(DataTypeInvalid,
|
PanicInfo(DataTypeInvalid,
|
||||||
fmt::format("invalid type is {}", invalid_dtype_msg));
|
fmt::format("invalid type is {}", invalid_dtype_msg));
|
||||||
|
|||||||
@ -10,6 +10,9 @@
|
|||||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||||
|
|
||||||
#include "indexbuilder/ScalarIndexCreator.h"
|
#include "indexbuilder/ScalarIndexCreator.h"
|
||||||
|
#include "common/Consts.h"
|
||||||
|
#include "common/FieldDataInterface.h"
|
||||||
|
#include "common/Types.h"
|
||||||
#include "index/IndexFactory.h"
|
#include "index/IndexFactory.h"
|
||||||
#include "index/IndexInfo.h"
|
#include "index/IndexInfo.h"
|
||||||
#include "index/Meta.h"
|
#include "index/Meta.h"
|
||||||
@ -39,6 +42,11 @@ ScalarIndexCreator::ScalarIndexCreator(
|
|||||||
|
|
||||||
index_info.field_type = dtype_;
|
index_info.field_type = dtype_;
|
||||||
index_info.index_type = index_type();
|
index_info.index_type = index_type();
|
||||||
|
if (dtype == DataType::JSON) {
|
||||||
|
index_info.json_cast_type = static_cast<DataType>(
|
||||||
|
std::stoi(config.at(JSON_CAST_TYPE).get<std::string>()));
|
||||||
|
index_info.json_path = config.at(JSON_PATH).get<std::string>();
|
||||||
|
}
|
||||||
index_ = index::IndexFactory::GetInstance().CreateIndex(
|
index_ = index::IndexFactory::GetInstance().CreateIndex(
|
||||||
index_info, file_manager_context);
|
index_info, file_manager_context);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -174,13 +174,22 @@ ChunkedSegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto row_count = info.index->Count();
|
|
||||||
AssertInfo(row_count > 0, "Index count is 0");
|
|
||||||
|
|
||||||
std::unique_lock lck(mutex_);
|
std::unique_lock lck(mutex_);
|
||||||
AssertInfo(
|
AssertInfo(
|
||||||
!get_bit(index_ready_bitset_, field_id),
|
!get_bit(index_ready_bitset_, field_id),
|
||||||
"scalar index has been exist at " + std::to_string(field_id.get()));
|
"scalar index has been exist at " + std::to_string(field_id.get()));
|
||||||
|
|
||||||
|
if (field_meta.get_data_type() == DataType::JSON) {
|
||||||
|
auto path = info.index_params.at(JSON_PATH);
|
||||||
|
JSONIndexKey key;
|
||||||
|
key.nested_path = path;
|
||||||
|
key.field_id = field_id;
|
||||||
|
json_indexings_[key] =
|
||||||
|
std::move(const_cast<LoadIndexInfo&>(info).index);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
auto row_count = info.index->Count();
|
||||||
|
AssertInfo(row_count > 0, "Index count is 0");
|
||||||
if (num_rows_.has_value()) {
|
if (num_rows_.has_value()) {
|
||||||
AssertInfo(num_rows_.value() == row_count,
|
AssertInfo(num_rows_.value() == row_count,
|
||||||
"field (" + std::to_string(field_id.get()) +
|
"field (" + std::to_string(field_id.get()) +
|
||||||
@ -1840,6 +1849,8 @@ ChunkedSegmentSealedImpl::HasRawData(int64_t field_id) const {
|
|||||||
field_indexing->indexing_.get());
|
field_indexing->indexing_.get());
|
||||||
return vec_index->HasRawData();
|
return vec_index->HasRawData();
|
||||||
}
|
}
|
||||||
|
} else if (IsJsonDataType(field_meta.get_data_type())) {
|
||||||
|
return get_bit(field_data_ready_bitset_, fieldID);
|
||||||
} else {
|
} else {
|
||||||
auto scalar_index = scalar_indexings_.find(fieldID);
|
auto scalar_index = scalar_indexings_.find(fieldID);
|
||||||
if (scalar_index != scalar_indexings_.end()) {
|
if (scalar_index != scalar_indexings_.end()) {
|
||||||
|
|||||||
@ -304,6 +304,11 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
HasIndex(FieldId field_id, const std::string& nested_path) const override {
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
bool
|
bool
|
||||||
HasFieldData(FieldId field_id) const override {
|
HasFieldData(FieldId field_id) const override {
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
@ -21,6 +21,7 @@
|
|||||||
|
|
||||||
#include "FieldIndexing.h"
|
#include "FieldIndexing.h"
|
||||||
#include "common/Common.h"
|
#include "common/Common.h"
|
||||||
|
#include "common/EasyAssert.h"
|
||||||
#include "common/Schema.h"
|
#include "common/Schema.h"
|
||||||
#include "common/Span.h"
|
#include "common/Span.h"
|
||||||
#include "common/SystemProperty.h"
|
#include "common/SystemProperty.h"
|
||||||
@ -239,6 +240,18 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||||||
std::to_string(field_id);
|
std::to_string(field_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
const index::ScalarIndex<T>&
|
||||||
|
chunk_scalar_index(FieldId field_id,
|
||||||
|
std::string path,
|
||||||
|
int64_t chunk_id) const {
|
||||||
|
using IndexType = index::ScalarIndex<T>;
|
||||||
|
auto base_ptr = chunk_index_impl(field_id, path, chunk_id);
|
||||||
|
auto ptr = dynamic_cast<const IndexType*>(base_ptr);
|
||||||
|
AssertInfo(ptr, "entry mismatch");
|
||||||
|
return *ptr;
|
||||||
|
}
|
||||||
|
|
||||||
std::unique_ptr<SearchResult>
|
std::unique_ptr<SearchResult>
|
||||||
Search(const query::Plan* Plan,
|
Search(const query::Plan* Plan,
|
||||||
const query::PlaceholderGroup* placeholder_group,
|
const query::PlaceholderGroup* placeholder_group,
|
||||||
@ -268,6 +281,10 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||||||
virtual bool
|
virtual bool
|
||||||
HasIndex(FieldId field_id) const = 0;
|
HasIndex(FieldId field_id) const = 0;
|
||||||
|
|
||||||
|
virtual bool
|
||||||
|
HasIndex(FieldId field_id, const std::string& nested_path) const {
|
||||||
|
PanicInfo(ErrorCode::NotImplemented, "not implemented");
|
||||||
|
};
|
||||||
virtual bool
|
virtual bool
|
||||||
HasFieldData(FieldId field_id) const = 0;
|
HasFieldData(FieldId field_id) const = 0;
|
||||||
|
|
||||||
@ -450,6 +467,13 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||||||
get_timestamps() const = 0;
|
get_timestamps() const = 0;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
// Path-aware overload of chunk_index_impl. The default implementation
// raises ErrorCode::NotImplemented through PanicInfo; segment types that
// keep per-path indexes are expected to override it.
virtual const index::IndexBase*
chunk_index_impl(FieldId field_id,
                 std::string path,
                 int64_t chunk_id) const {
    PanicInfo(ErrorCode::NotImplemented, "not implemented");
}
|
||||||
|
|
||||||
// calculate output[i] = Vec[seg_offsets[i]}, where Vec binds to system_type
|
// calculate output[i] = Vec[seg_offsets[i]}, where Vec binds to system_type
|
||||||
virtual void
|
virtual void
|
||||||
bulk_subscript(SystemFieldType system_type,
|
bulk_subscript(SystemFieldType system_type,
|
||||||
|
|||||||
@ -59,6 +59,53 @@ class SegmentSealed : public SegmentInternalInterface {
|
|||||||
type() const override {
|
type() const override {
|
||||||
return SegmentType::Sealed;
|
return SegmentType::Sealed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the JSON index registered for (`field_id`, `path`).
//
// `chunk_id` is accepted for interface compatibility but is not used by
// this lookup. AssertInfo fires with the offending path when no index has
// been stored in json_indexings_ under that key.
index::IndexBase*
chunk_index_impl(FieldId field_id,
                 std::string path,
                 int64_t chunk_id) const override {
    JSONIndexKey key;
    key.field_id = field_id;
    key.nested_path = path;

    // Probe the map once and reuse the iterator, instead of hashing the key
    // a second time for the final access.
    const auto entry = json_indexings_.find(key);
    AssertInfo(entry != json_indexings_.end(),
               "Cannot find json index with path: " + path);
    return entry->second.get();
}
|
||||||
|
|
||||||
|
virtual bool
|
||||||
|
HasIndex(FieldId field_id) const override = 0;
|
||||||
|
bool
|
||||||
|
HasIndex(FieldId field_id, const std::string& path) const override {
|
||||||
|
JSONIndexKey key;
|
||||||
|
key.field_id = field_id;
|
||||||
|
key.nested_path = path;
|
||||||
|
return json_indexings_.find(key) != json_indexings_.end();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
struct JSONIndexKey {
|
||||||
|
FieldId field_id;
|
||||||
|
std::string nested_path;
|
||||||
|
bool
|
||||||
|
operator==(const JSONIndexKey& other) const {
|
||||||
|
return field_id == other.field_id &&
|
||||||
|
nested_path == other.nested_path;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct hash_helper {
|
||||||
|
size_t
|
||||||
|
operator()(const JSONIndexKey& k) const {
|
||||||
|
std::hash<int64_t> h1;
|
||||||
|
std::hash<std::string> h2;
|
||||||
|
size_t hash_result = 0;
|
||||||
|
boost::hash_combine(hash_result, h1(k.field_id.get()));
|
||||||
|
boost::hash_combine(hash_result, h2(k.nested_path));
|
||||||
|
return hash_result;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
std::unordered_map<JSONIndexKey, index::IndexBasePtr, hash_helper>
|
||||||
|
json_indexings_;
|
||||||
};
|
};
|
||||||
|
|
||||||
using SegmentSealedSPtr = std::shared_ptr<SegmentSealed>;
|
using SegmentSealedSPtr = std::shared_ptr<SegmentSealed>;
|
||||||
|
|||||||
@ -211,13 +211,23 @@ SegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto row_count = info.index->Count();
|
|
||||||
AssertInfo(row_count > 0, "Index count is 0");
|
|
||||||
|
|
||||||
std::unique_lock lck(mutex_);
|
std::unique_lock lck(mutex_);
|
||||||
AssertInfo(
|
AssertInfo(
|
||||||
!get_bit(index_ready_bitset_, field_id),
|
!get_bit(index_ready_bitset_, field_id),
|
||||||
"scalar index has been exist at " + std::to_string(field_id.get()));
|
"scalar index has been exist at " + std::to_string(field_id.get()));
|
||||||
|
|
||||||
|
if (field_meta.get_data_type() == DataType::JSON) {
|
||||||
|
auto path = info.index_params.at(JSON_PATH);
|
||||||
|
JSONIndexKey key;
|
||||||
|
key.nested_path = path;
|
||||||
|
key.field_id = field_id;
|
||||||
|
json_indexings_[key] =
|
||||||
|
std::move(const_cast<LoadIndexInfo&>(info).index);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
auto row_count = info.index->Count();
|
||||||
|
AssertInfo(row_count > 0, "Index count is 0");
|
||||||
|
|
||||||
if (num_rows_.has_value()) {
|
if (num_rows_.has_value()) {
|
||||||
AssertInfo(num_rows_.value() == row_count,
|
AssertInfo(num_rows_.value() == row_count,
|
||||||
"field (" + std::to_string(field_id.get()) +
|
"field (" + std::to_string(field_id.get()) +
|
||||||
@ -226,7 +236,6 @@ SegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
|
|||||||
") than other column's row count (" +
|
") than other column's row count (" +
|
||||||
std::to_string(num_rows_.value()) + ")");
|
std::to_string(num_rows_.value()) + ")");
|
||||||
}
|
}
|
||||||
|
|
||||||
scalar_indexings_[field_id] =
|
scalar_indexings_[field_id] =
|
||||||
std::move(const_cast<LoadIndexInfo&>(info).index);
|
std::move(const_cast<LoadIndexInfo&>(info).index);
|
||||||
// reverse pk from scalar index and set pks to offset
|
// reverse pk from scalar index and set pks to offset
|
||||||
@ -693,7 +702,6 @@ SegmentSealedImpl::num_chunk_index(FieldId field_id) const {
|
|||||||
if (field_meta.is_vector()) {
|
if (field_meta.is_vector()) {
|
||||||
return int64_t(vector_indexings_.is_ready(field_id));
|
return int64_t(vector_indexings_.is_ready(field_id));
|
||||||
}
|
}
|
||||||
|
|
||||||
return scalar_indexings_.count(field_id);
|
return scalar_indexings_.count(field_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1664,6 +1672,8 @@ SegmentSealedImpl::HasRawData(int64_t field_id) const {
|
|||||||
field_indexing->indexing_.get());
|
field_indexing->indexing_.get());
|
||||||
return vec_index->HasRawData();
|
return vec_index->HasRawData();
|
||||||
}
|
}
|
||||||
|
} else if (IsJsonDataType(field_meta.get_data_type())) {
|
||||||
|
return get_bit(field_data_ready_bitset_, fieldID);
|
||||||
} else {
|
} else {
|
||||||
auto scalar_index = scalar_indexings_.find(fieldID);
|
auto scalar_index = scalar_indexings_.find(fieldID);
|
||||||
if (scalar_index != scalar_indexings_.end()) {
|
if (scalar_index != scalar_indexings_.end()) {
|
||||||
|
|||||||
@ -11,6 +11,7 @@
|
|||||||
|
|
||||||
#include "segcore/load_index_c.h"
|
#include "segcore/load_index_c.h"
|
||||||
|
|
||||||
|
#include "common/Consts.h"
|
||||||
#include "common/FieldMeta.h"
|
#include "common/FieldMeta.h"
|
||||||
#include "common/EasyAssert.h"
|
#include "common/EasyAssert.h"
|
||||||
#include "common/Types.h"
|
#include "common/Types.h"
|
||||||
@ -305,6 +306,11 @@ AppendIndexV2(CTraceContext c_trace, CLoadIndexInfo c_load_index_info) {
|
|||||||
load_index_info->index_params);
|
load_index_info->index_params);
|
||||||
config[milvus::index::INDEX_FILES] = load_index_info->index_files;
|
config[milvus::index::INDEX_FILES] = load_index_info->index_files;
|
||||||
|
|
||||||
|
if (load_index_info->field_type == milvus::DataType::JSON) {
|
||||||
|
index_info.json_cast_type = static_cast<milvus::DataType>(
|
||||||
|
std::stoi(config.at(JSON_CAST_TYPE).get<std::string>()));
|
||||||
|
index_info.json_path = config.at(JSON_PATH).get<std::string>();
|
||||||
|
}
|
||||||
milvus::storage::FileManagerContext fileManagerContext(
|
milvus::storage::FileManagerContext fileManagerContext(
|
||||||
field_meta, index_meta, remote_chunk_manager);
|
field_meta, index_meta, remote_chunk_manager);
|
||||||
fileManagerContext.set_for_loading_index(true);
|
fileManagerContext.set_for_loading_index(true);
|
||||||
|
|||||||
@ -107,14 +107,24 @@ RustResult tantivy_term_query_i64(void *ptr, int64_t term);
|
|||||||
|
|
||||||
RustResult tantivy_lower_bound_range_query_i64(void *ptr, int64_t lower_bound, bool inclusive);
|
RustResult tantivy_lower_bound_range_query_i64(void *ptr, int64_t lower_bound, bool inclusive);
|
||||||
|
|
||||||
|
RustResult tantivy_lower_bound_range_query_bool(void *ptr, bool lower_bound, bool inclusive);
|
||||||
|
|
||||||
RustResult tantivy_upper_bound_range_query_i64(void *ptr, int64_t upper_bound, bool inclusive);
|
RustResult tantivy_upper_bound_range_query_i64(void *ptr, int64_t upper_bound, bool inclusive);
|
||||||
|
|
||||||
|
RustResult tantivy_upper_bound_range_query_bool(void *ptr, bool upper_bound, bool inclusive);
|
||||||
|
|
||||||
RustResult tantivy_range_query_i64(void *ptr,
|
RustResult tantivy_range_query_i64(void *ptr,
|
||||||
int64_t lower_bound,
|
int64_t lower_bound,
|
||||||
int64_t upper_bound,
|
int64_t upper_bound,
|
||||||
bool lb_inclusive,
|
bool lb_inclusive,
|
||||||
bool ub_inclusive);
|
bool ub_inclusive);
|
||||||
|
|
||||||
|
RustResult tantivy_range_query_bool(void *ptr,
|
||||||
|
bool lower_bound,
|
||||||
|
bool upper_bound,
|
||||||
|
bool lb_inclusive,
|
||||||
|
bool ub_inclusive);
|
||||||
|
|
||||||
RustResult tantivy_term_query_f64(void *ptr, double term);
|
RustResult tantivy_term_query_f64(void *ptr, double term);
|
||||||
|
|
||||||
RustResult tantivy_lower_bound_range_query_f64(void *ptr, double lower_bound, bool inclusive);
|
RustResult tantivy_lower_bound_range_query_f64(void *ptr, double lower_bound, bool inclusive);
|
||||||
|
|||||||
@ -124,6 +124,28 @@ impl IndexReaderWrapper {
|
|||||||
self.search(&q)
|
self.search(&q)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn lower_bound_range_query_bool(
|
||||||
|
&self,
|
||||||
|
lower_bound: bool,
|
||||||
|
inclusive: bool,
|
||||||
|
) -> Result<Vec<u32>> {
|
||||||
|
let lower_bound = make_bounds(Term::from_field_bool(self.field, lower_bound), inclusive);
|
||||||
|
let upper_bound = Bound::Unbounded;
|
||||||
|
let q = RangeQuery::new(lower_bound, upper_bound);
|
||||||
|
self.search(&q)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn upper_bound_range_query_bool(
|
||||||
|
&self,
|
||||||
|
upper_bound: bool,
|
||||||
|
inclusive: bool,
|
||||||
|
) -> Result<Vec<u32>> {
|
||||||
|
let lower_bound = Bound::Unbounded;
|
||||||
|
let upper_bound = make_bounds(Term::from_field_bool(self.field, upper_bound), inclusive);
|
||||||
|
let q = RangeQuery::new(lower_bound, upper_bound);
|
||||||
|
self.search(&q)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn range_query_i64(
|
pub fn range_query_i64(
|
||||||
&self,
|
&self,
|
||||||
lower_bound: i64,
|
lower_bound: i64,
|
||||||
@ -145,6 +167,19 @@ impl IndexReaderWrapper {
|
|||||||
self.search(&q)
|
self.search(&q)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn range_query_bool(
|
||||||
|
&self,
|
||||||
|
lower_bound: bool,
|
||||||
|
upper_bound: bool,
|
||||||
|
lb_inclusive: bool,
|
||||||
|
ub_inclusive: bool,
|
||||||
|
) -> Result<Vec<u32>> {
|
||||||
|
let lower_bound = make_bounds(Term::from_field_bool(self.field, lower_bound), lb_inclusive);
|
||||||
|
let upper_bound = make_bounds(Term::from_field_bool(self.field, upper_bound), ub_inclusive);
|
||||||
|
let q = RangeQuery::new(lower_bound, upper_bound);
|
||||||
|
self.search(&q)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn lower_bound_range_query_f64(
|
pub fn lower_bound_range_query_f64(
|
||||||
&self,
|
&self,
|
||||||
lower_bound: f64,
|
lower_bound: f64,
|
||||||
|
|||||||
@ -56,6 +56,20 @@ pub extern "C" fn tantivy_lower_bound_range_query_i64(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[no_mangle]
|
||||||
|
pub extern "C" fn tantivy_lower_bound_range_query_bool(
|
||||||
|
ptr: *mut c_void,
|
||||||
|
lower_bound: bool,
|
||||||
|
inclusive: bool,
|
||||||
|
) -> RustResult {
|
||||||
|
let real = ptr as *mut IndexReaderWrapper;
|
||||||
|
unsafe {
|
||||||
|
(*real)
|
||||||
|
.lower_bound_range_query_bool(lower_bound, inclusive)
|
||||||
|
.into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub extern "C" fn tantivy_upper_bound_range_query_i64(
|
pub extern "C" fn tantivy_upper_bound_range_query_i64(
|
||||||
ptr: *mut c_void,
|
ptr: *mut c_void,
|
||||||
@ -70,6 +84,20 @@ pub extern "C" fn tantivy_upper_bound_range_query_i64(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[no_mangle]
|
||||||
|
pub extern "C" fn tantivy_upper_bound_range_query_bool(
|
||||||
|
ptr: *mut c_void,
|
||||||
|
upper_bound: bool,
|
||||||
|
inclusive: bool,
|
||||||
|
) -> RustResult {
|
||||||
|
let real = ptr as *mut IndexReaderWrapper;
|
||||||
|
unsafe {
|
||||||
|
(*real)
|
||||||
|
.upper_bound_range_query_bool(upper_bound, inclusive)
|
||||||
|
.into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub extern "C" fn tantivy_range_query_i64(
|
pub extern "C" fn tantivy_range_query_i64(
|
||||||
ptr: *mut c_void,
|
ptr: *mut c_void,
|
||||||
@ -86,6 +114,21 @@ pub extern "C" fn tantivy_range_query_i64(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[no_mangle]
|
||||||
|
pub extern "C" fn tantivy_range_query_bool(
|
||||||
|
ptr: *mut c_void,
|
||||||
|
lower_bound: bool,
|
||||||
|
upper_bound: bool,
|
||||||
|
lb_inclusive: bool,
|
||||||
|
ub_inclusive: bool,
|
||||||
|
) -> RustResult {
|
||||||
|
let real = ptr as *mut IndexReaderWrapper;
|
||||||
|
unsafe {
|
||||||
|
(*real)
|
||||||
|
.range_query_bool(lower_bound, upper_bound, lb_inclusive, ub_inclusive)
|
||||||
|
.into()
|
||||||
|
}
|
||||||
|
}
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub extern "C" fn tantivy_term_query_f64(ptr: *mut c_void, term: f64) -> RustResult {
|
pub extern "C" fn tantivy_term_query_f64(ptr: *mut c_void, term: f64) -> RustResult {
|
||||||
let real = ptr as *mut IndexReaderWrapper;
|
let real = ptr as *mut IndexReaderWrapper;
|
||||||
|
|||||||
@ -5,6 +5,7 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
#include "common/EasyAssert.h"
|
#include "common/EasyAssert.h"
|
||||||
#include "tantivy-binding.h"
|
#include "tantivy-binding.h"
|
||||||
@ -619,6 +620,11 @@ struct TantivyIndexWrapper {
|
|||||||
RustArrayWrapper
|
RustArrayWrapper
|
||||||
lower_bound_range_query(T lower_bound, bool inclusive) {
|
lower_bound_range_query(T lower_bound, bool inclusive) {
|
||||||
auto array = [&]() {
|
auto array = [&]() {
|
||||||
|
if constexpr (std::is_same_v<T, bool>) {
|
||||||
|
return tantivy_lower_bound_range_query_bool(
|
||||||
|
reader_, static_cast<bool>(lower_bound), inclusive);
|
||||||
|
}
|
||||||
|
|
||||||
if constexpr (std::is_integral_v<T>) {
|
if constexpr (std::is_integral_v<T>) {
|
||||||
return tantivy_lower_bound_range_query_i64(
|
return tantivy_lower_bound_range_query_i64(
|
||||||
reader_, static_cast<int64_t>(lower_bound), inclusive);
|
reader_, static_cast<int64_t>(lower_bound), inclusive);
|
||||||
@ -656,6 +662,11 @@ struct TantivyIndexWrapper {
|
|||||||
RustArrayWrapper
|
RustArrayWrapper
|
||||||
upper_bound_range_query(T upper_bound, bool inclusive) {
|
upper_bound_range_query(T upper_bound, bool inclusive) {
|
||||||
auto array = [&]() {
|
auto array = [&]() {
|
||||||
|
if constexpr (std::is_same_v<T, bool>) {
|
||||||
|
return tantivy_upper_bound_range_query_bool(
|
||||||
|
reader_, static_cast<bool>(upper_bound), inclusive);
|
||||||
|
}
|
||||||
|
|
||||||
if constexpr (std::is_integral_v<T>) {
|
if constexpr (std::is_integral_v<T>) {
|
||||||
return tantivy_upper_bound_range_query_i64(
|
return tantivy_upper_bound_range_query_i64(
|
||||||
reader_, static_cast<int64_t>(upper_bound), inclusive);
|
reader_, static_cast<int64_t>(upper_bound), inclusive);
|
||||||
@ -696,6 +707,14 @@ struct TantivyIndexWrapper {
|
|||||||
bool lb_inclusive,
|
bool lb_inclusive,
|
||||||
bool ub_inclusive) {
|
bool ub_inclusive) {
|
||||||
auto array = [&]() {
|
auto array = [&]() {
|
||||||
|
if constexpr (std::is_same_v<T, bool>) {
|
||||||
|
return tantivy_range_query_bool(reader_,
|
||||||
|
static_cast<bool>(lower_bound),
|
||||||
|
static_cast<bool>(upper_bound),
|
||||||
|
lb_inclusive,
|
||||||
|
ub_inclusive);
|
||||||
|
}
|
||||||
|
|
||||||
if constexpr (std::is_integral_v<T>) {
|
if constexpr (std::is_integral_v<T>) {
|
||||||
return tantivy_range_query_i64(
|
return tantivy_range_query_i64(
|
||||||
reader_,
|
reader_,
|
||||||
|
|||||||
@ -13,16 +13,25 @@
|
|||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
#include <limits>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <regex>
|
#include <regex>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <type_traits>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <roaring/roaring.hh>
|
#include <roaring/roaring.hh>
|
||||||
|
|
||||||
|
#include "common/FieldDataInterface.h"
|
||||||
#include "common/Json.h"
|
#include "common/Json.h"
|
||||||
|
#include "common/LoadInfo.h"
|
||||||
#include "common/Types.h"
|
#include "common/Types.h"
|
||||||
|
#include "index/Meta.h"
|
||||||
|
#include "index/JsonInvertedIndex.h"
|
||||||
|
#include "knowhere/comp/index_param.h"
|
||||||
|
#include "mmap/Types.h"
|
||||||
#include "pb/plan.pb.h"
|
#include "pb/plan.pb.h"
|
||||||
|
#include "pb/schema.pb.h"
|
||||||
#include "query/Plan.h"
|
#include "query/Plan.h"
|
||||||
#include "query/PlanNode.h"
|
#include "query/PlanNode.h"
|
||||||
#include "query/PlanProto.h"
|
#include "query/PlanProto.h"
|
||||||
@ -30,6 +39,8 @@
|
|||||||
#include "segcore/SegmentGrowingImpl.h"
|
#include "segcore/SegmentGrowingImpl.h"
|
||||||
#include "simdjson/padded_string.h"
|
#include "simdjson/padded_string.h"
|
||||||
#include "segcore/segment_c.h"
|
#include "segcore/segment_c.h"
|
||||||
|
#include "storage/FileManager.h"
|
||||||
|
#include "storage/Types.h"
|
||||||
#include "test_utils/DataGen.h"
|
#include "test_utils/DataGen.h"
|
||||||
#include "test_utils/GenExprProto.h"
|
#include "test_utils/GenExprProto.h"
|
||||||
#include "index/IndexFactory.h"
|
#include "index/IndexFactory.h"
|
||||||
@ -15983,3 +15994,118 @@ TEST_P(ExprTest, TestJsonContainsDiffTypeNullable) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
class JsonIndexTestFixture : public testing::Test {
|
||||||
|
public:
|
||||||
|
using DataType = T;
|
||||||
|
|
||||||
|
JsonIndexTestFixture() {
|
||||||
|
if constexpr (std::is_same_v<T, bool>) {
|
||||||
|
schema_data_type = proto::schema::Bool;
|
||||||
|
json_path = "/bool";
|
||||||
|
lower_bound.set_bool_val(std::numeric_limits<bool>::min());
|
||||||
|
upper_bound.set_bool_val(std::numeric_limits<bool>::max());
|
||||||
|
cast_type = milvus::DataType::BOOL;
|
||||||
|
} else if constexpr (std::is_same_v<T, int64_t>) {
|
||||||
|
schema_data_type = proto::schema::Int64;
|
||||||
|
json_path = "/int";
|
||||||
|
lower_bound.set_int64_val(std::numeric_limits<int64_t>::min());
|
||||||
|
upper_bound.set_int64_val(std::numeric_limits<int64_t>::max());
|
||||||
|
cast_type = milvus::DataType::INT64;
|
||||||
|
} else if constexpr (std::is_same_v<T, double>) {
|
||||||
|
schema_data_type = proto::schema::Double;
|
||||||
|
json_path = "/double";
|
||||||
|
lower_bound.set_float_val(std::numeric_limits<double>::min());
|
||||||
|
upper_bound.set_float_val(std::numeric_limits<double>::max());
|
||||||
|
cast_type = milvus::DataType::DOUBLE;
|
||||||
|
} else if constexpr (std::is_same_v<T, std::string>) {
|
||||||
|
schema_data_type = proto::schema::String;
|
||||||
|
json_path = "/string";
|
||||||
|
lower_bound.set_string_val("");
|
||||||
|
std::string s(1024, '9');
|
||||||
|
upper_bound.set_string_val(s);
|
||||||
|
cast_type = milvus::DataType::STRING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
proto::schema::DataType schema_data_type;
|
||||||
|
std::string json_path;
|
||||||
|
proto::plan::GenericValue lower_bound;
|
||||||
|
proto::plan::GenericValue upper_bound;
|
||||||
|
milvus::DataType cast_type;
|
||||||
|
};
|
||||||
|
|
||||||
|
using JsonIndexTypes = ::testing::Types<bool, int64_t, double, std::string>;
|
||||||
|
TYPED_TEST_SUITE(JsonIndexTestFixture, JsonIndexTypes);
|
||||||
|
|
||||||
|
// Builds a JSON inverted index for the fixture's path and cast type on a
// sealed segment, loads both the index and the raw JSON column, and checks
// that `<= upper_bound` and `>= lower_bound` each select all N rows.
TYPED_TEST(JsonIndexTestFixture, TestJsonIndexUnaryExpr) {
    auto schema = std::make_shared<Schema>();
    auto vec_fid = schema->AddDebugField(
        "fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto i32_fid = schema->AddDebugField("age32", DataType::INT32);
    auto i64_fid = schema->AddDebugField("age64", DataType::INT64);
    auto json_fid = schema->AddDebugField("json", DataType::JSON);
    schema->set_primary_field_id(i64_fid);

    auto seg = CreateSealedSegment(schema);
    int N = 1000;
    auto raw_data = DataGen(schema, N);
    segcore::LoadIndexInfo load_index_info;

    // Create the index through the factory, then take ownership of it as
    // the concrete JsonInvertedIndex type so finish()/create_reader() can be
    // called.
    auto file_manager_ctx = storage::FileManagerContext();
    file_manager_ctx.fieldDataMeta.field_schema.set_data_type(
        milvus::proto::schema::JSON);
    file_manager_ctx.fieldDataMeta.field_schema.set_fieldid(json_fid.get());
    auto inv_index = index::IndexFactory::GetInstance().CreateJsonIndex(
        index::INVERTED_INDEX_TYPE,
        this->cast_type,
        this->json_path,
        file_manager_ctx);

    using json_index_type =
        index::JsonInvertedIndex<typename TestFixture::DataType>;
    std::unique_ptr<json_index_type> json_index(
        static_cast<json_index_type*>(inv_index.release()));

    // Wrap the generated JSON strings into a JSON field-data column.
    auto raw_json_strings = raw_data.get_col<std::string>(json_fid);
    auto json_field =
        std::make_shared<FieldData<milvus::Json>>(DataType::JSON, false);
    std::vector<milvus::Json> json_rows;
    for (auto& text : raw_json_strings) {
        json_rows.push_back(milvus::Json(simdjson::padded_string(text)));
    }
    json_field->add_json_data(json_rows);

    json_index->BuildWithFieldData({json_field});
    json_index->finish();
    json_index->create_reader();

    // Hand the index and then the raw column over to the segment.
    load_index_info.field_id = json_fid.get();
    load_index_info.field_type = DataType::JSON;
    load_index_info.index = std::move(json_index);
    load_index_info.index_params = {{JSON_PATH, this->json_path}};
    seg->LoadIndex(load_index_info);

    auto json_field_data_info = FieldDataInfo(json_fid.get(), N, {json_field});
    seg->LoadFieldData(json_fid, json_field_data_info);

    // Runs `json[path] <op> bound` against the segment and returns the
    // number of selected rows. The column path drops the leading '/'.
    auto count_matches = [&](proto::plan::OpType op,
                             const proto::plan::GenericValue& bound) {
        auto filter = std::make_shared<expr::UnaryRangeFilterExpr>(
            expr::ColumnInfo(
                json_fid, DataType::JSON, {this->json_path.substr(1)}),
            op,
            bound,
            std::vector<proto::plan::GenericValue>());
        auto node = std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID,
                                                           filter);
        auto selected = ExecuteQueryExpr(node, seg.get(), N, MAX_TIMESTAMP);
        return selected.count();
    };

    EXPECT_EQ(
        count_matches(proto::plan::OpType::LessEqual, this->upper_bound), N);
    EXPECT_EQ(
        count_matches(proto::plan::OpType::GreaterEqual, this->lower_bound),
        N);
}
|
||||||
|
|||||||
@ -244,6 +244,23 @@ func (m *indexMeta) updateIndexTasksMetrics() {
|
|||||||
log.Ctx(m.ctx).Info("update index metric", zap.Int("collectionNum", len(taskMetrics)))
|
log.Ctx(m.ctx).Info("update index metric", zap.Int("collectionNum", len(taskMetrics)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func checkJsonParams(index *model.Index, req *indexpb.CreateIndexRequest) bool {
|
||||||
|
castType1, err := getIndexParam(index.IndexParams, common.JSONCastTypeKey)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
castType2, err := getIndexParam(req.GetIndexParams(), common.JSONCastTypeKey)
|
||||||
|
if err != nil || castType1 != castType2 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
jsonPath1, err := getIndexParam(index.IndexParams, common.JSONPathKey)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
jsonPath2, err := getIndexParam(req.GetIndexParams(), common.JSONPathKey)
|
||||||
|
return err == nil && jsonPath1 == jsonPath2
|
||||||
|
}
|
||||||
|
|
||||||
func checkParams(fieldIndex *model.Index, req *indexpb.CreateIndexRequest) bool {
|
func checkParams(fieldIndex *model.Index, req *indexpb.CreateIndexRequest) bool {
|
||||||
metaTypeParams := DeleteParams(fieldIndex.TypeParams, []string{common.MmapEnabledKey})
|
metaTypeParams := DeleteParams(fieldIndex.TypeParams, []string{common.MmapEnabledKey})
|
||||||
reqTypeParams := DeleteParams(req.TypeParams, []string{common.MmapEnabledKey})
|
reqTypeParams := DeleteParams(req.TypeParams, []string{common.MmapEnabledKey})
|
||||||
@ -332,7 +349,7 @@ func checkParams(fieldIndex *model.Index, req *indexpb.CreateIndexRequest) bool
|
|||||||
return !notEq
|
return !notEq
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *indexMeta) CanCreateIndex(req *indexpb.CreateIndexRequest) (UniqueID, error) {
|
func (m *indexMeta) CanCreateIndex(req *indexpb.CreateIndexRequest, isJson bool) (UniqueID, error) {
|
||||||
m.RLock()
|
m.RLock()
|
||||||
defer m.RUnlock()
|
defer m.RUnlock()
|
||||||
|
|
||||||
@ -345,7 +362,7 @@ func (m *indexMeta) CanCreateIndex(req *indexpb.CreateIndexRequest) (UniqueID, e
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if req.IndexName == index.IndexName {
|
if req.IndexName == index.IndexName {
|
||||||
if req.FieldID == index.FieldID && checkParams(index, req) {
|
if req.FieldID == index.FieldID && checkParams(index, req) && (!isJson || checkJsonParams(index, req)) {
|
||||||
return index.IndexID, nil
|
return index.IndexID, nil
|
||||||
}
|
}
|
||||||
errMsg := "at most one distinct index is allowed per field"
|
errMsg := "at most one distinct index is allowed per field"
|
||||||
@ -357,6 +374,20 @@ func (m *indexMeta) CanCreateIndex(req *indexpb.CreateIndexRequest) (UniqueID, e
|
|||||||
return 0, fmt.Errorf("CreateIndex failed: %s", errMsg)
|
return 0, fmt.Errorf("CreateIndex failed: %s", errMsg)
|
||||||
}
|
}
|
||||||
if req.FieldID == index.FieldID {
|
if req.FieldID == index.FieldID {
|
||||||
|
if isJson {
|
||||||
|
// if it is json index, check if json paths are same
|
||||||
|
jsonPath1, err := getIndexParam(index.IndexParams, common.JSONPathKey)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
jsonPath2, err := getIndexParam(req.GetIndexParams(), common.JSONPathKey)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
if jsonPath1 != jsonPath2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
// creating multiple indexes on same field is not supported
|
// creating multiple indexes on same field is not supported
|
||||||
errMsg := "CreateIndex failed: creating multiple indexes on same field is not supported"
|
errMsg := "CreateIndex failed: creating multiple indexes on same field is not supported"
|
||||||
log.Warn(errMsg)
|
log.Warn(errMsg)
|
||||||
|
|||||||
@ -132,7 +132,7 @@ func TestMeta_ScalarAutoIndex(t *testing.T) {
|
|||||||
UserIndexParams: userIndexParams,
|
UserIndexParams: userIndexParams,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
tmpIndexID, err := m.CanCreateIndex(req)
|
tmpIndexID, err := m.CanCreateIndex(req, false)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Equal(t, int64(indexID), tmpIndexID)
|
assert.Equal(t, int64(indexID), tmpIndexID)
|
||||||
})
|
})
|
||||||
@ -154,12 +154,12 @@ func TestMeta_ScalarAutoIndex(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
req.UserIndexParams = append(req.UserIndexParams, &commonpb.KeyValuePair{Key: "bitmap_cardinality_limit", Value: "1000"})
|
req.UserIndexParams = append(req.UserIndexParams, &commonpb.KeyValuePair{Key: "bitmap_cardinality_limit", Value: "1000"})
|
||||||
tmpIndexID, err := m.CanCreateIndex(req)
|
tmpIndexID, err := m.CanCreateIndex(req, false)
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
assert.Equal(t, int64(0), tmpIndexID)
|
assert.Equal(t, int64(0), tmpIndexID)
|
||||||
|
|
||||||
req.UserIndexParams = append(req.UserIndexParams, &commonpb.KeyValuePair{Key: "bitmap_cardinality_limit", Value: "500"})
|
req.UserIndexParams = append(req.UserIndexParams, &commonpb.KeyValuePair{Key: "bitmap_cardinality_limit", Value: "500"})
|
||||||
tmpIndexID, err = m.CanCreateIndex(req)
|
tmpIndexID, err = m.CanCreateIndex(req, false)
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
assert.Equal(t, int64(0), tmpIndexID)
|
assert.Equal(t, int64(0), tmpIndexID)
|
||||||
})
|
})
|
||||||
@ -201,7 +201,7 @@ func TestMeta_ScalarAutoIndex(t *testing.T) {
|
|||||||
UserIndexParams: userIndexParams,
|
UserIndexParams: userIndexParams,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
tmpIndexID, err := m.CanCreateIndex(req)
|
tmpIndexID, err := m.CanCreateIndex(req, false)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Equal(t, int64(indexID), tmpIndexID)
|
assert.Equal(t, int64(indexID), tmpIndexID)
|
||||||
newIndexParams := req.GetIndexParams()
|
newIndexParams := req.GetIndexParams()
|
||||||
@ -266,7 +266,7 @@ func TestMeta_CanCreateIndex(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
t.Run("can create index", func(t *testing.T) {
|
t.Run("can create index", func(t *testing.T) {
|
||||||
tmpIndexID, err := m.CanCreateIndex(req)
|
tmpIndexID, err := m.CanCreateIndex(req, false)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Equal(t, int64(0), tmpIndexID)
|
assert.Equal(t, int64(0), tmpIndexID)
|
||||||
index := &model.Index{
|
index := &model.Index{
|
||||||
@ -286,37 +286,37 @@ func TestMeta_CanCreateIndex(t *testing.T) {
|
|||||||
err = m.CreateIndex(context.TODO(), index)
|
err = m.CreateIndex(context.TODO(), index)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
tmpIndexID, err = m.CanCreateIndex(req)
|
tmpIndexID, err = m.CanCreateIndex(req, false)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Equal(t, indexID, tmpIndexID)
|
assert.Equal(t, indexID, tmpIndexID)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("params not consistent", func(t *testing.T) {
|
t.Run("params not consistent", func(t *testing.T) {
|
||||||
req.TypeParams = append(req.TypeParams, &commonpb.KeyValuePair{Key: "primary_key", Value: "false"})
|
req.TypeParams = append(req.TypeParams, &commonpb.KeyValuePair{Key: "primary_key", Value: "false"})
|
||||||
tmpIndexID, err := m.CanCreateIndex(req)
|
tmpIndexID, err := m.CanCreateIndex(req, false)
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
assert.Equal(t, int64(0), tmpIndexID)
|
assert.Equal(t, int64(0), tmpIndexID)
|
||||||
|
|
||||||
req.TypeParams = []*commonpb.KeyValuePair{{Key: common.DimKey, Value: "64"}}
|
req.TypeParams = []*commonpb.KeyValuePair{{Key: common.DimKey, Value: "64"}}
|
||||||
tmpIndexID, err = m.CanCreateIndex(req)
|
tmpIndexID, err = m.CanCreateIndex(req, false)
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
assert.Equal(t, int64(0), tmpIndexID)
|
assert.Equal(t, int64(0), tmpIndexID)
|
||||||
|
|
||||||
req.TypeParams = typeParams
|
req.TypeParams = typeParams
|
||||||
req.UserIndexParams = append(indexParams, &commonpb.KeyValuePair{Key: "metrics_type", Value: "L2"})
|
req.UserIndexParams = append(indexParams, &commonpb.KeyValuePair{Key: "metrics_type", Value: "L2"})
|
||||||
tmpIndexID, err = m.CanCreateIndex(req)
|
tmpIndexID, err = m.CanCreateIndex(req, false)
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
assert.Equal(t, int64(0), tmpIndexID)
|
assert.Equal(t, int64(0), tmpIndexID)
|
||||||
|
|
||||||
req.IndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "HNSW"}}
|
req.IndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "HNSW"}}
|
||||||
req.UserIndexParams = req.IndexParams
|
req.UserIndexParams = req.IndexParams
|
||||||
tmpIndexID, err = m.CanCreateIndex(req)
|
tmpIndexID, err = m.CanCreateIndex(req, false)
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
assert.Equal(t, int64(0), tmpIndexID)
|
assert.Equal(t, int64(0), tmpIndexID)
|
||||||
|
|
||||||
req.IndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: "COSINE"}}
|
req.IndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: "COSINE"}}
|
||||||
req.UserIndexParams = req.IndexParams
|
req.UserIndexParams = req.IndexParams
|
||||||
tmpIndexID, err = m.CanCreateIndex(req)
|
tmpIndexID, err = m.CanCreateIndex(req, false)
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
assert.Equal(t, int64(0), tmpIndexID)
|
assert.Equal(t, int64(0), tmpIndexID)
|
||||||
|
|
||||||
@ -325,7 +325,7 @@ func TestMeta_CanCreateIndex(t *testing.T) {
|
|||||||
req.IndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: "COSINE"}}
|
req.IndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: "COSINE"}}
|
||||||
req.UserIndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "AUTOINDEX"}, {Key: common.MetricTypeKey, Value: "COSINE"}}
|
req.UserIndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "AUTOINDEX"}, {Key: common.MetricTypeKey, Value: "COSINE"}}
|
||||||
req.UserAutoindexMetricTypeSpecified = false
|
req.UserAutoindexMetricTypeSpecified = false
|
||||||
tmpIndexID, err = m.CanCreateIndex(req)
|
tmpIndexID, err = m.CanCreateIndex(req, false)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Equal(t, indexID, tmpIndexID)
|
assert.Equal(t, indexID, tmpIndexID)
|
||||||
// req should follow the meta
|
// req should follow the meta
|
||||||
@ -336,14 +336,14 @@ func TestMeta_CanCreateIndex(t *testing.T) {
|
|||||||
req.IndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: "COSINE"}}
|
req.IndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: "COSINE"}}
|
||||||
req.UserIndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "AUTOINDEX"}, {Key: common.MetricTypeKey, Value: "COSINE"}}
|
req.UserIndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "AUTOINDEX"}, {Key: common.MetricTypeKey, Value: "COSINE"}}
|
||||||
req.UserAutoindexMetricTypeSpecified = true
|
req.UserAutoindexMetricTypeSpecified = true
|
||||||
tmpIndexID, err = m.CanCreateIndex(req)
|
tmpIndexID, err = m.CanCreateIndex(req, false)
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
assert.Equal(t, int64(0), tmpIndexID)
|
assert.Equal(t, int64(0), tmpIndexID)
|
||||||
|
|
||||||
req.IndexParams = indexParams
|
req.IndexParams = indexParams
|
||||||
req.UserIndexParams = indexParams
|
req.UserIndexParams = indexParams
|
||||||
req.FieldID++
|
req.FieldID++
|
||||||
tmpIndexID, err = m.CanCreateIndex(req)
|
tmpIndexID, err = m.CanCreateIndex(req, false)
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
assert.Equal(t, int64(0), tmpIndexID)
|
assert.Equal(t, int64(0), tmpIndexID)
|
||||||
})
|
})
|
||||||
@ -351,14 +351,14 @@ func TestMeta_CanCreateIndex(t *testing.T) {
|
|||||||
t.Run("multiple indexes", func(t *testing.T) {
|
t.Run("multiple indexes", func(t *testing.T) {
|
||||||
req.IndexName = "_default_idx_2"
|
req.IndexName = "_default_idx_2"
|
||||||
req.FieldID = fieldID
|
req.FieldID = fieldID
|
||||||
tmpIndexID, err := m.CanCreateIndex(req)
|
tmpIndexID, err := m.CanCreateIndex(req, false)
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
assert.Equal(t, int64(0), tmpIndexID)
|
assert.Equal(t, int64(0), tmpIndexID)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("index has been deleted", func(t *testing.T) {
|
t.Run("index has been deleted", func(t *testing.T) {
|
||||||
m.indexes[collID][indexID].IsDeleted = true
|
m.indexes[collID][indexID].IsDeleted = true
|
||||||
tmpIndexID, err := m.CanCreateIndex(req)
|
tmpIndexID, err := m.CanCreateIndex(req, false)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Equal(t, int64(0), tmpIndexID)
|
assert.Equal(t, int64(0), tmpIndexID)
|
||||||
})
|
})
|
||||||
|
|||||||
@ -19,13 +19,16 @@ package datacoord
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/samber/lo"
|
"github.com/samber/lo"
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||||
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
"github.com/milvus-io/milvus/internal/metastore/model"
|
"github.com/milvus-io/milvus/internal/metastore/model"
|
||||||
|
"github.com/milvus-io/milvus/internal/parser/planparserv2"
|
||||||
"github.com/milvus-io/milvus/internal/util/indexparamcheck"
|
"github.com/milvus-io/milvus/internal/util/indexparamcheck"
|
||||||
"github.com/milvus-io/milvus/internal/util/vecindexmgr"
|
"github.com/milvus-io/milvus/internal/util/vecindexmgr"
|
||||||
"github.com/milvus-io/milvus/pkg/common"
|
"github.com/milvus-io/milvus/pkg/common"
|
||||||
@ -34,6 +37,7 @@ import (
|
|||||||
"github.com/milvus-io/milvus/pkg/metrics"
|
"github.com/milvus-io/milvus/pkg/metrics"
|
||||||
"github.com/milvus-io/milvus/pkg/proto/datapb"
|
"github.com/milvus-io/milvus/pkg/proto/datapb"
|
||||||
"github.com/milvus-io/milvus/pkg/proto/indexpb"
|
"github.com/milvus-io/milvus/pkg/proto/indexpb"
|
||||||
|
"github.com/milvus-io/milvus/pkg/proto/planpb"
|
||||||
"github.com/milvus-io/milvus/pkg/util/funcutil"
|
"github.com/milvus-io/milvus/pkg/util/funcutil"
|
||||||
"github.com/milvus-io/milvus/pkg/util/merr"
|
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||||
"github.com/milvus-io/milvus/pkg/util/metautil"
|
"github.com/milvus-io/milvus/pkg/util/metautil"
|
||||||
@ -167,13 +171,8 @@ func (s *Server) createIndexForSegmentLoop(ctx context.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Server) getFieldNameByID(ctx context.Context, collID, fieldID int64) (string, error) {
|
func (s *Server) getFieldNameByID(schema *schemapb.CollectionSchema, fieldID int64) (string, error) {
|
||||||
resp, err := s.broker.DescribeCollectionInternal(ctx, collID)
|
for _, field := range schema.GetFields() {
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, field := range resp.GetSchema().GetFields() {
|
|
||||||
if field.FieldID == fieldID {
|
if field.FieldID == fieldID {
|
||||||
return field.Name, nil
|
return field.Name, nil
|
||||||
}
|
}
|
||||||
@ -181,6 +180,62 @@ func (s *Server) getFieldNameByID(ctx context.Context, collID, fieldID int64) (s
|
|||||||
return "", nil
|
return "", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *Server) getSchema(ctx context.Context, collID int64) (*schemapb.CollectionSchema, error) {
|
||||||
|
resp, err := s.broker.DescribeCollectionInternal(ctx, collID)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return resp.GetSchema(), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func isJsonField(schema *schemapb.CollectionSchema, fieldID int64) (bool, error) {
|
||||||
|
for _, f := range schema.Fields {
|
||||||
|
if f.FieldID == fieldID {
|
||||||
|
return typeutil.IsJSONType(f.DataType), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false, merr.WrapErrFieldNotFound(fieldID)
|
||||||
|
}
|
||||||
|
|
||||||
|
func getIndexParam(indexParams []*commonpb.KeyValuePair, key string) (string, error) {
|
||||||
|
for _, p := range indexParams {
|
||||||
|
if p.Key == key {
|
||||||
|
return p.Value, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "", merr.WrapErrParameterInvalidMsg("%s not found", key)
|
||||||
|
}
|
||||||
|
|
||||||
|
func setIndexParam(indexParams []*commonpb.KeyValuePair, key, value string) {
|
||||||
|
for _, p := range indexParams {
|
||||||
|
if p.Key == key {
|
||||||
|
p.Value = value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) parseAndVerifyNestedPath(identifier string, schema *schemapb.CollectionSchema, fieldID int64) (string, error) {
|
||||||
|
helper, err := typeutil.CreateSchemaHelper(schema)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
var identifierExpr *planpb.Expr
|
||||||
|
err = planparserv2.ParseIdentifier(helper, identifier, func(expr *planpb.Expr) error {
|
||||||
|
identifierExpr = expr
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
if identifierExpr.GetColumnExpr().GetInfo().GetFieldId() != fieldID {
|
||||||
|
return "", fmt.Errorf("fieldID not match with field name")
|
||||||
|
}
|
||||||
|
|
||||||
|
nestedPath := identifierExpr.GetColumnExpr().GetInfo().GetNestedPath()
|
||||||
|
return "/" + strings.Join(nestedPath, "/"), nil
|
||||||
|
}
|
||||||
|
|
||||||
// CreateIndex create an index on collection.
|
// CreateIndex create an index on collection.
|
||||||
// Index building is asynchronous, so when an index building request comes, an IndexID is assigned to the task and
|
// Index building is asynchronous, so when an index building request comes, an IndexID is assigned to the task and
|
||||||
// will get all flushed segments from DataCoord and record tasks with these segments. The background process
|
// will get all flushed segments from DataCoord and record tasks with these segments. The background process
|
||||||
@ -202,21 +257,59 @@ func (s *Server) CreateIndex(ctx context.Context, req *indexpb.CreateIndexReques
|
|||||||
}
|
}
|
||||||
metrics.IndexRequestCounter.WithLabelValues(metrics.TotalLabel).Inc()
|
metrics.IndexRequestCounter.WithLabelValues(metrics.TotalLabel).Inc()
|
||||||
|
|
||||||
|
schema, err := s.getSchema(ctx, req.GetCollectionID())
|
||||||
|
if err != nil {
|
||||||
|
return merr.Status(err), nil
|
||||||
|
}
|
||||||
|
isJson, err := isJsonField(schema, req.GetFieldID())
|
||||||
|
if err != nil {
|
||||||
|
return merr.Status(err), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if isJson {
|
||||||
|
jsonPath, err := getIndexParam(req.GetIndexParams(), common.JSONPathKey)
|
||||||
|
if err != nil {
|
||||||
|
log.Error("get json path from index params failed", zap.Error(err))
|
||||||
|
return merr.Status(err), nil
|
||||||
|
}
|
||||||
|
nestedPath, err := s.parseAndVerifyNestedPath(jsonPath, schema, req.GetFieldID())
|
||||||
|
if err != nil {
|
||||||
|
log.Error("parse nested path failed", zap.Error(err))
|
||||||
|
return merr.Status(err), nil
|
||||||
|
}
|
||||||
|
setIndexParam(req.GetIndexParams(), common.JSONPathKey, nestedPath)
|
||||||
|
}
|
||||||
|
|
||||||
if req.GetIndexName() == "" {
|
if req.GetIndexName() == "" {
|
||||||
indexes := s.meta.indexMeta.GetFieldIndexes(req.GetCollectionID(), req.GetFieldID(), req.GetIndexName())
|
indexes := s.meta.indexMeta.GetFieldIndexes(req.GetCollectionID(), req.GetFieldID(), req.GetIndexName())
|
||||||
if len(indexes) == 0 {
|
fieldName, err := s.getFieldNameByID(schema, req.GetFieldID())
|
||||||
fieldName, err := s.getFieldNameByID(ctx, req.GetCollectionID(), req.GetFieldID())
|
if err != nil {
|
||||||
|
log.Warn("get field name from schema failed", zap.Int64("fieldID", req.GetFieldID()))
|
||||||
|
return merr.Status(err), nil
|
||||||
|
}
|
||||||
|
defaultIndexName := fieldName
|
||||||
|
if isJson {
|
||||||
|
jsonPath, err := getIndexParam(req.GetIndexParams(), common.JSONPathKey)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("get field name from schema failed", zap.Int64("fieldID", req.GetFieldID()))
|
|
||||||
return merr.Status(err), nil
|
return merr.Status(err), nil
|
||||||
}
|
}
|
||||||
req.IndexName = fieldName
|
|
||||||
|
indexes = lo.Filter(indexes, func(index *model.Index, i int) bool {
|
||||||
|
path, err := getIndexParam(index.IndexParams, common.JSONPathKey)
|
||||||
|
return err == nil && path == jsonPath
|
||||||
|
})
|
||||||
|
|
||||||
|
defaultIndexName += jsonPath
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(indexes) == 0 {
|
||||||
|
req.IndexName = defaultIndexName
|
||||||
} else if len(indexes) == 1 {
|
} else if len(indexes) == 1 {
|
||||||
req.IndexName = indexes[0].IndexName
|
req.IndexName = indexes[0].IndexName
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
indexID, err := s.meta.indexMeta.CanCreateIndex(req)
|
indexID, err := s.meta.indexMeta.CanCreateIndex(req, isJson)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
metrics.IndexRequestCounter.WithLabelValues(metrics.FailLabel).Inc()
|
metrics.IndexRequestCounter.WithLabelValues(metrics.FailLabel).Inc()
|
||||||
return merr.Status(err), nil
|
return merr.Status(err), nil
|
||||||
|
|||||||
@ -19,6 +19,7 @@ package datacoord
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strconv"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -2584,3 +2585,178 @@ func TestValidateIndexParams(t *testing.T) {
|
|||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestJsonIndex(t *testing.T) {
|
||||||
|
catalog := catalogmocks.NewDataCoordCatalog(t)
|
||||||
|
catalog.EXPECT().CreateIndex(mock.Anything, mock.Anything).Return(nil).Maybe()
|
||||||
|
mock0Allocator := newMockAllocator(t)
|
||||||
|
indexMeta := newSegmentIndexMeta(catalog)
|
||||||
|
b := mocks.NewMockRootCoordClient(t)
|
||||||
|
b.EXPECT().DescribeCollectionInternal(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
|
||||||
|
Status: &commonpb.Status{
|
||||||
|
ErrorCode: 0,
|
||||||
|
Code: 0,
|
||||||
|
},
|
||||||
|
Schema: &schemapb.CollectionSchema{
|
||||||
|
Name: "test_index",
|
||||||
|
Fields: []*schemapb.FieldSchema{
|
||||||
|
{
|
||||||
|
FieldID: 0,
|
||||||
|
Name: "json",
|
||||||
|
DataType: schemapb.DataType_JSON,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
FieldID: 1,
|
||||||
|
Name: "json2",
|
||||||
|
DataType: schemapb.DataType_JSON,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
FieldID: 2,
|
||||||
|
Name: "dynamic",
|
||||||
|
DataType: schemapb.DataType_JSON,
|
||||||
|
IsDynamic: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}, nil)
|
||||||
|
|
||||||
|
s := &Server{
|
||||||
|
meta: &meta{
|
||||||
|
catalog: catalog,
|
||||||
|
collections: map[UniqueID]*collectionInfo{
|
||||||
|
collID: {
|
||||||
|
ID: collID,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
indexMeta: indexMeta,
|
||||||
|
},
|
||||||
|
allocator: mock0Allocator,
|
||||||
|
notifyIndexChan: make(chan UniqueID, 1),
|
||||||
|
broker: broker.NewCoordinatorBroker(b),
|
||||||
|
}
|
||||||
|
s.stateCode.Store(commonpb.StateCode_Healthy)
|
||||||
|
|
||||||
|
req := &indexpb.CreateIndexRequest{
|
||||||
|
FieldID: 0,
|
||||||
|
IndexName: "a",
|
||||||
|
IndexParams: []*commonpb.KeyValuePair{{Key: common.JSONCastTypeKey, Value: strconv.Itoa(int(schemapb.DataType_String))}, {Key: common.JSONPathKey, Value: "json[\"a\"]"}},
|
||||||
|
}
|
||||||
|
resp, err := s.CreateIndex(context.Background(), req)
|
||||||
|
assert.NoError(t, merr.CheckRPCCall(resp, err))
|
||||||
|
|
||||||
|
req = &indexpb.CreateIndexRequest{
|
||||||
|
FieldID: 0,
|
||||||
|
IndexName: "",
|
||||||
|
IndexParams: []*commonpb.KeyValuePair{{Key: common.JSONCastTypeKey, Value: strconv.Itoa(int(schemapb.DataType_String))}, {Key: common.JSONPathKey, Value: "json[\"c\"]"}},
|
||||||
|
}
|
||||||
|
resp, err = s.CreateIndex(context.Background(), req)
|
||||||
|
assert.NoError(t, merr.CheckRPCCall(resp, err))
|
||||||
|
|
||||||
|
// different json field with same json path
|
||||||
|
req = &indexpb.CreateIndexRequest{
|
||||||
|
FieldID: 1,
|
||||||
|
IndexName: "",
|
||||||
|
IndexParams: []*commonpb.KeyValuePair{{Key: common.JSONCastTypeKey, Value: strconv.Itoa(int(schemapb.DataType_String))}, {Key: common.JSONPathKey, Value: "json2[\"c\"]"}},
|
||||||
|
}
|
||||||
|
resp, err = s.CreateIndex(context.Background(), req)
|
||||||
|
assert.NoError(t, merr.CheckRPCCall(resp, err))
|
||||||
|
|
||||||
|
// duplicated index with same params
|
||||||
|
req = &indexpb.CreateIndexRequest{
|
||||||
|
FieldID: 0,
|
||||||
|
IndexName: "a",
|
||||||
|
IndexParams: []*commonpb.KeyValuePair{{Key: common.JSONCastTypeKey, Value: strconv.Itoa(int(schemapb.DataType_String))}, {Key: common.JSONPathKey, Value: "json[\"a\"]"}},
|
||||||
|
}
|
||||||
|
resp, err = s.CreateIndex(context.Background(), req)
|
||||||
|
assert.NoError(t, merr.CheckRPCCall(resp, err))
|
||||||
|
|
||||||
|
// duplicated index with different cast type
|
||||||
|
req = &indexpb.CreateIndexRequest{
|
||||||
|
FieldID: 0,
|
||||||
|
IndexName: "a",
|
||||||
|
IndexParams: []*commonpb.KeyValuePair{{Key: common.JSONCastTypeKey, Value: strconv.Itoa(int(schemapb.DataType_Int16))}, {Key: common.JSONPathKey, Value: "json[\"a\"]"}},
|
||||||
|
}
|
||||||
|
resp, err = s.CreateIndex(context.Background(), req)
|
||||||
|
assert.Error(t, merr.CheckRPCCall(resp, err))
|
||||||
|
|
||||||
|
// duplicated index with different index name
|
||||||
|
req = &indexpb.CreateIndexRequest{
|
||||||
|
FieldID: 0,
|
||||||
|
IndexName: "b",
|
||||||
|
IndexParams: []*commonpb.KeyValuePair{{Key: common.JSONCastTypeKey, Value: strconv.Itoa(int(schemapb.DataType_Int16))}, {Key: common.JSONPathKey, Value: "json[\"a\"]"}},
|
||||||
|
}
|
||||||
|
resp, err = s.CreateIndex(context.Background(), req)
|
||||||
|
assert.Error(t, merr.CheckRPCCall(resp, err))
|
||||||
|
|
||||||
|
// another field json index with same index name
|
||||||
|
req = &indexpb.CreateIndexRequest{
|
||||||
|
FieldID: 0,
|
||||||
|
IndexName: "a",
|
||||||
|
IndexParams: []*commonpb.KeyValuePair{{Key: common.JSONCastTypeKey, Value: strconv.Itoa(int(schemapb.DataType_Int16))}, {Key: common.JSONPathKey, Value: "json[\"b\"]"}},
|
||||||
|
}
|
||||||
|
resp, err = s.CreateIndex(context.Background(), req)
|
||||||
|
assert.Error(t, merr.CheckRPCCall(resp, err))
|
||||||
|
|
||||||
|
// lack of json params
|
||||||
|
req = &indexpb.CreateIndexRequest{
|
||||||
|
FieldID: 0,
|
||||||
|
IndexName: "a",
|
||||||
|
IndexParams: []*commonpb.KeyValuePair{{Key: common.JSONPathKey, Value: "json[\"a\"]"}},
|
||||||
|
}
|
||||||
|
resp, err = s.CreateIndex(context.Background(), req)
|
||||||
|
assert.Error(t, merr.CheckRPCCall(resp, err))
|
||||||
|
|
||||||
|
// incorrect field name in json path
|
||||||
|
req = &indexpb.CreateIndexRequest{
|
||||||
|
FieldID: 1,
|
||||||
|
IndexName: "c",
|
||||||
|
IndexParams: []*commonpb.KeyValuePair{{Key: common.JSONCastTypeKey, Value: strconv.Itoa(int(schemapb.DataType_Int16))}, {Key: common.JSONPathKey, Value: "bad_json[\"a\"]"}},
|
||||||
|
}
|
||||||
|
resp, err = s.CreateIndex(context.Background(), req)
|
||||||
|
assert.Error(t, merr.CheckRPCCall(resp, err))
|
||||||
|
|
||||||
|
// dynamic field
|
||||||
|
req = &indexpb.CreateIndexRequest{
|
||||||
|
FieldID: 2,
|
||||||
|
IndexName: "",
|
||||||
|
IndexParams: []*commonpb.KeyValuePair{{Key: common.JSONCastTypeKey, Value: strconv.Itoa(int(schemapb.DataType_Int16))}, {Key: common.JSONPathKey, Value: "dynamic_a_field"}},
|
||||||
|
}
|
||||||
|
resp, err = s.CreateIndex(context.Background(), req)
|
||||||
|
assert.NoError(t, merr.CheckRPCCall(resp, err))
|
||||||
|
|
||||||
|
// wrong path: missing quotes
|
||||||
|
req = &indexpb.CreateIndexRequest{
|
||||||
|
FieldID: 0,
|
||||||
|
IndexName: "d",
|
||||||
|
IndexParams: []*commonpb.KeyValuePair{{Key: common.JSONCastTypeKey, Value: strconv.Itoa(int(schemapb.DataType_Int16))}, {Key: common.JSONPathKey, Value: "json[a][\"b\"]"}},
|
||||||
|
}
|
||||||
|
resp, err = s.CreateIndex(context.Background(), req)
|
||||||
|
assert.Error(t, merr.CheckRPCCall(resp, err))
|
||||||
|
|
||||||
|
// wrong path: missing closing quote
|
||||||
|
req = &indexpb.CreateIndexRequest{
|
||||||
|
FieldID: 0,
|
||||||
|
IndexName: "e",
|
||||||
|
IndexParams: []*commonpb.KeyValuePair{{Key: common.JSONCastTypeKey, Value: strconv.Itoa(int(schemapb.DataType_Int16))}, {Key: common.JSONPathKey, Value: "json[\"a\"][\"b"}},
|
||||||
|
}
|
||||||
|
resp, err = s.CreateIndex(context.Background(), req)
|
||||||
|
assert.Error(t, merr.CheckRPCCall(resp, err))
|
||||||
|
|
||||||
|
// wrong path: malformed brackets
|
||||||
|
req = &indexpb.CreateIndexRequest{
|
||||||
|
FieldID: 0,
|
||||||
|
IndexName: "f",
|
||||||
|
IndexParams: []*commonpb.KeyValuePair{{Key: common.JSONCastTypeKey, Value: strconv.Itoa(int(schemapb.DataType_Int16))}, {Key: common.JSONPathKey, Value: "json[\"a\"[\"b]"}},
|
||||||
|
}
|
||||||
|
resp, err = s.CreateIndex(context.Background(), req)
|
||||||
|
assert.Error(t, merr.CheckRPCCall(resp, err))
|
||||||
|
|
||||||
|
// valid path with array index
|
||||||
|
req = &indexpb.CreateIndexRequest{
|
||||||
|
FieldID: 0,
|
||||||
|
IndexName: "g",
|
||||||
|
IndexParams: []*commonpb.KeyValuePair{{Key: common.JSONCastTypeKey, Value: strconv.Itoa(int(schemapb.DataType_Int16))}, {Key: common.JSONPathKey, Value: "json[\"a\"][0][\"b\"]"}},
|
||||||
|
}
|
||||||
|
resp, err = s.CreateIndex(context.Background(), req)
|
||||||
|
assert.NoError(t, merr.CheckRPCCall(resp, err))
|
||||||
|
}
|
||||||
|
|||||||
@ -817,6 +817,7 @@ func GenAndSaveIndexV2(collectionID, partitionID, segmentID, buildID int64,
|
|||||||
IndexParams: indexInfo.GetIndexParams(),
|
IndexParams: indexInfo.GetIndexParams(),
|
||||||
IndexFilePaths: indexPaths,
|
IndexFilePaths: indexPaths,
|
||||||
CurrentIndexVersion: indexVersion.CurrentIndexVersion,
|
CurrentIndexVersion: indexVersion.CurrentIndexVersion,
|
||||||
|
IndexID: indexInfo.GetIndexID(),
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -426,7 +426,7 @@ func (cit *createIndexTask) getIndexedFieldAndFunction(ctx context.Context) erro
|
|||||||
return fmt.Errorf("failed to get collection schema: %s", err)
|
return fmt.Errorf("failed to get collection schema: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
field, err := schema.schemaHelper.GetFieldFromName(cit.req.GetFieldName())
|
field, err := schema.schemaHelper.GetFieldFromNameDefaultJSON(cit.req.GetFieldName())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Ctx(ctx).Error("create index on non-exist field", zap.Error(err))
|
log.Ctx(ctx).Error("create index on non-exist field", zap.Error(err))
|
||||||
return fmt.Errorf("cannot create index on non-exist field: %s", cit.req.GetFieldName())
|
return fmt.Errorf("cannot create index on non-exist field: %s", cit.req.GetFieldName())
|
||||||
|
|||||||
@ -164,7 +164,7 @@ func (c *IndexChecker) checkSegment(segment *meta.Segment, indexInfos []*indexpb
|
|||||||
var result []int64
|
var result []int64
|
||||||
for _, indexInfo := range indexInfos {
|
for _, indexInfo := range indexInfos {
|
||||||
fieldID, indexID := indexInfo.FieldID, indexInfo.IndexID
|
fieldID, indexID := indexInfo.FieldID, indexInfo.IndexID
|
||||||
info, ok := segment.IndexInfo[fieldID]
|
info, ok := segment.IndexInfo[indexID]
|
||||||
if !ok {
|
if !ok {
|
||||||
result = append(result, fieldID)
|
result = append(result, fieldID)
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -124,7 +124,7 @@ type Segment struct {
|
|||||||
Node int64 // Node the segment is in
|
Node int64 // Node the segment is in
|
||||||
Version int64 // Version is the timestamp of loading segment
|
Version int64 // Version is the timestamp of loading segment
|
||||||
LastDeltaTimestamp uint64 // The timestamp of the last delta record
|
LastDeltaTimestamp uint64 // The timestamp of the last delta record
|
||||||
IndexInfo map[int64]*querypb.FieldIndexInfo // index info of loaded segment
|
IndexInfo map[int64]*querypb.FieldIndexInfo // index info of loaded segment, indexID -> FieldIndexInfo
|
||||||
}
|
}
|
||||||
|
|
||||||
func SegmentFromInfo(info *datapb.SegmentInfo) *Segment {
|
func SegmentFromInfo(info *datapb.SegmentInfo) *Segment {
|
||||||
|
|||||||
@ -364,19 +364,19 @@ func (_c *MockSegment_GetBM25Stats_Call) RunAndReturn(run func() map[int64]*stor
|
|||||||
}
|
}
|
||||||
|
|
||||||
// GetIndex provides a mock function with given fields: fieldID
|
// GetIndex provides a mock function with given fields: fieldID
|
||||||
func (_m *MockSegment) GetIndex(fieldID int64) *IndexedFieldInfo {
|
func (_m *MockSegment) GetIndex(fieldID int64) []*IndexedFieldInfo {
|
||||||
ret := _m.Called(fieldID)
|
ret := _m.Called(fieldID)
|
||||||
|
|
||||||
if len(ret) == 0 {
|
if len(ret) == 0 {
|
||||||
panic("no return value specified for GetIndex")
|
panic("no return value specified for GetIndex")
|
||||||
}
|
}
|
||||||
|
|
||||||
var r0 *IndexedFieldInfo
|
var r0 []*IndexedFieldInfo
|
||||||
if rf, ok := ret.Get(0).(func(int64) *IndexedFieldInfo); ok {
|
if rf, ok := ret.Get(0).(func(int64) []*IndexedFieldInfo); ok {
|
||||||
r0 = rf(fieldID)
|
r0 = rf(fieldID)
|
||||||
} else {
|
} else {
|
||||||
if ret.Get(0) != nil {
|
if ret.Get(0) != nil {
|
||||||
r0 = ret.Get(0).(*IndexedFieldInfo)
|
r0 = ret.Get(0).([]*IndexedFieldInfo)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -401,12 +401,60 @@ func (_c *MockSegment_GetIndex_Call) Run(run func(fieldID int64)) *MockSegment_G
|
|||||||
return _c
|
return _c
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return sets the value the mocked GetIndex call hands back and returns the
// same call object so expectations can be chained.
// Diff view: in each pair below the first line is the pre-change code and the
// second is the post-change code; the only change is that _a0 becomes a slice
// ([]*IndexedFieldInfo) instead of a single *IndexedFieldInfo.
func (_c *MockSegment_GetIndex_Call) Return(_a0 *IndexedFieldInfo) *MockSegment_GetIndex_Call {
|
func (_c *MockSegment_GetIndex_Call) Return(_a0 []*IndexedFieldInfo) *MockSegment_GetIndex_Call {
|
||||||
_c.Call.Return(_a0)
|
_c.Call.Return(_a0)
|
||||||
return _c
|
return _c
|
||||||
}
|
}
|
||||||
|
|
||||||
// RunAndReturn registers run as the return value of the mocked GetIndex call
// and returns the same call object for chaining.
// Diff view: the first signature is the pre-change version, the second the
// post-change version (run now yields []*IndexedFieldInfo). The body rows that
// follow exist only on the post-change side of this hunk.
func (_c *MockSegment_GetIndex_Call) RunAndReturn(run func(int64) *IndexedFieldInfo) *MockSegment_GetIndex_Call {
|
func (_c *MockSegment_GetIndex_Call) RunAndReturn(run func(int64) []*IndexedFieldInfo) *MockSegment_GetIndex_Call {
|
||||||
|
// The function itself is stored as the return value; the mocked GetIndex
// presumably detects a func-typed value and invokes it — confirm against its body.
_c.Call.Return(run)
|
||||||
|
return _c
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetIndexByID provides a mock function with given fields: indexID
//
// The call is recorded through _m.Called(indexID) and the result is resolved
// from whatever that returns:
//   - no values at all: panics with "no return value specified for GetIndexByID";
//   - a func(int64) *IndexedFieldInfo: the function is invoked with indexID;
//   - any other non-nil value: it is type-asserted to *IndexedFieldInfo;
//   - a nil value: nil is returned.
// NOTE(review): naming and layout look mockery-generated — presumably this
// should be regenerated rather than hand-edited; confirm against the file header.
|
||||||
|
func (_m *MockSegment) GetIndexByID(indexID int64) *IndexedFieldInfo {
|
||||||
|
ret := _m.Called(indexID)
|
||||||
|
|
||||||
|
// An empty result list means no return value was configured; fail loudly.
if len(ret) == 0 {
|
||||||
|
panic("no return value specified for GetIndexByID")
|
||||||
|
}
|
||||||
|
|
||||||
|
var r0 *IndexedFieldInfo
|
||||||
|
// A configured function takes precedence: it computes the result from indexID.
if rf, ok := ret.Get(0).(func(int64) *IndexedFieldInfo); ok {
|
||||||
|
r0 = rf(indexID)
|
||||||
|
} else {
|
||||||
|
if ret.Get(0) != nil {
|
||||||
|
// Unchecked type assertion: a configured value of any other type panics here.
r0 = ret.Get(0).(*IndexedFieldInfo)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return r0
|
||||||
|
}
|
||||||
|
|
||||||
|
// MockSegment_GetIndexByID_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetIndexByID'
// It carries no state of its own beyond the embedded call.
|
||||||
|
type MockSegment_GetIndexByID_Call struct {
|
||||||
|
// Embedded so the generic mock.Call methods stay reachable on the typed wrapper.
*mock.Call
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetIndexByID is a helper method to define mock.On call
// It registers an expectation for the method name "GetIndexByID" with the
// given argument and wraps the resulting call in the typed call struct.
|
||||||
|
// - indexID int64
// The parameter is declared as interface{} and passed straight to mock.On,
// so any value the caller supplies is forwarded unchanged.
|
||||||
|
func (_e *MockSegment_Expecter) GetIndexByID(indexID interface{}) *MockSegment_GetIndexByID_Call {
|
||||||
|
return &MockSegment_GetIndexByID_Call{Call: _e.mock.On("GetIndexByID", indexID)}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run registers a typed callback on the underlying call: the callback
// receives the first recorded argument as an int64 index ID.
// Returns the same call object so expectations can be chained.
func (_c *MockSegment_GetIndexByID_Call) Run(run func(indexID int64)) *MockSegment_GetIndexByID_Call {
|
||||||
|
_c.Call.Run(func(args mock.Arguments) {
|
||||||
|
// Unchecked type assertion: panics if the first argument is not an int64.
run(args[0].(int64))
|
||||||
|
})
|
||||||
|
return _c
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return sets _a0 as the value the mocked GetIndexByID call hands back and
// returns the same call object so expectations can be chained.
func (_c *MockSegment_GetIndexByID_Call) Return(_a0 *IndexedFieldInfo) *MockSegment_GetIndexByID_Call {
|
||||||
|
_c.Call.Return(_a0)
|
||||||
|
return _c
|
||||||
|
}
|
||||||
|
|
||||||
|
// RunAndReturn registers run as the return value of the mocked GetIndexByID
// call and returns the same call object for chaining.
// Diff view: the signature exists only on the post-change side; the paired
// body rows below show identical pre-change and post-change lines.
func (_c *MockSegment_GetIndexByID_Call) RunAndReturn(run func(int64) *IndexedFieldInfo) *MockSegment_GetIndexByID_Call {
|
||||||
// The function itself is stored as the return value rather than being called here.
_c.Call.Return(run)
|
_c.Call.Return(run)
|
||||||
return _c
|
return _c
|
||||||
}
|
}
|
||||||
|
|||||||
@ -295,7 +295,7 @@ type LocalSegment struct {
|
|||||||
|
|
||||||
lastDeltaTimestamp *atomic.Uint64
|
lastDeltaTimestamp *atomic.Uint64
|
||||||
fields *typeutil.ConcurrentMap[int64, *FieldInfo]
|
fields *typeutil.ConcurrentMap[int64, *FieldInfo]
|
||||||
fieldIndexes *typeutil.ConcurrentMap[int64, *IndexedFieldInfo]
|
fieldIndexes *typeutil.ConcurrentMap[int64, *IndexedFieldInfo] // indexID -> IndexedFieldInfo
|
||||||
warmupDispatcher *AsyncWarmupDispatcher
|
warmupDispatcher *AsyncWarmupDispatcher
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -382,13 +382,14 @@ func (s *LocalSegment) initializeSegment() error {
|
|||||||
indexedFieldInfos, fieldBinlogs := separateIndexAndBinlog(loadInfo)
|
indexedFieldInfos, fieldBinlogs := separateIndexAndBinlog(loadInfo)
|
||||||
schemaHelper, _ := typeutil.CreateSchemaHelper(s.collection.Schema())
|
schemaHelper, _ := typeutil.CreateSchemaHelper(s.collection.Schema())
|
||||||
|
|
||||||
for fieldID, info := range indexedFieldInfos {
|
for _, info := range indexedFieldInfos {
|
||||||
|
fieldID := info.IndexInfo.FieldID
|
||||||
field, err := schemaHelper.GetFieldFromID(fieldID)
|
field, err := schemaHelper.GetFieldFromID(fieldID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
indexInfo := info.IndexInfo
|
indexInfo := info.IndexInfo
|
||||||
s.fieldIndexes.Insert(indexInfo.GetFieldID(), &IndexedFieldInfo{
|
s.fieldIndexes.Insert(indexInfo.GetIndexID(), &IndexedFieldInfo{
|
||||||
FieldBinlog: &datapb.FieldBinlog{
|
FieldBinlog: &datapb.FieldBinlog{
|
||||||
FieldID: indexInfo.GetFieldID(),
|
FieldID: indexInfo.GetFieldID(),
|
||||||
},
|
},
|
||||||
@ -472,17 +473,32 @@ func (s *LocalSegment) LastDeltaTimestamp() uint64 {
|
|||||||
return s.lastDeltaTimestamp.Load()
|
return s.lastDeltaTimestamp.Load()
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetIndexByID returns the entry stored in fieldIndexes under indexID.
// The "found" flag from Get is discarded, so a missing entry yields whatever
// Get returns for an absent key (presumably nil — confirm against
// typeutil.ConcurrentMap.Get).
// Diff view: in each pair below the first line is the pre-change code (the old
// single-result GetIndex keyed by field ID) and the second is the post-change
// code (the lookup renamed to GetIndexByID and keyed by index ID).
func (s *LocalSegment) GetIndex(fieldID int64) *IndexedFieldInfo {
|
func (s *LocalSegment) GetIndexByID(indexID int64) *IndexedFieldInfo {
|
||||||
info, _ := s.fieldIndexes.Get(fieldID)
|
info, _ := s.fieldIndexes.Get(indexID)
|
||||||
|
return info
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetIndex collects every entry of fieldIndexes whose IndexInfo.FieldID equals
// fieldID and returns them as a slice. The slice is nil when nothing matches,
// because info is never appended to in that case.
// Diff view: rows with a single code line exist only on the post-change side;
// the trailing pairs show the old function's tail next to the new one.
// NOTE(review): value.IndexInfo is dereferenced without a nil check, so an
// entry stored with a nil IndexInfo would panic here — confirm entries are
// always inserted with IndexInfo set.
func (s *LocalSegment) GetIndex(fieldID int64) []*IndexedFieldInfo {
|
||||||
|
var info []*IndexedFieldInfo
|
||||||
|
s.fieldIndexes.Range(func(key int64, value *IndexedFieldInfo) bool {
|
||||||
|
if value.IndexInfo.FieldID == fieldID {
|
||||||
|
info = append(info, value)
|
||||||
|
}
|
||||||
|
// Always returns true so that Range presumably visits every entry
// (sync.Map-style contract) — confirm against typeutil.ConcurrentMap.
return true
|
||||||
|
})
|
||||||
return info
|
return info
|
||||||
}
|
}
|
||||||
|
|
||||||
// ExistIndex reports whether this segment holds an index for fieldID.
//
// Diff view: in each pair below the first line is the pre-change code and the
// second is the post-change code.
//   - Before: fieldIndexes was looked up directly with fieldID; the result was
//     true only when the entry existed and its IndexInfo was non-nil.
//   - After: the entries of fieldIndexes are scanned and the result is true
//     once an entry's IndexInfo.FieldID equals fieldID.
// NOTE(review): the old code guarded against a nil IndexInfo, while the new
// callback dereferences value.IndexInfo.FieldID unguarded and would panic on an
// entry with a nil IndexInfo — confirm entries are always inserted with
// IndexInfo set.
func (s *LocalSegment) ExistIndex(fieldID int64) bool {
|
func (s *LocalSegment) ExistIndex(fieldID int64) bool {
|
||||||
fieldInfo, ok := s.fieldIndexes.Get(fieldID)
|
contain := false
|
||||||
if !ok {
|
s.fieldIndexes.Range(func(key int64, value *IndexedFieldInfo) bool {
|
||||||
return false
|
if value.IndexInfo.FieldID == fieldID {
|
||||||
}
|
contain = true
|
||||||
return fieldInfo.IndexInfo != nil
|
}
|
||||||
|
// Returns false once a match is recorded, which presumably stops Range early
// (sync.Map-style contract) — confirm against typeutil.ConcurrentMap.
return !contain
|
||||||
|
})
|
||||||
|
|
||||||
|
return contain
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *LocalSegment) HasRawData(fieldID int64) bool {
|
func (s *LocalSegment) HasRawData(fieldID int64) bool {
|
||||||
@ -1001,9 +1017,9 @@ func (s *LocalSegment) LoadIndex(ctx context.Context, indexInfo *querypb.FieldIn
|
|||||||
zap.Int64("indexID", indexInfo.GetIndexID()),
|
zap.Int64("indexID", indexInfo.GetIndexID()),
|
||||||
)
|
)
|
||||||
|
|
||||||
old := s.GetIndex(indexInfo.GetFieldID())
|
old := s.GetIndexByID(indexInfo.GetIndexID())
|
||||||
// the index loaded
|
// the index loaded
|
||||||
if old != nil && old.IndexInfo.GetIndexID() == indexInfo.GetIndexID() && old.IsLoaded {
|
if old != nil && old.IsLoaded {
|
||||||
log.Warn("index already loaded")
|
log.Warn("index already loaded")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -1163,7 +1179,7 @@ func (s *LocalSegment) UpdateIndexInfo(ctx context.Context, indexInfo *querypb.F
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
s.fieldIndexes.Insert(indexInfo.GetFieldID(), &IndexedFieldInfo{
|
s.fieldIndexes.Insert(indexInfo.GetIndexID(), &IndexedFieldInfo{
|
||||||
FieldBinlog: &datapb.FieldBinlog{
|
FieldBinlog: &datapb.FieldBinlog{
|
||||||
FieldID: indexInfo.GetFieldID(),
|
FieldID: indexInfo.GetFieldID(),
|
||||||
},
|
},
|
||||||
|
|||||||
@ -72,7 +72,8 @@ type Segment interface {
|
|||||||
ResourceUsageEstimate() ResourceUsage
|
ResourceUsageEstimate() ResourceUsage
|
||||||
|
|
||||||
// Index related
|
// Index related
|
||||||
GetIndex(fieldID int64) *IndexedFieldInfo
|
GetIndexByID(indexID int64) *IndexedFieldInfo
|
||||||
|
GetIndex(fieldID int64) []*IndexedFieldInfo
|
||||||
ExistIndex(fieldID int64) bool
|
ExistIndex(fieldID int64) bool
|
||||||
Indexes() []*IndexedFieldInfo
|
Indexes() []*IndexedFieldInfo
|
||||||
HasRawData(fieldID int64) bool
|
HasRawData(fieldID int64) bool
|
||||||
|
|||||||
@ -105,7 +105,11 @@ func (s *L0Segment) LastDeltaTimestamp() uint64 {
|
|||||||
return last
|
return last
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetIndex and GetIndexByID on L0Segment both return nil unconditionally.
// Diff view: in each pair below the first line is the pre-change code and the
// second is the post-change code. GetIndex changes its return type to a slice,
// and GetIndexByID is added; the old GetIndex body and closing brace pair up
// with the new GetIndexByID's.
func (s *L0Segment) GetIndex(fieldID int64) *IndexedFieldInfo {
|
func (s *L0Segment) GetIndex(fieldID int64) []*IndexedFieldInfo {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetIndexByID always returns nil; indexID is ignored.
func (s *L0Segment) GetIndexByID(indexID int64) *IndexedFieldInfo {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -659,11 +659,11 @@ func (loader *segmentLoader) LoadBloomFilterSet(ctx context.Context, collectionI
|
|||||||
}
|
}
|
||||||
|
|
||||||
func separateIndexAndBinlog(loadInfo *querypb.SegmentLoadInfo) (map[int64]*IndexedFieldInfo, []*datapb.FieldBinlog) {
|
func separateIndexAndBinlog(loadInfo *querypb.SegmentLoadInfo) (map[int64]*IndexedFieldInfo, []*datapb.FieldBinlog) {
|
||||||
fieldID2IndexInfo := make(map[int64]*querypb.FieldIndexInfo)
|
fieldID2IndexInfo := make(map[int64][]*querypb.FieldIndexInfo)
|
||||||
for _, indexInfo := range loadInfo.IndexInfos {
|
for _, indexInfo := range loadInfo.IndexInfos {
|
||||||
if len(indexInfo.GetIndexFilePaths()) > 0 {
|
if len(indexInfo.GetIndexFilePaths()) > 0 {
|
||||||
fieldID := indexInfo.FieldID
|
fieldID := indexInfo.FieldID
|
||||||
fieldID2IndexInfo[fieldID] = indexInfo
|
fieldID2IndexInfo[fieldID] = append(fieldID2IndexInfo[fieldID], indexInfo)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -674,11 +674,13 @@ func separateIndexAndBinlog(loadInfo *querypb.SegmentLoadInfo) (map[int64]*Index
|
|||||||
fieldID := fieldBinlog.FieldID
|
fieldID := fieldBinlog.FieldID
|
||||||
// check num rows of data meta and index meta are consistent
|
// check num rows of data meta and index meta are consistent
|
||||||
if indexInfo, ok := fieldID2IndexInfo[fieldID]; ok {
|
if indexInfo, ok := fieldID2IndexInfo[fieldID]; ok {
|
||||||
fieldInfo := &IndexedFieldInfo{
|
for _, index := range indexInfo {
|
||||||
FieldBinlog: fieldBinlog,
|
fieldInfo := &IndexedFieldInfo{
|
||||||
IndexInfo: indexInfo,
|
FieldBinlog: fieldBinlog,
|
||||||
|
IndexInfo: index,
|
||||||
|
}
|
||||||
|
indexedFieldInfos[index.IndexID] = fieldInfo
|
||||||
}
|
}
|
||||||
indexedFieldInfos[fieldID] = fieldInfo
|
|
||||||
} else {
|
} else {
|
||||||
fieldBinlogs = append(fieldBinlogs, fieldBinlog)
|
fieldBinlogs = append(fieldBinlogs, fieldBinlog)
|
||||||
}
|
}
|
||||||
@ -693,11 +695,11 @@ func separateLoadInfoV2(loadInfo *querypb.SegmentLoadInfo, schema *schemapb.Coll
|
|||||||
map[int64]*datapb.TextIndexStats, // text indexed info
|
map[int64]*datapb.TextIndexStats, // text indexed info
|
||||||
map[int64]struct{}, // unindexed text fields
|
map[int64]struct{}, // unindexed text fields
|
||||||
) {
|
) {
|
||||||
fieldID2IndexInfo := make(map[int64]*querypb.FieldIndexInfo)
|
fieldID2IndexInfo := make(map[int64][]*querypb.FieldIndexInfo)
|
||||||
for _, indexInfo := range loadInfo.IndexInfos {
|
for _, indexInfo := range loadInfo.IndexInfos {
|
||||||
if len(indexInfo.GetIndexFilePaths()) > 0 {
|
if len(indexInfo.GetIndexFilePaths()) > 0 {
|
||||||
fieldID := indexInfo.FieldID
|
fieldID := indexInfo.FieldID
|
||||||
fieldID2IndexInfo[fieldID] = indexInfo
|
fieldID2IndexInfo[fieldID] = append(fieldID2IndexInfo[fieldID], indexInfo)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -707,12 +709,14 @@ func separateLoadInfoV2(loadInfo *querypb.SegmentLoadInfo, schema *schemapb.Coll
|
|||||||
for _, fieldBinlog := range loadInfo.BinlogPaths {
|
for _, fieldBinlog := range loadInfo.BinlogPaths {
|
||||||
fieldID := fieldBinlog.FieldID
|
fieldID := fieldBinlog.FieldID
|
||||||
// check num rows of data meta and index meta are consistent
|
// check num rows of data meta and index meta are consistent
|
||||||
if indexInfo, ok := fieldID2IndexInfo[fieldID]; ok {
|
if infos, ok := fieldID2IndexInfo[fieldID]; ok {
|
||||||
fieldInfo := &IndexedFieldInfo{
|
for _, indexInfo := range infos {
|
||||||
FieldBinlog: fieldBinlog,
|
fieldInfo := &IndexedFieldInfo{
|
||||||
IndexInfo: indexInfo,
|
FieldBinlog: fieldBinlog,
|
||||||
|
IndexInfo: indexInfo,
|
||||||
|
}
|
||||||
|
indexedFieldInfos[indexInfo.IndexID] = fieldInfo
|
||||||
}
|
}
|
||||||
indexedFieldInfos[fieldID] = fieldInfo
|
|
||||||
} else {
|
} else {
|
||||||
fieldBinlogs = append(fieldBinlogs, fieldBinlog)
|
fieldBinlogs = append(fieldBinlogs, fieldBinlog)
|
||||||
}
|
}
|
||||||
@ -769,7 +773,7 @@ func (loader *segmentLoader) loadSealedSegment(ctx context.Context, loadInfo *qu
|
|||||||
log := log.Ctx(ctx).With(zap.Int64("segmentID", segment.ID()))
|
log := log.Ctx(ctx).With(zap.Int64("segmentID", segment.ID()))
|
||||||
tr := timerecord.NewTimeRecorder("segmentLoader.loadSealedSegment")
|
tr := timerecord.NewTimeRecorder("segmentLoader.loadSealedSegment")
|
||||||
log.Info("Start loading fields...",
|
log.Info("Start loading fields...",
|
||||||
zap.Int64s("indexedFields", lo.Keys(indexedFieldInfos)),
|
// zap.Int64s("indexedFields", lo.Keys(indexedFieldInfos)),
|
||||||
zap.Int64s("indexed text fields", lo.Keys(textIndexes)),
|
zap.Int64s("indexed text fields", lo.Keys(textIndexes)),
|
||||||
zap.Int64s("unindexed text fields", lo.Keys(unindexedTextFields)),
|
zap.Int64s("unindexed text fields", lo.Keys(unindexedTextFields)),
|
||||||
)
|
)
|
||||||
@ -780,7 +784,8 @@ func (loader *segmentLoader) loadSealedSegment(ctx context.Context, loadInfo *qu
|
|||||||
metrics.QueryNodeLoadIndexLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(loadFieldsIndexSpan.Milliseconds()))
|
metrics.QueryNodeLoadIndexLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(loadFieldsIndexSpan.Milliseconds()))
|
||||||
|
|
||||||
// 2. complement raw data for the scalar fields without raw data
|
// 2. complement raw data for the scalar fields without raw data
|
||||||
for fieldID, info := range indexedFieldInfos {
|
for _, info := range indexedFieldInfos {
|
||||||
|
fieldID := info.IndexInfo.FieldID
|
||||||
field, err := schemaHelper.GetFieldFromID(fieldID)
|
field, err := schemaHelper.GetFieldFromID(fieldID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@ -1021,7 +1026,8 @@ func (loader *segmentLoader) loadFieldsIndex(ctx context.Context,
|
|||||||
zap.Int64("rowCount", numRows),
|
zap.Int64("rowCount", numRows),
|
||||||
)
|
)
|
||||||
|
|
||||||
for fieldID, fieldInfo := range indexedFieldInfos {
|
for _, fieldInfo := range indexedFieldInfos {
|
||||||
|
fieldID := fieldInfo.IndexInfo.FieldID
|
||||||
indexInfo := fieldInfo.IndexInfo
|
indexInfo := fieldInfo.IndexInfo
|
||||||
tr := timerecord.NewTimeRecorder("loadFieldIndex")
|
tr := timerecord.NewTimeRecorder("loadFieldIndex")
|
||||||
err := loader.loadFieldIndex(ctx, segment, indexInfo)
|
err := loader.loadFieldIndex(ctx, segment, indexInfo)
|
||||||
@ -1282,7 +1288,7 @@ func (loader *segmentLoader) LoadDeltaLogs(ctx context.Context, segment Segment,
|
|||||||
func (loader *segmentLoader) patchEntryNumber(ctx context.Context, segment *LocalSegment, loadInfo *querypb.SegmentLoadInfo) error {
|
func (loader *segmentLoader) patchEntryNumber(ctx context.Context, segment *LocalSegment, loadInfo *querypb.SegmentLoadInfo) error {
|
||||||
var needReset bool
|
var needReset bool
|
||||||
|
|
||||||
segment.fieldIndexes.Range(func(fieldID int64, info *IndexedFieldInfo) bool {
|
segment.fieldIndexes.Range(func(indexID int64, info *IndexedFieldInfo) bool {
|
||||||
for _, info := range info.FieldBinlog.GetBinlogs() {
|
for _, info := range info.FieldBinlog.GetBinlogs() {
|
||||||
if info.GetEntriesNum() == 0 {
|
if info.GetEntriesNum() == 0 {
|
||||||
needReset = true
|
needReset = true
|
||||||
@ -1332,7 +1338,7 @@ func (loader *segmentLoader) patchEntryNumber(ctx context.Context, segment *Loca
|
|||||||
}
|
}
|
||||||
|
|
||||||
var err error
|
var err error
|
||||||
segment.fieldIndexes.Range(func(fieldID int64, info *IndexedFieldInfo) bool {
|
segment.fieldIndexes.Range(func(indexID int64, info *IndexedFieldInfo) bool {
|
||||||
if len(info.FieldBinlog.GetBinlogs()) != len(counts) {
|
if len(info.FieldBinlog.GetBinlogs()) != len(counts) {
|
||||||
err = errors.New("rowID & index binlog number not matched")
|
err = errors.New("rowID & index binlog number not matched")
|
||||||
return false
|
return false
|
||||||
|
|||||||
@ -678,7 +678,7 @@ func (suite *SegmentLoaderSuite) TestPatchEntryNum() {
|
|||||||
info := segment.GetIndex(vecFields[0])
|
info := segment.GetIndex(vecFields[0])
|
||||||
suite.Require().NotNil(info)
|
suite.Require().NotNil(info)
|
||||||
|
|
||||||
for _, binlog := range info.FieldBinlog.GetBinlogs() {
|
for _, binlog := range info[0].FieldBinlog.GetBinlogs() {
|
||||||
suite.Greater(binlog.EntriesNum, int64(0))
|
suite.Greater(binlog.EntriesNum, int64(0))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -628,8 +628,15 @@ func (node *QueryNode) GetSegmentInfo(ctx context.Context, in *querypb.GetSegmen
|
|||||||
indexInfos []*querypb.FieldIndexInfo
|
indexInfos []*querypb.FieldIndexInfo
|
||||||
)
|
)
|
||||||
for _, field := range vecFields {
|
for _, field := range vecFields {
|
||||||
index := segment.GetIndex(field)
|
indexes := segment.GetIndex(field)
|
||||||
if index != nil {
|
if indexes != nil {
|
||||||
|
if len(indexes) != 1 {
|
||||||
|
log.Error("only support one index for vector field", zap.Int64("fieldID", field), zap.Int("index count", len(indexes)))
|
||||||
|
return &querypb.GetSegmentInfoResponse{
|
||||||
|
Status: merr.Status(merr.WrapErrServiceInternal("only support one index for vector field")),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
index := indexes[0]
|
||||||
indexName = index.IndexInfo.GetIndexName()
|
indexName = index.IndexInfo.GetIndexName()
|
||||||
indexID = index.IndexInfo.GetIndexID()
|
indexID = index.IndexInfo.GetIndexID()
|
||||||
indexInfos = append(indexInfos, index.IndexInfo)
|
indexInfos = append(indexInfos, index.IndexInfo)
|
||||||
@ -1178,7 +1185,7 @@ func (node *QueryNode) GetDataDistribution(ctx context.Context, req *querypb.Get
|
|||||||
IsSorted: s.IsSorted(),
|
IsSorted: s.IsSorted(),
|
||||||
LastDeltaTimestamp: s.LastDeltaTimestamp(),
|
LastDeltaTimestamp: s.LastDeltaTimestamp(),
|
||||||
IndexInfo: lo.SliceToMap(s.Indexes(), func(info *segments.IndexedFieldInfo) (int64, *querypb.FieldIndexInfo) {
|
IndexInfo: lo.SliceToMap(s.Indexes(), func(info *segments.IndexedFieldInfo) (int64, *querypb.FieldIndexInfo) {
|
||||||
return info.IndexInfo.FieldID, info.IndexInfo
|
return info.IndexInfo.IndexID, info.IndexInfo
|
||||||
}),
|
}),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
@ -4,6 +4,8 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
|
"github.com/milvus-io/milvus/pkg/common"
|
||||||
|
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||||
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -13,13 +15,23 @@ type INVERTEDChecker struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// CheckTrain validates the index params for an INVERTED index.
// For a JSON data type, params must contain both common.JSONCastTypeKey and
// common.JSONPathKey; a missing key yields merr.WrapErrParameterMissing for
// that key. Only presence is checked here — the values are not inspected.
// All remaining validation is delegated to scalarIndexChecker.CheckTrain.
// Diff view: in each pair below the first line is the pre-change code and the
// second is the post-change code; single-line rows exist only post-change.
func (c *INVERTEDChecker) CheckTrain(dataType schemapb.DataType, params map[string]string) error {
|
func (c *INVERTEDChecker) CheckTrain(dataType schemapb.DataType, params map[string]string) error {
|
||||||
|
// check json index params
|
||||||
|
isJSONIndex := typeutil.IsJSONType(dataType)
|
||||||
|
if isJSONIndex {
|
||||||
|
// The cast type is checked first, so it is the error reported when both keys are absent.
if _, exist := params[common.JSONCastTypeKey]; !exist {
|
||||||
|
return merr.WrapErrParameterMissing(common.JSONCastTypeKey, "json index must specify cast type")
|
||||||
|
}
|
||||||
|
if _, exist := params[common.JSONPathKey]; !exist {
|
||||||
|
return merr.WrapErrParameterMissing(common.JSONPathKey, "json index must specify json path")
|
||||||
|
}
|
||||||
|
}
|
||||||
return c.scalarIndexChecker.CheckTrain(dataType, params)
|
return c.scalarIndexChecker.CheckTrain(dataType, params)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *INVERTEDChecker) CheckValidDataType(indexType IndexType, field *schemapb.FieldSchema) error {
|
func (c *INVERTEDChecker) CheckValidDataType(indexType IndexType, field *schemapb.FieldSchema) error {
|
||||||
dType := field.GetDataType()
|
dType := field.GetDataType()
|
||||||
if !typeutil.IsBoolType(dType) && !typeutil.IsArithmetic(dType) && !typeutil.IsStringType(dType) &&
|
if !typeutil.IsBoolType(dType) && !typeutil.IsArithmetic(dType) && !typeutil.IsStringType(dType) &&
|
||||||
!typeutil.IsArrayType(dType) {
|
!typeutil.IsArrayType(dType) && !typeutil.IsJSONType(dType) {
|
||||||
return fmt.Errorf("INVERTED are not supported on %s field", dType.String())
|
return fmt.Errorf("INVERTED are not supported on %s field", dType.String())
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
@ -19,7 +19,7 @@ func Test_INVERTEDIndexChecker(t *testing.T) {
|
|||||||
assert.NoError(t, c.CheckValidDataType(IndexINVERTED, &schemapb.FieldSchema{DataType: schemapb.DataType_Int64}))
|
assert.NoError(t, c.CheckValidDataType(IndexINVERTED, &schemapb.FieldSchema{DataType: schemapb.DataType_Int64}))
|
||||||
assert.NoError(t, c.CheckValidDataType(IndexINVERTED, &schemapb.FieldSchema{DataType: schemapb.DataType_Float}))
|
assert.NoError(t, c.CheckValidDataType(IndexINVERTED, &schemapb.FieldSchema{DataType: schemapb.DataType_Float}))
|
||||||
assert.NoError(t, c.CheckValidDataType(IndexINVERTED, &schemapb.FieldSchema{DataType: schemapb.DataType_Array}))
|
assert.NoError(t, c.CheckValidDataType(IndexINVERTED, &schemapb.FieldSchema{DataType: schemapb.DataType_Array}))
|
||||||
|
assert.NoError(t, c.CheckValidDataType(IndexINVERTED, &schemapb.FieldSchema{DataType: schemapb.DataType_JSON}))
|
||||||
|
|
||||||
assert.Error(t, c.CheckValidDataType(IndexINVERTED, &schemapb.FieldSchema{DataType: schemapb.DataType_JSON}))
|
|
||||||
assert.Error(t, c.CheckValidDataType(IndexINVERTED, &schemapb.FieldSchema{DataType: schemapb.DataType_FloatVector}))
|
assert.Error(t, c.CheckValidDataType(IndexINVERTED, &schemapb.FieldSchema{DataType: schemapb.DataType_FloatVector}))
|
||||||
}
|
}
|
||||||
|
|||||||
@ -145,6 +145,9 @@ const (
|
|||||||
IgnoreGrowing = "ignore_growing"
|
IgnoreGrowing = "ignore_growing"
|
||||||
ConsistencyLevel = "consistency_level"
|
ConsistencyLevel = "consistency_level"
|
||||||
HintsKey = "hints"
|
HintsKey = "hints"
|
||||||
|
|
||||||
|
JSONCastTypeKey = "json_cast_type"
|
||||||
|
JSONPathKey = "json_path"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Doc-in-doc-out
|
// Doc-in-doc-out
|
||||||
|
|||||||
@ -609,7 +609,6 @@ func TestCreateIndexJsonField(t *testing.T) {
|
|||||||
errMsg string
|
errMsg string
|
||||||
}
|
}
|
||||||
inxError := []scalarIndexError{
|
inxError := []scalarIndexError{
|
||||||
{index.NewInvertedIndex(), "INVERTED are not supported on JSON field"},
|
|
||||||
{index.NewSortedIndex(), "STL_SORT are only supported on numeric field"},
|
{index.NewSortedIndex(), "STL_SORT are only supported on numeric field"},
|
||||||
{index.NewTrieIndex(), "TRIE are only supported on varchar field"},
|
{index.NewTrieIndex(), "TRIE are only supported on varchar field"},
|
||||||
}
|
}
|
||||||
@ -911,7 +910,7 @@ func TestIndexNotExistName(t *testing.T) {
|
|||||||
cp := hp.NewCreateCollectionParams(hp.Int64Vec)
|
cp := hp.NewCreateCollectionParams(hp.Int64Vec)
|
||||||
_, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, cp, hp.TNewFieldsOption(), hp.TNewSchemaOption().TWithEnableDynamicField(true))
|
_, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, cp, hp.TNewFieldsOption(), hp.TNewSchemaOption().TWithEnableDynamicField(true))
|
||||||
_, err1 := mc.CreateIndex(ctx, client.NewCreateIndexOption(schema.CollectionName, "aaa", idx))
|
_, err1 := mc.CreateIndex(ctx, client.NewCreateIndexOption(schema.CollectionName, "aaa", idx))
|
||||||
common.CheckErr(t, err1, false, "cannot create index on non-exist field: aaa")
|
common.CheckErr(t, err1, false, "index HNSW only supports vector data type")
|
||||||
|
|
||||||
// describe index with not exist field name
|
// describe index with not exist field name
|
||||||
_, errDesc := mc.DescribeIndex(ctx, client.NewDescribeIndexOption(schema.CollectionName, "aaa"))
|
_, errDesc := mc.DescribeIndex(ctx, client.NewDescribeIndexOption(schema.CollectionName, "aaa"))
|
||||||
|
|||||||
@ -21,6 +21,7 @@ from common.common_params import (
|
|||||||
from utils.util_pymilvus import *
|
from utils.util_pymilvus import *
|
||||||
from common.constants import *
|
from common.constants import *
|
||||||
from pymilvus.exceptions import MilvusException
|
from pymilvus.exceptions import MilvusException
|
||||||
|
from pymilvus import DataType
|
||||||
|
|
||||||
prefix = "index"
|
prefix = "index"
|
||||||
default_schema = cf.gen_default_collection_schema()
|
default_schema = cf.gen_default_collection_schema()
|
||||||
@ -1288,14 +1289,11 @@ class TestIndexInvalid(TestcaseBase):
|
|||||||
"""
|
"""
|
||||||
target: test create scalar index on json field
|
target: test create scalar index on json field
|
||||||
method: 1.create collection, and create index
|
method: 1.create collection, and create index
|
||||||
expected: Raise exception
|
expected: success
|
||||||
"""
|
"""
|
||||||
collection_w = self.init_collection_general(prefix, is_index=False, vector_data_type=vector_data_type)[0]
|
collection_w = self.init_collection_general(prefix, is_index=False, vector_data_type=vector_data_type)[0]
|
||||||
scalar_index_params = {"index_type": "INVERTED"}
|
scalar_index_params = {"index_type": "INVERTED", "json_cast_type": DataType.INT32, "json_path": ct.default_json_field_name+"['a']"}
|
||||||
collection_w.create_index(ct.default_json_field_name, index_params=scalar_index_params,
|
collection_w.create_index(ct.default_json_field_name, index_params=scalar_index_params)
|
||||||
check_task=CheckTasks.err_res,
|
|
||||||
check_items={ct.err_code: 1100,
|
|
||||||
ct.err_msg: "INVERTED are not supported on JSON field"})
|
|
||||||
|
|
||||||
@pytest.mark.tags(CaseLabel.L1)
|
@pytest.mark.tags(CaseLabel.L1)
|
||||||
def test_create_inverted_index_on_array_field(self):
|
def test_create_inverted_index_on_array_field(self):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user