diff --git a/internal/core/src/common/Types.h b/internal/core/src/common/Types.h index 4d14577828..6e69ddac17 100644 --- a/internal/core/src/common/Types.h +++ b/internal/core/src/common/Types.h @@ -258,6 +258,34 @@ IsBinaryDataType(DataType data_type) { return IsJsonDataType(data_type) || IsArrayDataType(data_type); } +inline bool +IsPrimitiveType(proto::schema::DataType type) { + switch (type) { + case proto::schema::DataType::Bool: + case proto::schema::DataType::Int8: + case proto::schema::DataType::Int16: + case proto::schema::DataType::Int32: + case proto::schema::DataType::Int64: + case proto::schema::DataType::Float: + case proto::schema::DataType::Double: + case proto::schema::DataType::String: + case proto::schema::DataType::VarChar: + return true; + default: + return false; + } +} + +inline bool +IsJsonType(proto::schema::DataType type) { + return type == proto::schema::DataType::JSON; +} + +inline bool +IsArrayType(proto::schema::DataType type) { + return type == proto::schema::DataType::Array; +} + inline bool IsBinaryVectorDataType(DataType data_type) { return data_type == DataType::VECTOR_BINARY; diff --git a/internal/core/src/index/BitmapIndex.cpp b/internal/core/src/index/BitmapIndex.cpp index 3e63763dd2..1cd3d6f9a3 100644 --- a/internal/core/src/index/BitmapIndex.cpp +++ b/internal/core/src/index/BitmapIndex.cpp @@ -33,7 +33,8 @@ namespace index { template BitmapIndex::BitmapIndex( const storage::FileManagerContext& file_manager_context) - : is_built_(false) { + : is_built_(false), + schema_(file_manager_context.fieldDataMeta.field_schema) { if (file_manager_context.Valid()) { file_manager_ = std::make_shared(file_manager_context); @@ -45,7 +46,9 @@ template BitmapIndex::BitmapIndex( const storage::FileManagerContext& file_manager_context, std::shared_ptr space) - : is_built_(false), data_(), space_(space) { + : is_built_(false), + schema_(file_manager_context.fieldDataMeta.field_schema), + space_(space) { if (file_manager_context.Valid()) { file_manager_ = std::make_shared( file_manager_context, space); @@ -67,27 +70,7 @@ BitmapIndex::Build(const Config& config) { auto field_datas = file_manager_->CacheRawDataToMemory(insert_files.value()); - int total_num_rows = 0; - for (const auto& field_data : field_datas) { - total_num_rows += field_data->get_num_rows(); - } - if (total_num_rows == 0) { - throw SegcoreError(DataIsEmpty, - "scalar bitmap index can not build null values"); - } - - total_num_rows_ = total_num_rows; - - int64_t offset = 0; - for (const auto& data : field_datas) { - auto slice_row_num = data->get_num_rows(); - for (size_t i = 0; i < slice_row_num; ++i) { - auto val = reinterpret_cast(data->RawValue(i)); - data_[*val].add(offset); - offset++; - } - } - is_built_ = true; + BuildWithFieldData(field_datas); } template @@ -144,6 +127,21 @@ BitmapIndex::BuildV2(const Config& config) { BuildWithFieldData(field_datas); } +template +void +BitmapIndex::BuildPrimitiveField( + const std::vector& field_datas) { + int64_t offset = 0; + for (const auto& data : field_datas) { + auto slice_row_num = data->get_num_rows(); + for (size_t i = 0; i < slice_row_num; ++i) { + auto val = reinterpret_cast(data->RawValue(i)); + data_[*val].add(offset); + offset++; + } + } +} + template void BitmapIndex::BuildWithFieldData( @@ -158,17 +156,46 @@ BitmapIndex::BuildWithFieldData( } total_num_rows_ = total_num_rows; + switch (schema_.data_type()) { + case proto::schema::DataType::Bool: + case proto::schema::DataType::Int8: + case proto::schema::DataType::Int16: + case proto::schema::DataType::Int32: + case proto::schema::DataType::Int64: + case proto::schema::DataType::Float: + case proto::schema::DataType::Double: + case proto::schema::DataType::String: + case proto::schema::DataType::VarChar: + BuildPrimitiveField(field_datas); + break; + case proto::schema::DataType::Array: + BuildArrayField(field_datas); + break; + default: + PanicInfo( + DataTypeInvalid, + fmt::format("Invalid data type: {} for build bitmap index", + proto::schema::DataType_Name(schema_.data_type()))); + } + is_built_ = true; +} + +template +void +BitmapIndex::BuildArrayField(const std::vector& field_datas) { int64_t offset = 0; for (const auto& data : field_datas) { auto slice_row_num = data->get_num_rows(); for (size_t i = 0; i < slice_row_num; ++i) { - auto val = reinterpret_cast(data->RawValue(i)); - data_[*val].add(offset); + auto array = + reinterpret_cast(data->RawValue(i)); + for (size_t j = 0; j < array->length(); ++j) { + auto val = array->template get_data(j); + data_[val].add(offset); + } offset++; } } - - is_built_ = true; } template @@ -877,4 +904,4 @@ template class BitmapIndex; template class BitmapIndex; } // namespace index -} // namespace milvus \ No newline at end of file +} // namespace milvus diff --git a/internal/core/src/index/BitmapIndex.h b/internal/core/src/index/BitmapIndex.h index 2ead42d5de..6dca9c6874 100644 --- a/internal/core/src/index/BitmapIndex.h +++ b/internal/core/src/index/BitmapIndex.h @@ -50,17 +50,6 @@ class BitmapIndex : public ScalarIndex { const storage::FileManagerContext& file_manager_context, std::shared_ptr space); - explicit BitmapIndex( - const std::shared_ptr& file_manager) - : file_manager_(file_manager) { - } - - explicit BitmapIndex( - const std::shared_ptr& file_manager, - std::shared_ptr space) - : file_manager_(file_manager), space_(space) { - } - ~BitmapIndex() override = default; BinarySet @@ -117,6 +106,7 @@ class BitmapIndex : public ScalarIndex { BinarySet Upload(const Config& config = {}) override; + BinarySet UploadV2(const Config& config = {}) override; @@ -125,6 +115,11 @@ class BitmapIndex : public ScalarIndex { return true; } + void + LoadWithoutAssemble(const BinarySet& binary_set, + const Config& config) override; + + public: int64_t Cardinality() { if (build_mode_ == BitmapIndexBuildMode::ROARING) { @@ -134,11 +129,13 @@ class BitmapIndex : public ScalarIndex { } } - void - LoadWithoutAssemble(const BinarySet& binary_set, - const Config& config) override; - private: + void + BuildPrimitiveField(const std::vector& datas); + + void + BuildArrayField(const std::vector& datas); + size_t GetIndexDataSize(); @@ -188,6 +185,7 @@ class BitmapIndex : public ScalarIndex { std::map data_; std::map bitsets_; size_t total_num_rows_{0}; + proto::schema::FieldSchema schema_; std::shared_ptr file_manager_; std::shared_ptr space_; }; diff --git a/internal/core/src/index/HybridScalarIndex.cpp b/internal/core/src/index/HybridScalarIndex.cpp index 518828ea7b..0f032d0501 100644 --- a/internal/core/src/index/HybridScalarIndex.cpp +++ b/internal/core/src/index/HybridScalarIndex.cpp @@ -32,12 +32,14 @@ template HybridScalarIndex::HybridScalarIndex( const storage::FileManagerContext& file_manager_context) : is_built_(false), - bitmap_index_cardinality_limit_(DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND) { + bitmap_index_cardinality_limit_(DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND), + file_manager_context_(file_manager_context) { if (file_manager_context.Valid()) { - file_manager_ = + mem_file_manager_ = std::make_shared(file_manager_context); - AssertInfo(file_manager_ != nullptr, "create file manager failed!"); + AssertInfo(mem_file_manager_ != nullptr, "create file manager failed!"); } + field_type_ = file_manager_context.fieldDataMeta.field_schema.data_type(); internal_index_type_ = InternalIndexType::NONE; } @@ -47,12 +49,14 @@ HybridScalarIndex::HybridScalarIndex( std::shared_ptr space) : is_built_(false), bitmap_index_cardinality_limit_(DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND), + file_manager_context_(file_manager_context), space_(space) { if (file_manager_context.Valid()) { - file_manager_ = std::make_shared( + mem_file_manager_ = std::make_shared( file_manager_context, space); - AssertInfo(file_manager_ != nullptr, "create file manager failed!"); + AssertInfo(mem_file_manager_ != nullptr, "create file manager failed!"); } + field_type_ = file_manager_context.fieldDataMeta.field_schema.data_type(); internal_index_type_ = InternalIndexType::NONE; } @@ -96,7 +100,7 @@ HybridScalarIndex::SelectIndexBuildType( template InternalIndexType -HybridScalarIndex::SelectIndexBuildType( +HybridScalarIndex::SelectBuildTypeForPrimitiveType( const std::vector& field_datas) { std::set distinct_vals; for (const auto& data : field_datas) { @@ -121,7 +125,7 @@ HybridScalarIndex::SelectIndexBuildType( template <> InternalIndexType -HybridScalarIndex::SelectIndexBuildType( +HybridScalarIndex::SelectBuildTypeForPrimitiveType( const std::vector& field_datas) { std::set distinct_vals; for (const auto& data : field_datas) { @@ -144,6 +148,52 @@ HybridScalarIndex::SelectIndexBuildType( return internal_index_type_; } +template +InternalIndexType +HybridScalarIndex::SelectBuildTypeForArrayType( + const std::vector& field_datas) { + std::set distinct_vals; + for (const auto& data : field_datas) { + auto slice_row_num = data->get_num_rows(); + for (size_t i = 0; i < slice_row_num; ++i) { + auto array = + reinterpret_cast(data->RawValue(i)); + for (size_t j = 0; j < array->length(); ++j) { + auto val = array->template get_data(j); + distinct_vals.insert(val); + + // Limit the bitmap index cardinality because of memory usage + if (distinct_vals.size() > bitmap_index_cardinality_limit_) { + break; + } + } + } + } + // Decide whether to select bitmap index or inverted index + if (distinct_vals.size() >= bitmap_index_cardinality_limit_) { + internal_index_type_ = InternalIndexType::INVERTED; + } else { + internal_index_type_ = InternalIndexType::BITMAP; + } + return internal_index_type_; +} + +template +InternalIndexType +HybridScalarIndex::SelectIndexBuildType( + const std::vector& field_datas) { + std::set distinct_vals; + if (IsPrimitiveType(field_type_)) { + return SelectBuildTypeForPrimitiveType(field_datas); + } else if (IsArrayType(field_type_)) { + return SelectBuildTypeForArrayType(field_datas); + } else { + PanicInfo(Unsupported, + fmt::format("unsupported build index for type {}", + DataType_Name(field_type_))); + } +} + template std::shared_ptr> HybridScalarIndex::GetInternalIndex() { @@ -151,9 +201,14 @@ HybridScalarIndex::GetInternalIndex() { return internal_index_; } if (internal_index_type_ == InternalIndexType::BITMAP) { - internal_index_ = std::make_shared>(file_manager_); + internal_index_ = + std::make_shared>(file_manager_context_); } else if (internal_index_type_ == InternalIndexType::STLSORT) { - internal_index_ = std::make_shared>(file_manager_); + internal_index_ = + std::make_shared>(file_manager_context_); + } else if (internal_index_type_ == InternalIndexType::INVERTED) { + internal_index_ = + std::make_shared>(file_manager_context_); } else { PanicInfo(UnexpectedError, "unknown index type when get internal index"); @@ -170,9 +225,13 @@ HybridScalarIndex::GetInternalIndex() { if (internal_index_type_ == InternalIndexType::BITMAP) { internal_index_ = - std::make_shared>(file_manager_); + std::make_shared>(file_manager_context_); } else if (internal_index_type_ == InternalIndexType::MARISA) { - internal_index_ = std::make_shared(file_manager_); + internal_index_ = + std::make_shared(file_manager_context_); + } else if (internal_index_type_ == InternalIndexType::INVERTED) { + internal_index_ = std::make_shared>( + file_manager_context_); } else { PanicInfo(UnexpectedError, "unknown index type when get internal index"); @@ -206,7 +265,7 @@ HybridScalarIndex::Build(const Config& config) { "insert file paths is empty when build index"); auto field_datas = - file_manager_->CacheRawDataToMemory(insert_files.value()); + mem_file_manager_->CacheRawDataToMemory(insert_files.value()); SelectIndexBuildType(field_datas); BuildInternal(field_datas); @@ -224,7 +283,7 @@ HybridScalarIndex::BuildV2(const Config& config) { LOG_INFO("config bitmap cardinality limit to {}", bitmap_index_cardinality_limit_); - auto field_name = file_manager_->GetIndexMeta().field_name; + auto field_name = mem_file_manager_->GetIndexMeta().field_name; auto reader = space_->ScanData(); std::vector field_datas; for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) { @@ -262,32 +321,51 @@ HybridScalarIndex::Serialize(const Config& config) { template BinarySet -HybridScalarIndex::Upload(const Config& config) { - auto binary_set = Serialize(config); - file_manager_->AddFile(binary_set); +HybridScalarIndex::SerializeIndexType() { + // Add index type info to storage for future restruct index + BinarySet index_binary_set; + std::shared_ptr index_type_buf(new uint8_t[sizeof(uint8_t)]); + index_type_buf[0] = static_cast(internal_index_type_); + index_binary_set.Append(index::INDEX_TYPE, index_type_buf, sizeof(uint8_t)); + mem_file_manager_->AddFile(index_binary_set); - auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize(); - BinarySet ret; + auto remote_paths_to_size = mem_file_manager_->GetRemotePathsToFileSize(); + BinarySet ret_set; + Assert(remote_paths_to_size.size() == 1); for (auto& file : remote_paths_to_size) { - ret.Append(file.first, nullptr, file.second); + ret_set.Append(file.first, nullptr, file.second); + } + return ret_set; +} + +template +BinarySet +HybridScalarIndex::Upload(const Config& config) { + auto internal_index = GetInternalIndex(); + auto index_ret = internal_index->Upload(config); + + auto index_type_ret = SerializeIndexType(); + + for (auto& [key, value] : index_type_ret.binary_map_) { + index_ret.Append(key, value); } - return ret; + return index_ret; } template BinarySet HybridScalarIndex::UploadV2(const Config& config) { - auto binary_set = Serialize(config); - file_manager_->AddFileV2(binary_set); + auto internal_index = GetInternalIndex(); + auto index_ret = internal_index->Upload(config); - auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize(); - BinarySet ret; - for (auto& file : remote_paths_to_size) { - ret.Append(file.first, nullptr, file.second); + auto index_type_ret = SerializeIndexType(); + + for (auto& [key, value] : index_type_ret.binary_map_) { + index_ret.Append(key, value); } - return ret; + return index_ret; } template @@ -301,64 +379,32 @@ HybridScalarIndex::DeserializeIndexType(const BinarySet& binary_set) { template void -HybridScalarIndex::LoadInternal(const BinarySet& binary_set, - const Config& config) { - auto index = GetInternalIndex(); - index->LoadWithoutAssemble(binary_set, config); +HybridScalarIndex::LoadV2(const Config& config) { + PanicInfo(Unsupported, "HybridScalarIndex LoadV2 not implemented"); +} + +template +std::string +HybridScalarIndex::GetRemoteIndexTypeFile( + const std::vector& files) { + std::string ret; + for (auto& file : files) { + auto file_name = file.substr(file.find_last_of('/') + 1); + if (file_name == index::INDEX_TYPE) { + ret = file; + } + } + AssertInfo(!ret.empty(), "index type file not found for hybrid index"); + return ret; } template void HybridScalarIndex::Load(const BinarySet& binary_set, const Config& config) { - milvus::Assemble(const_cast(binary_set)); DeserializeIndexType(binary_set); - LoadInternal(binary_set, config); - is_built_ = true; -} - -template -void -HybridScalarIndex::LoadV2(const Config& config) { - auto blobs = space_->StatisticsBlobs(); - std::vector index_files; - auto prefix = file_manager_->GetRemoteIndexObjectPrefixV2(); - for (auto& b : blobs) { - if (b.name.rfind(prefix, 0) == 0) { - index_files.push_back(b.name); - } - } - std::map index_datas{}; - for (auto& file_name : index_files) { - auto res = space_->GetBlobByteSize(file_name); - if (!res.ok()) { - PanicInfo(S3Error, "unable to read index blob"); - } - auto index_blob_data = - std::shared_ptr(new uint8_t[res.value()]); - auto status = space_->ReadBlob(file_name, index_blob_data.get()); - if (!status.ok()) { - PanicInfo(S3Error, "unable to read index blob"); - } - auto raw_index_blob = - storage::DeserializeFileData(index_blob_data, res.value()); - auto key = file_name.substr(file_name.find_last_of('/') + 1); - index_datas[key] = raw_index_blob->GetFieldData(); - } - AssembleIndexDatas(index_datas); - - BinarySet binary_set; - for (auto& [key, data] : index_datas) { - auto size = data->Size(); - auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction - auto buf = std::shared_ptr( - (uint8_t*)const_cast(data->Data()), deleter); - binary_set.Append(key, buf, size); - } - - DeserializeIndexType(binary_set); - - LoadInternal(binary_set, config); + auto index = GetInternalIndex(); + index->Load(binary_set, config); is_built_ = true; } @@ -371,7 +417,11 @@ HybridScalarIndex::Load(milvus::tracer::TraceContext ctx, GetValueFromConfig>(config, "index_files"); AssertInfo(index_files.has_value(), "index file paths is empty when load bitmap index"); - auto index_datas = file_manager_->LoadIndexToMemory(index_files.value()); + + auto index_type_file = GetRemoteIndexTypeFile(index_files.value()); + + auto index_datas = mem_file_manager_->LoadIndexToMemory( + std::vector{index_type_file}); AssembleIndexDatas(index_datas); BinarySet binary_set; for (auto& [key, data] : index_datas) { @@ -384,7 +434,8 @@ HybridScalarIndex::Load(milvus::tracer::TraceContext ctx, DeserializeIndexType(binary_set); - LoadInternal(binary_set, config); + auto index = GetInternalIndex(); + index->Load(ctx, config); is_built_ = true; } diff --git a/internal/core/src/index/HybridScalarIndex.h b/internal/core/src/index/HybridScalarIndex.h index c3c44630bf..2683eeff9d 100644 --- a/internal/core/src/index/HybridScalarIndex.h +++ b/internal/core/src/index/HybridScalarIndex.h @@ -24,6 +24,7 @@ #include "index/BitmapIndex.h" #include "index/ScalarIndexSort.h" #include "index/StringIndexMarisa.h" +#include "index/InvertedIndexTantivy.h" #include "storage/FileManager.h" #include "storage/DiskFileManagerImpl.h" #include "storage/MemFileManagerImpl.h" @@ -37,6 +38,7 @@ enum class InternalIndexType { BITMAP, STLSORT, MARISA, + INVERTED, }; /* @@ -125,6 +127,9 @@ class HybridScalarIndex : public ScalarIndex { const bool HasRawData() const override { + if (field_type_ == proto::schema::DataType::Array) { + return false; + } return internal_index_->HasRawData(); } @@ -135,30 +140,42 @@ class HybridScalarIndex : public ScalarIndex { UploadV2(const Config& config = {}) override; private: + InternalIndexType + SelectBuildTypeForPrimitiveType( + const std::vector& field_datas); + + InternalIndexType + SelectBuildTypeForArrayType(const std::vector& field_datas); + InternalIndexType SelectIndexBuildType(const std::vector& field_datas); InternalIndexType SelectIndexBuildType(size_t n, const T* values); + BinarySet + SerializeIndexType(); + void DeserializeIndexType(const BinarySet& binary_set); void BuildInternal(const std::vector& field_datas); - void - LoadInternal(const BinarySet& binary_set, const Config& config); - std::shared_ptr> GetInternalIndex(); + std::string + GetRemoteIndexTypeFile(const std::vector& files); + public: bool is_built_{false}; int32_t bitmap_index_cardinality_limit_; + proto::schema::DataType field_type_; InternalIndexType internal_index_type_; std::shared_ptr> internal_index_{nullptr}; - std::shared_ptr file_manager_{nullptr}; + storage::FileManagerContext file_manager_context_; + std::shared_ptr mem_file_manager_{nullptr}; std::shared_ptr space_{nullptr}; }; diff --git a/internal/core/src/index/IndexFactory.cpp b/internal/core/src/index/IndexFactory.cpp index fdfdda7226..ac4a89f933 100644 --- a/internal/core/src/index/IndexFactory.cpp +++ b/internal/core/src/index/IndexFactory.cpp @@ -33,7 +33,7 @@ namespace milvus::index { template ScalarIndexPtr -IndexFactory::CreateScalarIndex( +IndexFactory::CreatePrimitiveScalarIndex( const IndexType& index_type, const storage::FileManagerContext& file_manager_context) { if (index_type == INVERTED_INDEX_TYPE) { @@ -54,7 +54,7 @@ IndexFactory::CreateScalarIndex( template <> ScalarIndexPtr -IndexFactory::CreateScalarIndex( +IndexFactory::CreatePrimitiveScalarIndex( const IndexType& index_type, const storage::FileManagerContext& file_manager_context) { #if defined(__linux__) || defined(__APPLE__) @@ -74,7 +74,7 @@ IndexFactory::CreateScalarIndex( template ScalarIndexPtr -IndexFactory::CreateScalarIndex( +IndexFactory::CreatePrimitiveScalarIndex( const IndexType& index_type, const storage::FileManagerContext& file_manager_context, std::shared_ptr space) { @@ -91,7 +91,7 @@ IndexFactory::CreateScalarIndex( template <> ScalarIndexPtr -IndexFactory::CreateScalarIndex( +IndexFactory::CreatePrimitiveScalarIndex( const IndexType& index_type, const storage::FileManagerContext& file_manager_context, std::shared_ptr space) { @@ -142,25 +142,32 @@ IndexFactory::CreatePrimitiveScalarIndex( switch (data_type) { // create scalar index case DataType::BOOL: - return CreateScalarIndex(index_type, file_manager_context); + return CreatePrimitiveScalarIndex(index_type, + file_manager_context); case DataType::INT8: - return CreateScalarIndex(index_type, file_manager_context); + return CreatePrimitiveScalarIndex(index_type, + file_manager_context); case DataType::INT16: - return CreateScalarIndex(index_type, file_manager_context); + return CreatePrimitiveScalarIndex(index_type, + file_manager_context); case DataType::INT32: - return CreateScalarIndex(index_type, file_manager_context); + return CreatePrimitiveScalarIndex(index_type, + file_manager_context); case DataType::INT64: - return CreateScalarIndex(index_type, file_manager_context); + return CreatePrimitiveScalarIndex(index_type, + file_manager_context); case DataType::FLOAT: - return CreateScalarIndex(index_type, file_manager_context); + return CreatePrimitiveScalarIndex(index_type, + file_manager_context); case DataType::DOUBLE: - return CreateScalarIndex(index_type, file_manager_context); + return CreatePrimitiveScalarIndex(index_type, + file_manager_context); // create string index case DataType::STRING: case DataType::VARCHAR: - return CreateScalarIndex(index_type, - file_manager_context); + return CreatePrimitiveScalarIndex( + index_type, file_manager_context); default: throw SegcoreError( DataTypeInvalid, @@ -168,21 +175,57 @@ IndexFactory::CreatePrimitiveScalarIndex( } } +IndexBasePtr +IndexFactory::CreateCompositeScalarIndex( + IndexType index_type, + const storage::FileManagerContext& file_manager_context) { + if (index_type == BITMAP_INDEX_TYPE) { + auto element_type = static_cast( + file_manager_context.fieldDataMeta.field_schema.element_type()); + return CreatePrimitiveScalarIndex( + element_type, index_type, file_manager_context); + } else if (index_type == INVERTED_INDEX_TYPE) { + auto element_type = static_cast( + file_manager_context.fieldDataMeta.field_schema.element_type()); + return CreatePrimitiveScalarIndex( + element_type, index_type, file_manager_context); + } +} + +IndexBasePtr +IndexFactory::CreateComplexScalarIndex( + IndexType index_type, + const storage::FileManagerContext& file_manager_context) { + PanicInfo(Unsupported, "Complex index not supported now"); +} + IndexBasePtr IndexFactory::CreateScalarIndex( const CreateIndexInfo& create_index_info, const storage::FileManagerContext& file_manager_context) { - switch (create_index_info.field_type) { - case DataType::ARRAY: + auto data_type = create_index_info.field_type; + switch (data_type) { + case DataType::BOOL: + case DataType::INT8: + case DataType::INT16: + case DataType::INT32: + case DataType::INT64: + case DataType::FLOAT: + case DataType::DOUBLE: + case DataType::VARCHAR: + case DataType::STRING: return CreatePrimitiveScalarIndex( - static_cast( - file_manager_context.fieldDataMeta.schema.element_type()), - create_index_info.index_type, - file_manager_context); - default: - return CreatePrimitiveScalarIndex(create_index_info.field_type, - create_index_info.index_type, + data_type, create_index_info.index_type, file_manager_context); + case DataType::ARRAY: { + return CreateCompositeScalarIndex(create_index_info.index_type, file_manager_context); + } + case DataType::JSON: { + return CreateComplexScalarIndex(create_index_info.index_type, + file_manager_context); + } + default: + PanicInfo(DataTypeInvalid, "Invalid data type:{}", data_type); } } @@ -251,43 +294,6 @@ IndexFactory::CreateVectorIndex( } } -IndexBasePtr -IndexFactory::CreateScalarIndex(const CreateIndexInfo& create_index_info, - const storage::FileManagerContext& file_manager, - std::shared_ptr space) { - auto data_type = create_index_info.field_type; - auto index_type = create_index_info.index_type; - - switch (data_type) { - // create scalar index - case DataType::BOOL: - return CreateScalarIndex(index_type, file_manager, space); - case DataType::INT8: - return CreateScalarIndex(index_type, file_manager, space); - case DataType::INT16: - return CreateScalarIndex(index_type, file_manager, space); - case DataType::INT32: - return CreateScalarIndex(index_type, file_manager, space); - case DataType::INT64: - return CreateScalarIndex(index_type, file_manager, space); - case DataType::FLOAT: - return CreateScalarIndex(index_type, file_manager, space); - case DataType::DOUBLE: - return CreateScalarIndex(index_type, file_manager, space); - - // create string index - case DataType::STRING: - case DataType::VARCHAR: - return CreateScalarIndex( - index_type, file_manager, space); - default: - throw SegcoreError( - DataTypeInvalid, - fmt::format("invalid data type to build mem index: {}", - data_type)); - } -} - IndexBasePtr IndexFactory::CreateVectorIndex( const CreateIndexInfo& create_index_info, diff --git a/internal/core/src/index/IndexFactory.h b/internal/core/src/index/IndexFactory.h index 47b255ab4e..61c5119d4c 100644 --- a/internal/core/src/index/IndexFactory.h +++ b/internal/core/src/index/IndexFactory.h @@ -65,6 +65,7 @@ class IndexFactory { CreateVectorIndex(const CreateIndexInfo& create_index_info, const storage::FileManagerContext& file_manager_context); + // For base types like int, float, double, string, etc IndexBasePtr CreatePrimitiveScalarIndex( DataType data_type, @@ -72,6 +73,20 @@ class IndexFactory { const storage::FileManagerContext& file_manager_context = storage::FileManagerContext()); + // For types like array, struct, union, etc + IndexBasePtr + CreateCompositeScalarIndex( + IndexType index_type, + const storage::FileManagerContext& file_manager_context = + storage::FileManagerContext()); + + // For types like Json, XML, etc + IndexBasePtr + CreateComplexScalarIndex( + IndexType index_type, + const storage::FileManagerContext& file_manager_context = + storage::FileManagerContext()); + IndexBasePtr CreateScalarIndex(const CreateIndexInfo& create_index_info, const storage::FileManagerContext& file_manager_context = @@ -85,7 +100,10 @@ class IndexFactory { IndexBasePtr CreateScalarIndex(const CreateIndexInfo& create_index_info, const storage::FileManagerContext& file_manager_context, - std::shared_ptr space); + std::shared_ptr space) { + PanicInfo(ErrorCode::Unsupported, + "CreateScalarIndexV2 not implemented"); + } // IndexBasePtr // CreateIndex(DataType dtype, const IndexType& index_type); @@ -94,28 +112,15 @@ class IndexFactory { template ScalarIndexPtr - CreateScalarIndex(const IndexType& index_type, - const storage::FileManagerContext& file_manager = - storage::FileManagerContext()); + CreatePrimitiveScalarIndex(const IndexType& index_type, + const storage::FileManagerContext& file_manager = + storage::FileManagerContext()); template ScalarIndexPtr - CreateScalarIndex(const IndexType& index_type, - const storage::FileManagerContext& file_manager, - std::shared_ptr space); + CreatePrimitiveScalarIndex(const IndexType& index_type, + const storage::FileManagerContext& file_manager, + std::shared_ptr space); }; -// template <> -// ScalarIndexPtr -// IndexFactory::CreateScalarIndex( -// const IndexType& index_type, -// const storage::FileManagerContext& file_manager_context, -// DataType d_type); - -template <> -ScalarIndexPtr -IndexFactory::CreateScalarIndex( - const IndexType& index_type, - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space); } // namespace milvus::index diff --git a/internal/core/src/index/InvertedIndexTantivy.cpp b/internal/core/src/index/InvertedIndexTantivy.cpp index 3b9a54fae9..984dc19466 100644 --- a/internal/core/src/index/InvertedIndexTantivy.cpp +++ b/internal/core/src/index/InvertedIndexTantivy.cpp @@ -66,7 +66,7 @@ template InvertedIndexTantivy::InvertedIndexTantivy( const storage::FileManagerContext& ctx, std::shared_ptr space) - : space_(space), schema_(ctx.fieldDataMeta.schema) { + : space_(space), schema_(ctx.fieldDataMeta.field_schema) { mem_file_manager_ = std::make_shared(ctx, ctx.space_); disk_file_manager_ = std::make_shared(ctx, ctx.space_); auto field = @@ -259,8 +259,7 @@ InvertedIndexTantivy::InApplyCallback( template const TargetBitmap InvertedIndexTantivy::NotIn(size_t n, const T* values) { - TargetBitmap bitset(Count()); - bitset.set(); + TargetBitmap bitset(Count(), true); for (size_t i = 0; i < n; ++i) { auto array = wrapper_->term_query(values[i]); apply_hits(bitset, array, false); diff --git a/internal/core/src/index/ScalarIndexSort.h b/internal/core/src/index/ScalarIndexSort.h index 96402017c9..ca44045c93 100644 --- a/internal/core/src/index/ScalarIndexSort.h +++ b/internal/core/src/index/ScalarIndexSort.h @@ -41,17 +41,6 @@ class ScalarIndexSort : public ScalarIndex { const storage::FileManagerContext& file_manager_context, std::shared_ptr space); - explicit ScalarIndexSort( - const std::shared_ptr& file_manager) - : file_manager_(file_manager) { - } - - explicit ScalarIndexSort( - const std::shared_ptr& file_manager, - std::shared_ptr space) - : file_manager_(file_manager), space_(space) { - } - BinarySet Serialize(const Config& config) override; diff --git a/internal/core/src/index/StringIndexMarisa.h b/internal/core/src/index/StringIndexMarisa.h index e787a7e63b..214635280e 100644 --- a/internal/core/src/index/StringIndexMarisa.h +++ b/internal/core/src/index/StringIndexMarisa.h @@ -37,17 +37,6 @@ class StringIndexMarisa : public StringIndex { const storage::FileManagerContext& file_manager_context, std::shared_ptr space); - explicit StringIndexMarisa( - const std::shared_ptr& file_manager) - : file_manager_(file_manager) { - } - - explicit StringIndexMarisa( - const std::shared_ptr& file_manager, - std::shared_ptr space) - : file_manager_(file_manager), space_(space) { - } - int64_t Size() override; diff --git a/internal/core/src/indexbuilder/index_c.cpp b/internal/core/src/indexbuilder/index_c.cpp index 7ccaf7c414..84f781e589 100644 --- a/internal/core/src/indexbuilder/index_c.cpp +++ b/internal/core/src/indexbuilder/index_c.cpp @@ -274,7 +274,8 @@ CreateIndexV2(CIndex* res_index, build_index_info->collectionid(), build_index_info->partitionid(), build_index_info->segmentid(), - build_index_info->field_schema().fieldid()}; + build_index_info->field_schema().fieldid(), + build_index_info->field_schema()}; milvus::storage::IndexMeta index_meta{ build_index_info->segmentid(), build_index_info->field_schema().fieldid(), diff --git a/internal/core/src/storage/Types.h b/internal/core/src/storage/Types.h index fbd72d0a59..928386d190 100644 --- a/internal/core/src/storage/Types.h +++ b/internal/core/src/storage/Types.h @@ -64,7 +64,7 @@ struct FieldDataMeta { int64_t partition_id; int64_t segment_id; int64_t field_id; - proto::schema::FieldSchema schema; + proto::schema::FieldSchema field_schema; }; enum CodecType { diff --git a/internal/core/unittest/CMakeLists.txt b/internal/core/unittest/CMakeLists.txt index 62c68e6dcb..43da214d5c 100644 --- a/internal/core/unittest/CMakeLists.txt +++ b/internal/core/unittest/CMakeLists.txt @@ -20,7 +20,6 @@ set(MILVUS_TEST_FILES test_bf.cpp test_bf_sparse.cpp test_binary.cpp - test_bitmap.cpp test_bool_index.cpp test_common.cpp test_concurrent_vector.cpp @@ -33,6 +32,7 @@ set(MILVUS_TEST_FILES test_growing_index.cpp test_indexing.cpp test_hybrid_index.cpp + test_array_bitmap_index.cpp test_index_c_api.cpp test_index_wrapper.cpp test_init.cpp diff --git a/internal/core/unittest/test_array_bitmap_index.cpp b/internal/core/unittest/test_array_bitmap_index.cpp new file mode 100644 index 0000000000..6f86c30e7e --- /dev/null +++ b/internal/core/unittest/test_array_bitmap_index.cpp @@ -0,0 +1,330 @@ +// Copyright(C) 2019 - 2020 Zilliz.All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + +#include +#include +#include +#include +#include + +#include "common/Tracer.h" +#include "index/BitmapIndex.h" +#include "storage/Util.h" +#include "storage/InsertData.h" +#include "indexbuilder/IndexFactory.h" +#include "index/IndexFactory.h" +#include "test_utils/indexbuilder_test_utils.h" +#include "index/Meta.h" +#include "pb/schema.pb.h" + +using namespace milvus::index; +using namespace milvus::indexbuilder; +using namespace milvus; +using namespace milvus::index; + +template +static std::vector +GenerateData(const size_t size, const size_t cardinality) { + std::vector result; + for (size_t i = 0; i < size; ++i) { + result.push_back(rand() % cardinality); + } + return result; +} + +template <> +std::vector +GenerateData(const size_t size, const size_t cardinality) { + std::vector result; + for (size_t i = 0; i < size; ++i) { + result.push_back(rand() % 2 == 0); + } + return result; +} + +template <> +std::vector +GenerateData(const size_t size, const size_t cardinality) { + std::vector result; + for (size_t i = 0; i < size; ++i) { + result.push_back(std::to_string(rand() % cardinality)); + } + return result; +} + +std::vector +GenerateArrayData(proto::schema::DataType element_type, + int cardinality, + int size, + int array_len) { + std::vector data(size); + switch (element_type) { + case proto::schema::DataType::Bool: { + for (int i = 0; i < size; i++) { + milvus::proto::schema::ScalarField field_data; + for (int j = 0; j < array_len; j++) { + field_data.mutable_bool_data()->add_data( + static_cast(random())); + } + data[i] = field_data; + } + break; + } + case proto::schema::DataType::Int8: + case proto::schema::DataType::Int16: + case proto::schema::DataType::Int32: { + for (int i = 0; i < size; i++) { + milvus::proto::schema::ScalarField field_data; + + for (int j = 0; j < array_len; j++) { + field_data.mutable_int_data()->add_data( + static_cast(random() % cardinality)); + } + data[i] = field_data; + } + break; + } + case proto::schema::DataType::Int64: { + for (int i = 0; i < size; i++) { + milvus::proto::schema::ScalarField field_data; + for (int j = 0; j < array_len; j++) { + field_data.mutable_long_data()->add_data( + static_cast(random() % cardinality)); + } + data[i] = field_data; + } + break; + } + case proto::schema::DataType::String: { + for (int i = 0; i < size; i++) { + milvus::proto::schema::ScalarField field_data; + + for (int j = 0; j < array_len; j++) { + field_data.mutable_string_data()->add_data( + std::to_string(random() % cardinality)); + } + data[i] = field_data; + } + break; + } + case proto::schema::DataType::Float: { + for (int i = 0; i < size; i++) { + milvus::proto::schema::ScalarField field_data; + + for (int j = 0; j < array_len; j++) { + field_data.mutable_float_data()->add_data( + static_cast(random() % cardinality)); + } + data[i] = field_data; + } + break; + } + case proto::schema::DataType::Double: { + for (int i = 0; i < size; i++) { + milvus::proto::schema::ScalarField field_data; + + for (int j = 0; j < array_len; j++) { + field_data.mutable_double_data()->add_data( + static_cast(random() % cardinality)); + } + data[i] = field_data; + } + break; + } + default: { + throw std::runtime_error("unsupported data type"); + } + } + std::vector res; + for (int i = 0; i < size; i++) { + res.push_back(milvus::Array(data[i])); + } + return res; +} + +template +class ArrayBitmapIndexTest : public testing::Test { + protected: + void + Init(int64_t collection_id, + int64_t partition_id, + int64_t segment_id, + int64_t field_id, + int64_t index_build_id, + int64_t index_version) { + proto::schema::FieldSchema field_schema; + field_schema.set_data_type(proto::schema::DataType::Array); + proto::schema::DataType element_type; + if constexpr (std::is_same_v) { + element_type = proto::schema::DataType::Int8; + } else if constexpr (std::is_same_v) { + element_type = proto::schema::DataType::Int16; + } else if constexpr (std::is_same_v) { + element_type = proto::schema::DataType::Int32; + } else if constexpr (std::is_same_v) { + element_type = proto::schema::DataType::Int64; + } else if constexpr (std::is_same_v) { + element_type = proto::schema::DataType::Float; + } else if constexpr (std::is_same_v) { + element_type = proto::schema::DataType::Double; + } else if constexpr (std::is_same_v) { + element_type = proto::schema::DataType::String; + } + field_schema.set_element_type(element_type); + auto field_meta = storage::FieldDataMeta{ + collection_id, partition_id, segment_id, field_id, field_schema}; + auto index_meta = storage::IndexMeta{ + segment_id, field_id, index_build_id, index_version}; + + data_ = GenerateArrayData(element_type, cardinality_, nb_, 10); + + auto field_data = storage::CreateFieldData(DataType::ARRAY); + field_data->FillFieldData(data_.data(), data_.size()); + storage::InsertData insert_data(field_data); + insert_data.SetFieldDataMeta(field_meta); + insert_data.SetTimestamps(0, 100); + + auto serialized_bytes = insert_data.Serialize(storage::Remote); + + auto log_path = fmt::format("{}/{}/{}/{}/{}/{}", + "test_array_bitmap", + collection_id, + partition_id, + segment_id, + field_id, + 0); + chunk_manager_->Write( + log_path, serialized_bytes.data(), serialized_bytes.size()); + + storage::FileManagerContext ctx(field_meta, index_meta, chunk_manager_); + std::vector index_files; + + Config config; + config["index_type"] = milvus::index::BITMAP_INDEX_TYPE; + config["insert_files"] = std::vector{log_path}; + config["bitmap_cardinality_limit"] = "1000"; + + auto build_index = + indexbuilder::IndexFactory::GetInstance().CreateIndex( + DataType::ARRAY, config, ctx); + build_index->Build(); + + auto binary_set = build_index->Upload(); + for (const auto& [key, _] : binary_set.binary_map_) { + index_files.push_back(key); + } + + index::CreateIndexInfo index_info{}; + index_info.index_type = milvus::index::BITMAP_INDEX_TYPE; + index_info.field_type = DataType::ARRAY; + + config["index_files"] = index_files; + + index_ = + index::IndexFactory::GetInstance().CreateIndex(index_info, ctx); + index_->Load(milvus::tracer::TraceContext{}, config); + } + + void + SetUp() override { + nb_ = 10000; + cardinality_ = 30; + + // if constexpr (std::is_same_v) { + // type_ = DataType::INT8; + // } else if constexpr (std::is_same_v) { + // type_ = DataType::INT16; + // } else if constexpr (std::is_same_v) { + // type_ = DataType::INT32; + // } else if constexpr (std::is_same_v) { + // type_ = DataType::INT64; + // } else if constexpr (std::is_same_v) { + // type_ = DataType::VARCHAR; + // } + int64_t collection_id = 1; + int64_t partition_id = 2; + int64_t segment_id = 3; + int64_t field_id = 101; + int64_t index_build_id = 1000; + int64_t index_version = 10000; + std::string root_path = "/tmp/test-bitmap-index/"; + + storage::StorageConfig storage_config; + storage_config.storage_type = "local"; + storage_config.root_path = root_path; + chunk_manager_ = storage::CreateChunkManager(storage_config); + + Init(collection_id, + partition_id, + segment_id, + field_id, + index_build_id, + index_version); + } + + virtual ~ArrayBitmapIndexTest() override { + boost::filesystem::remove_all(chunk_manager_->GetRootPath()); + } + + public: + void + TestInFunc() { + // boost::container::vector test_data; + // std::unordered_set s; + // size_t nq = 10; + // for (size_t i = 0; i < nq; i++) { + // test_data.push_back(data_[i]); + // s.insert(data_[i]); + // } + // auto index_ptr = dynamic_cast*>(index_.get()); + // auto bitset = index_ptr->In(test_data.size(), test_data.data()); + // for (size_t i = 0; i < bitset.size(); i++) { + // ASSERT_EQ(bitset[i], s.find(data_[i]) != s.end()); + // } + } + + private: + std::shared_ptr chunk_manager_; + + public: + DataType type_; + IndexBasePtr index_; + size_t nb_; + size_t cardinality_; + std::vector data_; +}; + +TYPED_TEST_SUITE_P(ArrayBitmapIndexTest); + +TYPED_TEST_P(ArrayBitmapIndexTest, CountFuncTest) { + auto count = this->index_->Count(); + EXPECT_EQ(count, this->nb_); +} + +TYPED_TEST_P(ArrayBitmapIndexTest, INFuncTest) { + // this->TestInFunc(); +} + +TYPED_TEST_P(ArrayBitmapIndexTest, NotINFuncTest) { + //this->TestNotInFunc(); +} + +using BitmapType = + testing::Types; + +REGISTER_TYPED_TEST_SUITE_P(ArrayBitmapIndexTest, + CountFuncTest, + INFuncTest, + NotINFuncTest); + +INSTANTIATE_TYPED_TEST_SUITE_P(ArrayBitmapE2ECheck, + ArrayBitmapIndexTest, + BitmapType); diff --git a/internal/core/unittest/test_hybrid_index.cpp b/internal/core/unittest/test_hybrid_index.cpp index 4208719930..1f6ea6aef8 100644 --- a/internal/core/unittest/test_hybrid_index.cpp +++ b/internal/core/unittest/test_hybrid_index.cpp @@ -24,6 +24,7 @@ #include "index/IndexFactory.h" #include "test_utils/indexbuilder_test_utils.h" #include "index/Meta.h" +#include "pb/schema.pb.h" using namespace milvus::index; using namespace milvus::indexbuilder; @@ -70,8 +71,24 @@ class HybridIndexTestV1 : public testing::Test { int64_t field_id, int64_t index_build_id, int64_t index_version) { + proto::schema::FieldSchema field_schema; + if constexpr (std::is_same_v) { + field_schema.set_data_type(proto::schema::DataType::Int8); + } else if constexpr (std::is_same_v) { + field_schema.set_data_type(proto::schema::DataType::Int16); + } else if constexpr (std::is_same_v) { + field_schema.set_data_type(proto::schema::DataType::Int32); + } else if constexpr (std::is_same_v) { + field_schema.set_data_type(proto::schema::DataType::Int64); + } else if constexpr (std::is_same_v) { + field_schema.set_data_type(proto::schema::DataType::Float); + } else if constexpr (std::is_same_v) { + field_schema.set_data_type(proto::schema::DataType::Double); + } else if constexpr (std::is_same_v) { + field_schema.set_data_type(proto::schema::DataType::String); + } auto field_meta = storage::FieldDataMeta{ - collection_id, partition_id, segment_id, field_id}; + collection_id, partition_id, segment_id, field_id, field_schema}; auto index_meta = storage::IndexMeta{ segment_id, field_id, index_build_id, index_version}; diff --git a/internal/core/unittest/test_inverted_index.cpp b/internal/core/unittest/test_inverted_index.cpp index c8b9bf3663..83d3a65673 100644 --- a/internal/core/unittest/test_inverted_index.cpp +++ b/internal/core/unittest/test_inverted_index.cpp @@ -40,8 +40,9 @@ gen_field_meta(int64_t collection_id = 1, .segment_id = segment_id, .field_id = field_id, }; - meta.schema.set_data_type(static_cast(data_type)); - meta.schema.set_element_type( + meta.field_schema.set_data_type( + static_cast(data_type)); + meta.field_schema.set_element_type( static_cast(element_type)); return meta; } diff --git a/internal/core/unittest/test_scalar_index.cpp b/internal/core/unittest/test_scalar_index.cpp index 9a99bec26a..2d3e6bb213 100644 --- a/internal/core/unittest/test_scalar_index.cpp +++ b/internal/core/unittest/test_scalar_index.cpp @@ -56,7 +56,7 @@ TYPED_TEST_P(TypedScalarIndexTest, Dummy) { auto GetTempFileManagerCtx(CDataType data_type) { auto ctx = milvus::storage::FileManagerContext(); - ctx.fieldDataMeta.schema.set_data_type( + ctx.fieldDataMeta.field_schema.set_data_type( static_cast(data_type)); return ctx; } @@ -356,60 +356,6 @@ struct TypedScalarIndexTestV2::Helper { using C = arrow::DoubleType; }; -TYPED_TEST_SUITE_P(TypedScalarIndexTestV2); - -TYPED_TEST_P(TypedScalarIndexTestV2, Base) { - using T = TypeParam; - auto dtype = milvus::GetDType(); - auto index_types = GetIndexTypesV2(); - for (const auto& index_type : index_types) { - milvus::index::CreateIndexInfo create_index_info; - create_index_info.field_type = milvus::DataType(dtype); - create_index_info.index_type = index_type; - create_index_info.field_name = "scalar"; - - auto storage_config = get_default_local_storage_config(); - auto chunk_manager = - milvus::storage::CreateChunkManager(storage_config); - - milvus::test::TmpPath tmp_path; - auto temp_path = tmp_path.get(); - auto vec_size = DIM * 4; - auto dataset = GenDataset(nb, knowhere::metric::L2, false); - auto scalars = GenSortedArr(nb); - auto space = TestSpace(temp_path, vec_size, dataset, scalars); - milvus::storage::FileManagerContext file_manager_context( - {}, {.field_name = "scalar"}, chunk_manager, space); - file_manager_context.fieldDataMeta.schema.set_data_type( - static_cast(dtype)); - auto index = - milvus::index::IndexFactory::GetInstance().CreateScalarIndex( - create_index_info, file_manager_context, space); - auto scalar_index = - dynamic_cast*>(index.get()); - milvus::Config config; - if (index_type == "BITMAP") { - config["bitmap_cardinality_limit"] = "1000"; - } - scalar_index->BuildV2(config); - scalar_index->UploadV2(); - - auto new_index = - milvus::index::IndexFactory::GetInstance().CreateScalarIndex( - create_index_info, file_manager_context, space); - auto new_scalar_index = - dynamic_cast*>(new_index.get()); - new_scalar_index->LoadV2(); - ASSERT_EQ(nb, new_scalar_index->Count()); - } -} - -REGISTER_TYPED_TEST_SUITE_P(TypedScalarIndexTestV2, Base); - -INSTANTIATE_TYPED_TEST_SUITE_P(ArithmeticCheck, - TypedScalarIndexTestV2, - ScalarT); - using namespace milvus::index; template std::vector diff --git a/internal/core/unittest/test_string_index.cpp b/internal/core/unittest/test_string_index.cpp index f26a59645c..bd006a5caf 100644 --- a/internal/core/unittest/test_string_index.cpp +++ b/internal/core/unittest/test_string_index.cpp @@ -123,7 +123,7 @@ TEST_F(StringIndexMarisaTest, Reverse) { auto index_types = GetIndexTypes(); for (const auto& index_type : index_types) { auto index = milvus::index::IndexFactory::GetInstance() - .CreateScalarIndex(index_type); + .CreatePrimitiveScalarIndex(index_type); index->Build(nb, strs.data()); assert_reverse(index.get(), strs); } diff --git a/internal/core/unittest/test_utils/indexbuilder_test_utils.h b/internal/core/unittest/test_utils/indexbuilder_test_utils.h index 8581c0453c..a02c5cfe3b 100644 --- a/internal/core/unittest/test_utils/indexbuilder_test_utils.h +++ b/internal/core/unittest/test_utils/indexbuilder_test_utils.h @@ -491,17 +491,14 @@ GetIndexTypes() { template inline std::vector GetIndexTypesV2() { - return std::vector{"sort", - milvus::index::INVERTED_INDEX_TYPE, - milvus::index::BITMAP_INDEX_TYPE}; + return std::vector{"sort", milvus::index::INVERTED_INDEX_TYPE}; } template <> inline std::vector GetIndexTypesV2() { return std::vector{"marisa", - milvus::index::INVERTED_INDEX_TYPE, - milvus::index::BITMAP_INDEX_TYPE}; + milvus::index::INVERTED_INDEX_TYPE}; } } // namespace diff --git a/pkg/util/indexparamcheck/bitmap_checker_test.go b/pkg/util/indexparamcheck/bitmap_checker_test.go index aa1baa8963..7f1bb38986 100644 --- a/pkg/util/indexparamcheck/bitmap_checker_test.go +++ b/pkg/util/indexparamcheck/bitmap_checker_test.go @@ -16,9 +16,9 @@ func Test_BitmapIndexChecker(t *testing.T) { assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Int64)) assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Float)) assert.NoError(t, c.CheckValidDataType(schemapb.DataType_String)) + assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Array)) assert.Error(t, c.CheckValidDataType(schemapb.DataType_JSON)) - assert.Error(t, c.CheckValidDataType(schemapb.DataType_Array)) assert.Error(t, c.CheckTrain(map[string]string{})) assert.Error(t, c.CheckTrain(map[string]string{"bitmap_cardinality_limit": "0"})) } diff --git a/pkg/util/indexparamcheck/bitmap_index_checker.go b/pkg/util/indexparamcheck/bitmap_index_checker.go index d41267987d..3b9be2786e 100644 --- a/pkg/util/indexparamcheck/bitmap_index_checker.go +++ b/pkg/util/indexparamcheck/bitmap_index_checker.go @@ -21,8 +21,8 @@ func (c *BITMAPChecker) CheckTrain(params map[string]string) error { } func (c *BITMAPChecker) CheckValidDataType(dType schemapb.DataType) error { - if !typeutil.IsArithmetic(dType) && !typeutil.IsStringType(dType) { - return fmt.Errorf("bitmap index are only supported on numeric and string field") + if !typeutil.IsArithmetic(dType) && !typeutil.IsStringType(dType) && !typeutil.IsArrayType(dType) { + return fmt.Errorf("bitmap index are only supported on numeric, string and array field") } return nil }