enhance: support array bitmap index (#33527)

#32900

---------

Signed-off-by: luzhang <luzhang@zilliz.com>
Co-authored-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
zhagnlu 2024-06-16 21:51:58 +08:00 committed by GitHub
parent e422168f09
commit d43ec4db0b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 702 additions and 301 deletions

View File

@ -258,6 +258,34 @@ IsBinaryDataType(DataType data_type) {
return IsJsonDataType(data_type) || IsArrayDataType(data_type);
}
// Reports whether `type` is one of the scalar proto data types treated as a
// primitive value: Bool, the signed integers (Int8/16/32/64), the floating
// point types (Float/Double) and the text types (String/VarChar).
// Every other data type yields false.
inline bool
IsPrimitiveType(proto::schema::DataType type) {
    const bool is_bool = type == proto::schema::DataType::Bool;
    const bool is_integer = type == proto::schema::DataType::Int8 ||
                            type == proto::schema::DataType::Int16 ||
                            type == proto::schema::DataType::Int32 ||
                            type == proto::schema::DataType::Int64;
    const bool is_floating = type == proto::schema::DataType::Float ||
                             type == proto::schema::DataType::Double;
    const bool is_text = type == proto::schema::DataType::String ||
                         type == proto::schema::DataType::VarChar;
    return is_bool || is_integer || is_floating || is_text;
}
// Reports whether `type` is the proto JSON data type.
inline bool
IsJsonType(proto::schema::DataType type) {
    constexpr auto kJsonType = proto::schema::DataType::JSON;
    return kJsonType == type;
}
// Reports whether `type` is the proto Array data type.
inline bool
IsArrayType(proto::schema::DataType type) {
    constexpr auto kArrayType = proto::schema::DataType::Array;
    return kArrayType == type;
}
inline bool
IsBinaryVectorDataType(DataType data_type) {
return data_type == DataType::VECTOR_BINARY;

View File

@ -33,7 +33,8 @@ namespace index {
template <typename T>
BitmapIndex<T>::BitmapIndex(
const storage::FileManagerContext& file_manager_context)
: is_built_(false) {
: is_built_(false),
schema_(file_manager_context.fieldDataMeta.field_schema) {
if (file_manager_context.Valid()) {
file_manager_ =
std::make_shared<storage::MemFileManagerImpl>(file_manager_context);
@ -45,7 +46,9 @@ template <typename T>
BitmapIndex<T>::BitmapIndex(
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space)
: is_built_(false), data_(), space_(space) {
: is_built_(false),
schema_(file_manager_context.fieldDataMeta.field_schema),
space_(space) {
if (file_manager_context.Valid()) {
file_manager_ = std::make_shared<storage::MemFileManagerImpl>(
file_manager_context, space);
@ -67,27 +70,7 @@ BitmapIndex<T>::Build(const Config& config) {
auto field_datas =
file_manager_->CacheRawDataToMemory(insert_files.value());
int total_num_rows = 0;
for (const auto& field_data : field_datas) {
total_num_rows += field_data->get_num_rows();
}
if (total_num_rows == 0) {
throw SegcoreError(DataIsEmpty,
"scalar bitmap index can not build null values");
}
total_num_rows_ = total_num_rows;
int64_t offset = 0;
for (const auto& data : field_datas) {
auto slice_row_num = data->get_num_rows();
for (size_t i = 0; i < slice_row_num; ++i) {
auto val = reinterpret_cast<const T*>(data->RawValue(i));
data_[*val].add(offset);
offset++;
}
}
is_built_ = true;
BuildWithFieldData(field_datas);
}
template <typename T>
@ -144,6 +127,21 @@ BitmapIndex<T>::BuildV2(const Config& config) {
BuildWithFieldData(field_datas);
}
// Fills `data_` (value -> roaring bitmap of row offsets) from primitive field
// data: each row of each chunk is read as a single `T`, and the row's offset
// is added to the bitmap stored under that value.
//
// @param field_datas chunks of raw field data; rows are numbered
//                    consecutively across chunks, starting at 0.
template <typename T>
void
BitmapIndex<T>::BuildPrimitiveField(
    const std::vector<FieldDataPtr>& field_datas) {
    // The row id keeps counting across chunks, so it lives outside both loops.
    int64_t row_id = 0;
    for (const auto& chunk : field_datas) {
        auto rows_in_chunk = chunk->get_num_rows();
        size_t row = 0;
        while (row < rows_in_chunk) {
            const T& value = *reinterpret_cast<const T*>(chunk->RawValue(row));
            data_[value].add(row_id);
            ++row_id;
            ++row;
        }
    }
}
template <typename T>
void
BitmapIndex<T>::BuildWithFieldData(
@ -158,17 +156,46 @@ BitmapIndex<T>::BuildWithFieldData(
}
total_num_rows_ = total_num_rows;
switch (schema_.data_type()) {
case proto::schema::DataType::Bool:
case proto::schema::DataType::Int8:
case proto::schema::DataType::Int16:
case proto::schema::DataType::Int32:
case proto::schema::DataType::Int64:
case proto::schema::DataType::Float:
case proto::schema::DataType::Double:
case proto::schema::DataType::String:
case proto::schema::DataType::VarChar:
BuildPrimitiveField(field_datas);
break;
case proto::schema::DataType::Array:
BuildArrayField(field_datas);
break;
default:
PanicInfo(
DataTypeInvalid,
fmt::format("Invalid data type: {} for build bitmap index",
proto::schema::DataType_Name(schema_.data_type())));
}
is_built_ = true;
}
// Fills `data_` (element value -> roaring bitmap of row offsets) from array
// field data: each row is read as a `milvus::Array`, and the row's offset is
// added to the bitmap of every element the array contains.
//
// A row must only be interpreted as `milvus::Array`. Reinterpreting the same
// raw pointer as `const T*` (the primitive-field path) reads an unrelated
// object and would register the row under a garbage key, so that path is
// deliberately absent here.
//
// @param field_datas chunks of raw array field data; rows are numbered
//                    consecutively across chunks, starting at 0.
template <typename T>
void
BitmapIndex<T>::BuildArrayField(const std::vector<FieldDataPtr>& field_datas) {
    int64_t offset = 0;
    for (const auto& data : field_datas) {
        auto slice_row_num = data->get_num_rows();
        for (size_t i = 0; i < slice_row_num; ++i) {
            auto array =
                reinterpret_cast<const milvus::Array*>(data->RawValue(i));
            // One bitmap entry per element: a row matches a value when any
            // of its elements equals that value.
            for (size_t j = 0; j < array->length(); ++j) {
                auto val = array->template get_data<T>(j);
                data_[val].add(offset);
            }
            offset++;
        }
    }
    is_built_ = true;
}
template <typename T>
@ -877,4 +904,4 @@ template class BitmapIndex<double>;
template class BitmapIndex<std::string>;
} // namespace index
} // namespace milvus
} // namespace milvus

View File

@ -50,17 +50,6 @@ class BitmapIndex : public ScalarIndex<T> {
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space);
explicit BitmapIndex(
const std::shared_ptr<storage::MemFileManagerImpl>& file_manager)
: file_manager_(file_manager) {
}
explicit BitmapIndex(
const std::shared_ptr<storage::MemFileManagerImpl>& file_manager,
std::shared_ptr<milvus_storage::Space> space)
: file_manager_(file_manager), space_(space) {
}
~BitmapIndex() override = default;
BinarySet
@ -117,6 +106,7 @@ class BitmapIndex : public ScalarIndex<T> {
BinarySet
Upload(const Config& config = {}) override;
BinarySet
UploadV2(const Config& config = {}) override;
@ -125,6 +115,11 @@ class BitmapIndex : public ScalarIndex<T> {
return true;
}
void
LoadWithoutAssemble(const BinarySet& binary_set,
const Config& config) override;
public:
int64_t
Cardinality() {
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
@ -134,11 +129,13 @@ class BitmapIndex : public ScalarIndex<T> {
}
}
void
LoadWithoutAssemble(const BinarySet& binary_set,
const Config& config) override;
private:
void
BuildPrimitiveField(const std::vector<FieldDataPtr>& datas);
void
BuildArrayField(const std::vector<FieldDataPtr>& datas);
size_t
GetIndexDataSize();
@ -188,6 +185,7 @@ class BitmapIndex : public ScalarIndex<T> {
std::map<T, roaring::Roaring> data_;
std::map<T, TargetBitmap> bitsets_;
size_t total_num_rows_{0};
proto::schema::FieldSchema schema_;
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
std::shared_ptr<milvus_storage::Space> space_;
};

View File

@ -32,12 +32,14 @@ template <typename T>
HybridScalarIndex<T>::HybridScalarIndex(
const storage::FileManagerContext& file_manager_context)
: is_built_(false),
bitmap_index_cardinality_limit_(DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND) {
bitmap_index_cardinality_limit_(DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND),
file_manager_context_(file_manager_context) {
if (file_manager_context.Valid()) {
file_manager_ =
mem_file_manager_ =
std::make_shared<storage::MemFileManagerImpl>(file_manager_context);
AssertInfo(file_manager_ != nullptr, "create file manager failed!");
AssertInfo(mem_file_manager_ != nullptr, "create file manager failed!");
}
field_type_ = file_manager_context.fieldDataMeta.field_schema.data_type();
internal_index_type_ = InternalIndexType::NONE;
}
@ -47,12 +49,14 @@ HybridScalarIndex<T>::HybridScalarIndex(
std::shared_ptr<milvus_storage::Space> space)
: is_built_(false),
bitmap_index_cardinality_limit_(DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND),
file_manager_context_(file_manager_context),
space_(space) {
if (file_manager_context.Valid()) {
file_manager_ = std::make_shared<storage::MemFileManagerImpl>(
mem_file_manager_ = std::make_shared<storage::MemFileManagerImpl>(
file_manager_context, space);
AssertInfo(file_manager_ != nullptr, "create file manager failed!");
AssertInfo(mem_file_manager_ != nullptr, "create file manager failed!");
}
field_type_ = file_manager_context.fieldDataMeta.field_schema.data_type();
internal_index_type_ = InternalIndexType::NONE;
}
@ -96,7 +100,7 @@ HybridScalarIndex<std::string>::SelectIndexBuildType(
template <typename T>
InternalIndexType
HybridScalarIndex<T>::SelectIndexBuildType(
HybridScalarIndex<T>::SelectBuildTypeForPrimitiveType(
const std::vector<FieldDataPtr>& field_datas) {
std::set<T> distinct_vals;
for (const auto& data : field_datas) {
@ -121,7 +125,7 @@ HybridScalarIndex<T>::SelectIndexBuildType(
template <>
InternalIndexType
HybridScalarIndex<std::string>::SelectIndexBuildType(
HybridScalarIndex<std::string>::SelectBuildTypeForPrimitiveType(
const std::vector<FieldDataPtr>& field_datas) {
std::set<std::string> distinct_vals;
for (const auto& data : field_datas) {
@ -144,6 +148,52 @@ HybridScalarIndex<std::string>::SelectIndexBuildType(
return internal_index_type_;
}
// Chooses the internal index type for an array field by counting the distinct
// element values across all rows.
//
// The scan stops as soon as the number of distinct values reaches
// `bitmap_index_cardinality_limit_`: from that point the decision is already
// INVERTED, and continuing would only grow the set without bound. Stopping
// only the innermost loop is not enough, since the next row would resume
// inserting.
//
// @param field_datas chunks of raw array field data.
// @return INVERTED when the distinct-value count reaches the limit, BITMAP
//         otherwise; the choice is also stored in `internal_index_type_`.
template <typename T>
InternalIndexType
HybridScalarIndex<T>::SelectBuildTypeForArrayType(
    const std::vector<FieldDataPtr>& field_datas) {
    // Same conversion the implicit size_t/int32_t comparison would perform.
    const auto limit = static_cast<size_t>(bitmap_index_cardinality_limit_);
    std::set<T> distinct_vals;

    // Returns true once the cardinality limit is reached, abandoning the
    // remaining elements, rows and chunks.
    const auto reaches_limit = [&]() {
        for (const auto& data : field_datas) {
            auto slice_row_num = data->get_num_rows();
            for (size_t i = 0; i < slice_row_num; ++i) {
                auto array =
                    reinterpret_cast<const milvus::Array*>(data->RawValue(i));
                for (size_t j = 0; j < array->length(); ++j) {
                    auto val = array->template get_data<T>(j);
                    distinct_vals.insert(val);
                    // Limit the bitmap index cardinality because of memory usage
                    if (distinct_vals.size() >= limit) {
                        return true;
                    }
                }
            }
        }
        // Also covers the no-element case (e.g. a limit of 0).
        return distinct_vals.size() >= limit;
    };

    // Decide whether to select bitmap index or inverted index
    if (reaches_limit()) {
        internal_index_type_ = InternalIndexType::INVERTED;
    } else {
        internal_index_type_ = InternalIndexType::BITMAP;
    }
    return internal_index_type_;
}
// Dispatches internal index type selection on the field's data type:
// primitive fields and array fields each have their own selection routine;
// any other field type is rejected with an Unsupported panic.
//
// @param field_datas chunks of raw field data to inspect.
// @return the index type chosen by the type-specific routine.
template <typename T>
InternalIndexType
HybridScalarIndex<T>::SelectIndexBuildType(
    const std::vector<FieldDataPtr>& field_datas) {
    if (IsPrimitiveType(field_type_)) {
        return SelectBuildTypeForPrimitiveType(field_datas);
    } else if (IsArrayType(field_type_)) {
        return SelectBuildTypeForArrayType(field_datas);
    } else {
        PanicInfo(Unsupported,
                  fmt::format("unsupported build index for type {}",
                              DataType_Name(field_type_)));
    }
}
template <typename T>
std::shared_ptr<ScalarIndex<T>>
HybridScalarIndex<T>::GetInternalIndex() {
@ -151,9 +201,14 @@ HybridScalarIndex<T>::GetInternalIndex() {
return internal_index_;
}
if (internal_index_type_ == InternalIndexType::BITMAP) {
internal_index_ = std::make_shared<BitmapIndex<T>>(file_manager_);
internal_index_ =
std::make_shared<BitmapIndex<T>>(file_manager_context_);
} else if (internal_index_type_ == InternalIndexType::STLSORT) {
internal_index_ = std::make_shared<ScalarIndexSort<T>>(file_manager_);
internal_index_ =
std::make_shared<ScalarIndexSort<T>>(file_manager_context_);
} else if (internal_index_type_ == InternalIndexType::INVERTED) {
internal_index_ =
std::make_shared<InvertedIndexTantivy<T>>(file_manager_context_);
} else {
PanicInfo(UnexpectedError,
"unknown index type when get internal index");
@ -170,9 +225,13 @@ HybridScalarIndex<std::string>::GetInternalIndex() {
if (internal_index_type_ == InternalIndexType::BITMAP) {
internal_index_ =
std::make_shared<BitmapIndex<std::string>>(file_manager_);
std::make_shared<BitmapIndex<std::string>>(file_manager_context_);
} else if (internal_index_type_ == InternalIndexType::MARISA) {
internal_index_ = std::make_shared<StringIndexMarisa>(file_manager_);
internal_index_ =
std::make_shared<StringIndexMarisa>(file_manager_context_);
} else if (internal_index_type_ == InternalIndexType::INVERTED) {
internal_index_ = std::make_shared<InvertedIndexTantivy<std::string>>(
file_manager_context_);
} else {
PanicInfo(UnexpectedError,
"unknown index type when get internal index");
@ -206,7 +265,7 @@ HybridScalarIndex<T>::Build(const Config& config) {
"insert file paths is empty when build index");
auto field_datas =
file_manager_->CacheRawDataToMemory(insert_files.value());
mem_file_manager_->CacheRawDataToMemory(insert_files.value());
SelectIndexBuildType(field_datas);
BuildInternal(field_datas);
@ -224,7 +283,7 @@ HybridScalarIndex<T>::BuildV2(const Config& config) {
LOG_INFO("config bitmap cardinality limit to {}",
bitmap_index_cardinality_limit_);
auto field_name = file_manager_->GetIndexMeta().field_name;
auto field_name = mem_file_manager_->GetIndexMeta().field_name;
auto reader = space_->ScanData();
std::vector<FieldDataPtr> field_datas;
for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
@ -262,32 +321,51 @@ HybridScalarIndex<T>::Serialize(const Config& config) {
template <typename T>
BinarySet
HybridScalarIndex<T>::Upload(const Config& config) {
auto binary_set = Serialize(config);
file_manager_->AddFile(binary_set);
HybridScalarIndex<T>::SerializeIndexType() {
// Add index type info to storage for future restruct index
BinarySet index_binary_set;
std::shared_ptr<uint8_t[]> index_type_buf(new uint8_t[sizeof(uint8_t)]);
index_type_buf[0] = static_cast<uint8_t>(internal_index_type_);
index_binary_set.Append(index::INDEX_TYPE, index_type_buf, sizeof(uint8_t));
mem_file_manager_->AddFile(index_binary_set);
auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize();
BinarySet ret;
auto remote_paths_to_size = mem_file_manager_->GetRemotePathsToFileSize();
BinarySet ret_set;
Assert(remote_paths_to_size.size() == 1);
for (auto& file : remote_paths_to_size) {
ret.Append(file.first, nullptr, file.second);
ret_set.Append(file.first, nullptr, file.second);
}
return ret_set;
}
template <typename T>
BinarySet
HybridScalarIndex<T>::Upload(const Config& config) {
auto internal_index = GetInternalIndex();
auto index_ret = internal_index->Upload(config);
auto index_type_ret = SerializeIndexType();
for (auto& [key, value] : index_type_ret.binary_map_) {
index_ret.Append(key, value);
}
return ret;
return index_ret;
}
template <typename T>
BinarySet
HybridScalarIndex<T>::UploadV2(const Config& config) {
auto binary_set = Serialize(config);
file_manager_->AddFileV2(binary_set);
auto internal_index = GetInternalIndex();
auto index_ret = internal_index->Upload(config);
auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize();
BinarySet ret;
for (auto& file : remote_paths_to_size) {
ret.Append(file.first, nullptr, file.second);
auto index_type_ret = SerializeIndexType();
for (auto& [key, value] : index_type_ret.binary_map_) {
index_ret.Append(key, value);
}
return ret;
return index_ret;
}
template <typename T>
@ -301,64 +379,32 @@ HybridScalarIndex<T>::DeserializeIndexType(const BinarySet& binary_set) {
template <typename T>
void
HybridScalarIndex<T>::LoadInternal(const BinarySet& binary_set,
const Config& config) {
auto index = GetInternalIndex();
index->LoadWithoutAssemble(binary_set, config);
HybridScalarIndex<T>::LoadV2(const Config& config) {
PanicInfo(Unsupported, "HybridScalarIndex LoadV2 not implemented");
}
// Finds, among `files`, the path whose final component (the text after the
// last '/') equals `index::INDEX_TYPE`. When several paths match, the last
// one in `files` wins. Asserts that a match exists.
//
// @param files remote index file paths.
// @return the matching path.
template <typename T>
std::string
HybridScalarIndex<T>::GetRemoteIndexTypeFile(
    const std::vector<std::string>& files) {
    std::string index_type_path;
    for (const auto& path : files) {
        // With no '/', find_last_of yields npos and npos + 1 wraps to 0, so
        // the whole path is compared as the file name.
        const auto name_begin = path.find_last_of('/') + 1;
        if (path.substr(name_begin) == index::INDEX_TYPE) {
            index_type_path = path;
        }
    }
    AssertInfo(!index_type_path.empty(),
               "index type file not found for hybrid index");
    return index_type_path;
}
template <typename T>
void
HybridScalarIndex<T>::Load(const BinarySet& binary_set, const Config& config) {
milvus::Assemble(const_cast<BinarySet&>(binary_set));
DeserializeIndexType(binary_set);
LoadInternal(binary_set, config);
is_built_ = true;
}
template <typename T>
void
HybridScalarIndex<T>::LoadV2(const Config& config) {
auto blobs = space_->StatisticsBlobs();
std::vector<std::string> index_files;
auto prefix = file_manager_->GetRemoteIndexObjectPrefixV2();
for (auto& b : blobs) {
if (b.name.rfind(prefix, 0) == 0) {
index_files.push_back(b.name);
}
}
std::map<std::string, FieldDataPtr> index_datas{};
for (auto& file_name : index_files) {
auto res = space_->GetBlobByteSize(file_name);
if (!res.ok()) {
PanicInfo(S3Error, "unable to read index blob");
}
auto index_blob_data =
std::shared_ptr<uint8_t[]>(new uint8_t[res.value()]);
auto status = space_->ReadBlob(file_name, index_blob_data.get());
if (!status.ok()) {
PanicInfo(S3Error, "unable to read index blob");
}
auto raw_index_blob =
storage::DeserializeFileData(index_blob_data, res.value());
auto key = file_name.substr(file_name.find_last_of('/') + 1);
index_datas[key] = raw_index_blob->GetFieldData();
}
AssembleIndexDatas(index_datas);
BinarySet binary_set;
for (auto& [key, data] : index_datas) {
auto size = data->Size();
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
auto buf = std::shared_ptr<uint8_t[]>(
(uint8_t*)const_cast<void*>(data->Data()), deleter);
binary_set.Append(key, buf, size);
}
DeserializeIndexType(binary_set);
LoadInternal(binary_set, config);
auto index = GetInternalIndex();
index->Load(binary_set, config);
is_built_ = true;
}
@ -371,7 +417,11 @@ HybridScalarIndex<T>::Load(milvus::tracer::TraceContext ctx,
GetValueFromConfig<std::vector<std::string>>(config, "index_files");
AssertInfo(index_files.has_value(),
"index file paths is empty when load bitmap index");
auto index_datas = file_manager_->LoadIndexToMemory(index_files.value());
auto index_type_file = GetRemoteIndexTypeFile(index_files.value());
auto index_datas = mem_file_manager_->LoadIndexToMemory(
std::vector<std::string>{index_type_file});
AssembleIndexDatas(index_datas);
BinarySet binary_set;
for (auto& [key, data] : index_datas) {
@ -384,7 +434,8 @@ HybridScalarIndex<T>::Load(milvus::tracer::TraceContext ctx,
DeserializeIndexType(binary_set);
LoadInternal(binary_set, config);
auto index = GetInternalIndex();
index->Load(ctx, config);
is_built_ = true;
}

View File

@ -24,6 +24,7 @@
#include "index/BitmapIndex.h"
#include "index/ScalarIndexSort.h"
#include "index/StringIndexMarisa.h"
#include "index/InvertedIndexTantivy.h"
#include "storage/FileManager.h"
#include "storage/DiskFileManagerImpl.h"
#include "storage/MemFileManagerImpl.h"
@ -37,6 +38,7 @@ enum class InternalIndexType {
BITMAP,
STLSORT,
MARISA,
INVERTED,
};
/*
@ -125,6 +127,9 @@ class HybridScalarIndex : public ScalarIndex<T> {
// Array fields always report no raw data; every other field type forwards the
// question to the internal index.
const bool
HasRawData() const override {
    const bool is_array_field =
        field_type_ == proto::schema::DataType::Array;
    // The ternary keeps the short-circuit: internal_index_ is not touched
    // for array fields.
    return is_array_field ? false : internal_index_->HasRawData();
}
@ -135,30 +140,42 @@ class HybridScalarIndex : public ScalarIndex<T> {
UploadV2(const Config& config = {}) override;
private:
InternalIndexType
SelectBuildTypeForPrimitiveType(
const std::vector<FieldDataPtr>& field_datas);
InternalIndexType
SelectBuildTypeForArrayType(const std::vector<FieldDataPtr>& field_datas);
InternalIndexType
SelectIndexBuildType(const std::vector<FieldDataPtr>& field_datas);
InternalIndexType
SelectIndexBuildType(size_t n, const T* values);
BinarySet
SerializeIndexType();
void
DeserializeIndexType(const BinarySet& binary_set);
void
BuildInternal(const std::vector<FieldDataPtr>& field_datas);
void
LoadInternal(const BinarySet& binary_set, const Config& config);
std::shared_ptr<ScalarIndex<T>>
GetInternalIndex();
std::string
GetRemoteIndexTypeFile(const std::vector<std::string>& files);
public:
bool is_built_{false};
int32_t bitmap_index_cardinality_limit_;
proto::schema::DataType field_type_;
InternalIndexType internal_index_type_;
std::shared_ptr<ScalarIndex<T>> internal_index_{nullptr};
std::shared_ptr<storage::MemFileManagerImpl> file_manager_{nullptr};
storage::FileManagerContext file_manager_context_;
std::shared_ptr<storage::MemFileManagerImpl> mem_file_manager_{nullptr};
std::shared_ptr<milvus_storage::Space> space_{nullptr};
};

View File

@ -33,7 +33,7 @@ namespace milvus::index {
template <typename T>
ScalarIndexPtr<T>
IndexFactory::CreateScalarIndex(
IndexFactory::CreatePrimitiveScalarIndex(
const IndexType& index_type,
const storage::FileManagerContext& file_manager_context) {
if (index_type == INVERTED_INDEX_TYPE) {
@ -54,7 +54,7 @@ IndexFactory::CreateScalarIndex(
template <>
ScalarIndexPtr<std::string>
IndexFactory::CreateScalarIndex<std::string>(
IndexFactory::CreatePrimitiveScalarIndex<std::string>(
const IndexType& index_type,
const storage::FileManagerContext& file_manager_context) {
#if defined(__linux__) || defined(__APPLE__)
@ -74,7 +74,7 @@ IndexFactory::CreateScalarIndex<std::string>(
template <typename T>
ScalarIndexPtr<T>
IndexFactory::CreateScalarIndex(
IndexFactory::CreatePrimitiveScalarIndex(
const IndexType& index_type,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space) {
@ -91,7 +91,7 @@ IndexFactory::CreateScalarIndex(
template <>
ScalarIndexPtr<std::string>
IndexFactory::CreateScalarIndex<std::string>(
IndexFactory::CreatePrimitiveScalarIndex<std::string>(
const IndexType& index_type,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space) {
@ -142,25 +142,32 @@ IndexFactory::CreatePrimitiveScalarIndex(
switch (data_type) {
// create scalar index
case DataType::BOOL:
return CreateScalarIndex<bool>(index_type, file_manager_context);
return CreatePrimitiveScalarIndex<bool>(index_type,
file_manager_context);
case DataType::INT8:
return CreateScalarIndex<int8_t>(index_type, file_manager_context);
return CreatePrimitiveScalarIndex<int8_t>(index_type,
file_manager_context);
case DataType::INT16:
return CreateScalarIndex<int16_t>(index_type, file_manager_context);
return CreatePrimitiveScalarIndex<int16_t>(index_type,
file_manager_context);
case DataType::INT32:
return CreateScalarIndex<int32_t>(index_type, file_manager_context);
return CreatePrimitiveScalarIndex<int32_t>(index_type,
file_manager_context);
case DataType::INT64:
return CreateScalarIndex<int64_t>(index_type, file_manager_context);
return CreatePrimitiveScalarIndex<int64_t>(index_type,
file_manager_context);
case DataType::FLOAT:
return CreateScalarIndex<float>(index_type, file_manager_context);
return CreatePrimitiveScalarIndex<float>(index_type,
file_manager_context);
case DataType::DOUBLE:
return CreateScalarIndex<double>(index_type, file_manager_context);
return CreatePrimitiveScalarIndex<double>(index_type,
file_manager_context);
// create string index
case DataType::STRING:
case DataType::VARCHAR:
return CreateScalarIndex<std::string>(index_type,
file_manager_context);
return CreatePrimitiveScalarIndex<std::string>(
index_type, file_manager_context);
default:
throw SegcoreError(
DataTypeInvalid,
@ -168,21 +175,57 @@ IndexFactory::CreatePrimitiveScalarIndex(
}
}
// Creates a scalar index for a composite (array) field by building the
// primitive index that matches the field's element type.
//
// Only BITMAP and INVERTED index types are accepted. Any other index type
// raises an Unsupported panic instead of flowing off the end of this
// non-void function, which would be undefined behaviour.
//
// @param index_type           requested index type.
// @param file_manager_context context carrying the field schema; its
//                             element type selects the primitive index.
// @return the index created by CreatePrimitiveScalarIndex.
IndexBasePtr
IndexFactory::CreateCompositeScalarIndex(
    IndexType index_type,
    const storage::FileManagerContext& file_manager_context) {
    const bool supported = index_type == BITMAP_INDEX_TYPE ||
                           index_type == INVERTED_INDEX_TYPE;
    if (!supported) {
        PanicInfo(
            Unsupported,
            fmt::format("index type: {} for composite scalar not supported now",
                        index_type));
    }
    auto element_type = static_cast<DataType>(
        file_manager_context.fieldDataMeta.field_schema.element_type());
    return CreatePrimitiveScalarIndex(
        element_type, index_type, file_manager_context);
}
// Placeholder for complex-type scalar indexes: it unconditionally raises an
// Unsupported panic and never uses either argument.
IndexBasePtr
IndexFactory::CreateComplexScalarIndex(
IndexType index_type,
const storage::FileManagerContext& file_manager_context) {
PanicInfo(Unsupported, "Complex index not supported now");
}
IndexBasePtr
IndexFactory::CreateScalarIndex(
const CreateIndexInfo& create_index_info,
const storage::FileManagerContext& file_manager_context) {
switch (create_index_info.field_type) {
case DataType::ARRAY:
auto data_type = create_index_info.field_type;
switch (data_type) {
case DataType::BOOL:
case DataType::INT8:
case DataType::INT16:
case DataType::INT32:
case DataType::INT64:
case DataType::FLOAT:
case DataType::DOUBLE:
case DataType::VARCHAR:
case DataType::STRING:
return CreatePrimitiveScalarIndex(
static_cast<DataType>(
file_manager_context.fieldDataMeta.schema.element_type()),
create_index_info.index_type,
file_manager_context);
default:
return CreatePrimitiveScalarIndex(create_index_info.field_type,
create_index_info.index_type,
data_type, create_index_info.index_type, file_manager_context);
case DataType::ARRAY: {
return CreateCompositeScalarIndex(create_index_info.index_type,
file_manager_context);
}
case DataType::JSON: {
return CreateComplexScalarIndex(create_index_info.index_type,
file_manager_context);
}
default:
PanicInfo(DataTypeInvalid, "Invalid data type:{}", data_type);
}
}
@ -251,43 +294,6 @@ IndexFactory::CreateVectorIndex(
}
}
IndexBasePtr
IndexFactory::CreateScalarIndex(const CreateIndexInfo& create_index_info,
const storage::FileManagerContext& file_manager,
std::shared_ptr<milvus_storage::Space> space) {
auto data_type = create_index_info.field_type;
auto index_type = create_index_info.index_type;
switch (data_type) {
// create scalar index
case DataType::BOOL:
return CreateScalarIndex<bool>(index_type, file_manager, space);
case DataType::INT8:
return CreateScalarIndex<int8_t>(index_type, file_manager, space);
case DataType::INT16:
return CreateScalarIndex<int16_t>(index_type, file_manager, space);
case DataType::INT32:
return CreateScalarIndex<int32_t>(index_type, file_manager, space);
case DataType::INT64:
return CreateScalarIndex<int64_t>(index_type, file_manager, space);
case DataType::FLOAT:
return CreateScalarIndex<float>(index_type, file_manager, space);
case DataType::DOUBLE:
return CreateScalarIndex<double>(index_type, file_manager, space);
// create string index
case DataType::STRING:
case DataType::VARCHAR:
return CreateScalarIndex<std::string>(
index_type, file_manager, space);
default:
throw SegcoreError(
DataTypeInvalid,
fmt::format("invalid data type to build mem index: {}",
data_type));
}
}
IndexBasePtr
IndexFactory::CreateVectorIndex(
const CreateIndexInfo& create_index_info,

View File

@ -65,6 +65,7 @@ class IndexFactory {
CreateVectorIndex(const CreateIndexInfo& create_index_info,
const storage::FileManagerContext& file_manager_context);
// For base types like int, float, double, string, etc
IndexBasePtr
CreatePrimitiveScalarIndex(
DataType data_type,
@ -72,6 +73,20 @@ class IndexFactory {
const storage::FileManagerContext& file_manager_context =
storage::FileManagerContext());
// For types like array, struct, union, etc
IndexBasePtr
CreateCompositeScalarIndex(
IndexType index_type,
const storage::FileManagerContext& file_manager_context =
storage::FileManagerContext());
// For types like Json, XML, etc
IndexBasePtr
CreateComplexScalarIndex(
IndexType index_type,
const storage::FileManagerContext& file_manager_context =
storage::FileManagerContext());
IndexBasePtr
CreateScalarIndex(const CreateIndexInfo& create_index_info,
const storage::FileManagerContext& file_manager_context =
@ -85,7 +100,10 @@ class IndexFactory {
IndexBasePtr
CreateScalarIndex(const CreateIndexInfo& create_index_info,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space);
std::shared_ptr<milvus_storage::Space> space) {
PanicInfo(ErrorCode::Unsupported,
"CreateScalarIndexV2 not implemented");
}
// IndexBasePtr
// CreateIndex(DataType dtype, const IndexType& index_type);
@ -94,28 +112,15 @@ class IndexFactory {
template <typename T>
ScalarIndexPtr<T>
CreateScalarIndex(const IndexType& index_type,
const storage::FileManagerContext& file_manager =
storage::FileManagerContext());
CreatePrimitiveScalarIndex(const IndexType& index_type,
const storage::FileManagerContext& file_manager =
storage::FileManagerContext());
template <typename T>
ScalarIndexPtr<T>
CreateScalarIndex(const IndexType& index_type,
const storage::FileManagerContext& file_manager,
std::shared_ptr<milvus_storage::Space> space);
CreatePrimitiveScalarIndex(const IndexType& index_type,
const storage::FileManagerContext& file_manager,
std::shared_ptr<milvus_storage::Space> space);
};
// template <>
// ScalarIndexPtr<std::string>
// IndexFactory::CreateScalarIndex<std::string>(
// const IndexType& index_type,
// const storage::FileManagerContext& file_manager_context,
// DataType d_type);
template <>
ScalarIndexPtr<std::string>
IndexFactory::CreateScalarIndex<std::string>(
const IndexType& index_type,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space);
} // namespace milvus::index

View File

@ -66,7 +66,7 @@ template <typename T>
InvertedIndexTantivy<T>::InvertedIndexTantivy(
const storage::FileManagerContext& ctx,
std::shared_ptr<milvus_storage::Space> space)
: space_(space), schema_(ctx.fieldDataMeta.schema) {
: space_(space), schema_(ctx.fieldDataMeta.field_schema) {
mem_file_manager_ = std::make_shared<MemFileManager>(ctx, ctx.space_);
disk_file_manager_ = std::make_shared<DiskFileManager>(ctx, ctx.space_);
auto field =
@ -259,8 +259,7 @@ InvertedIndexTantivy<T>::InApplyCallback(
template <typename T>
const TargetBitmap
InvertedIndexTantivy<T>::NotIn(size_t n, const T* values) {
TargetBitmap bitset(Count());
bitset.set();
TargetBitmap bitset(Count(), true);
for (size_t i = 0; i < n; ++i) {
auto array = wrapper_->term_query(values[i]);
apply_hits(bitset, array, false);

View File

@ -41,17 +41,6 @@ class ScalarIndexSort : public ScalarIndex<T> {
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space);
explicit ScalarIndexSort(
const std::shared_ptr<storage::MemFileManagerImpl>& file_manager)
: file_manager_(file_manager) {
}
explicit ScalarIndexSort(
const std::shared_ptr<storage::MemFileManagerImpl>& file_manager,
std::shared_ptr<milvus_storage::Space> space)
: file_manager_(file_manager), space_(space) {
}
BinarySet
Serialize(const Config& config) override;

View File

@ -37,17 +37,6 @@ class StringIndexMarisa : public StringIndex {
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space);
explicit StringIndexMarisa(
const std::shared_ptr<storage::MemFileManagerImpl>& file_manager)
: file_manager_(file_manager) {
}
explicit StringIndexMarisa(
const std::shared_ptr<storage::MemFileManagerImpl>& file_manager,
std::shared_ptr<milvus_storage::Space> space)
: file_manager_(file_manager), space_(space) {
}
int64_t
Size() override;

View File

@ -274,7 +274,8 @@ CreateIndexV2(CIndex* res_index,
build_index_info->collectionid(),
build_index_info->partitionid(),
build_index_info->segmentid(),
build_index_info->field_schema().fieldid()};
build_index_info->field_schema().fieldid(),
build_index_info->field_schema()};
milvus::storage::IndexMeta index_meta{
build_index_info->segmentid(),
build_index_info->field_schema().fieldid(),

View File

@ -64,7 +64,7 @@ struct FieldDataMeta {
int64_t partition_id;
int64_t segment_id;
int64_t field_id;
proto::schema::FieldSchema schema;
proto::schema::FieldSchema field_schema;
};
enum CodecType {

View File

@ -20,7 +20,6 @@ set(MILVUS_TEST_FILES
test_bf.cpp
test_bf_sparse.cpp
test_binary.cpp
test_bitmap.cpp
test_bool_index.cpp
test_common.cpp
test_concurrent_vector.cpp
@ -33,6 +32,7 @@ set(MILVUS_TEST_FILES
test_growing_index.cpp
test_indexing.cpp
test_hybrid_index.cpp
test_array_bitmap_index.cpp
test_index_c_api.cpp
test_index_wrapper.cpp
test_init.cpp

View File

@ -0,0 +1,330 @@
// Copyright(C) 2019 - 2020 Zilliz.All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <gtest/gtest.h>
#include <functional>
#include <boost/filesystem.hpp>
#include <unordered_set>
#include <memory>
#include "common/Tracer.h"
#include "index/BitmapIndex.h"
#include "storage/Util.h"
#include "storage/InsertData.h"
#include "indexbuilder/IndexFactory.h"
#include "index/IndexFactory.h"
#include "test_utils/indexbuilder_test_utils.h"
#include "index/Meta.h"
#include "pb/schema.pb.h"
using namespace milvus::index;
using namespace milvus::indexbuilder;
using namespace milvus;
using namespace milvus::index;
// Builds `size` pseudo-random values of type T, each computed as
// rand() % cardinality and then converted to T.
template <typename T>
static std::vector<T>
GenerateData(const size_t size, const size_t cardinality) {
    std::vector<T> values(size);
    for (size_t idx = 0; idx != size; ++idx) {
        const auto sample = rand() % cardinality;
        values[idx] = sample;
    }
    return values;
}

// Bool flavour: `cardinality` is ignored; an element is true exactly when
// the drawn rand() value is even.
template <>
std::vector<bool>
GenerateData<bool>(const size_t size, const size_t cardinality) {
    std::vector<bool> flags(size);
    for (size_t idx = 0; idx != size; ++idx) {
        const int drawn = rand();
        flags[idx] = (drawn % 2 == 0);
    }
    return flags;
}

// String flavour: every element is the decimal text of rand() % cardinality.
template <>
std::vector<std::string>
GenerateData<std::string>(const size_t size, const size_t cardinality) {
    std::vector<std::string> texts(size);
    for (size_t idx = 0; idx != size; ++idx) {
        const auto sample = rand() % cardinality;
        texts[idx] = std::to_string(sample);
    }
    return texts;
}
/// Generates `size` random arrays, each holding `array_len` elements of the
/// requested scalar element type.
///
/// Bool elements are true when the drawn random() value is even. Every other
/// supported type stores `random() % cardinality`: as int for
/// Int8/Int16/Int32, int64_t for Int64, decimal text for String, float for
/// Float and double for Double.
///
/// @param element_type  scalar type of the array elements
/// @param cardinality   modulus applied to random() for non-bool types; it is
///                      used as a divisor, so it must not be zero there
/// @param size          number of arrays (rows) to generate
/// @param array_len     number of elements appended to each array
/// @return one milvus::Array per generated row
/// @throws std::runtime_error when `element_type` is not one of the types
///         listed above
std::vector<milvus::Array>
GenerateArrayData(proto::schema::DataType element_type,
                  int cardinality,
                  int size,
                  int array_len) {
    // Creates `size` rows and invokes `append_one` `array_len` times on each
    // row, so every case below only states how a single element is produced.
    const auto build_rows = [size, array_len](auto&& append_one) {
        std::vector<ScalarArray> rows(size);
        for (int i = 0; i < size; i++) {
            milvus::proto::schema::ScalarField field_data;
            for (int j = 0; j < array_len; j++) {
                append_one(field_data);
            }
            rows[i] = field_data;
        }
        return rows;
    };

    std::vector<ScalarArray> data;
    switch (element_type) {
        case proto::schema::DataType::Bool: {
            data = build_rows([](auto& field_data) {
                // random() is almost never 0, so casting it straight to bool
                // yields nearly all-true data; test the parity instead.
                field_data.mutable_bool_data()->add_data(random() % 2 == 0);
            });
            break;
        }
        case proto::schema::DataType::Int8:
        case proto::schema::DataType::Int16:
        case proto::schema::DataType::Int32: {
            // The three narrow integer types all go through int_data.
            data = build_rows([cardinality](auto& field_data) {
                field_data.mutable_int_data()->add_data(
                    static_cast<int>(random() % cardinality));
            });
            break;
        }
        case proto::schema::DataType::Int64: {
            data = build_rows([cardinality](auto& field_data) {
                field_data.mutable_long_data()->add_data(
                    static_cast<int64_t>(random() % cardinality));
            });
            break;
        }
        case proto::schema::DataType::String: {
            data = build_rows([cardinality](auto& field_data) {
                field_data.mutable_string_data()->add_data(
                    std::to_string(random() % cardinality));
            });
            break;
        }
        case proto::schema::DataType::Float: {
            data = build_rows([cardinality](auto& field_data) {
                field_data.mutable_float_data()->add_data(
                    static_cast<float>(random() % cardinality));
            });
            break;
        }
        case proto::schema::DataType::Double: {
            data = build_rows([cardinality](auto& field_data) {
                field_data.mutable_double_data()->add_data(
                    static_cast<double>(random() % cardinality));
            });
            break;
        }
        default: {
            throw std::runtime_error("unsupported data type");
        }
    }

    // Wrap every generated row into a milvus::Array.
    std::vector<milvus::Array> res;
    res.reserve(data.size());
    for (const auto& row : data) {
        res.push_back(milvus::Array(row));
    }
    return res;
}
/// Typed fixture that builds, uploads and reloads a BITMAP index over an
/// ARRAY field whose element type is derived from the test type parameter T.
///
/// SetUp() creates a local chunk manager and calls Init(), which generates
/// random array data, writes it as an insert log, builds the index from that
/// log and finally loads the uploaded index files into `index_`.
template <typename T>
class ArrayBitmapIndexTest : public testing::Test {
 protected:
    /// Generates the data, builds the bitmap index and loads it back.
    ///
    /// The ids are used for the field/index metadata and for the insert log
    /// path written through `chunk_manager_`.
    void
    Init(int64_t collection_id,
         int64_t partition_id,
         int64_t segment_id,
         int64_t field_id,
         int64_t index_build_id,
         int64_t index_version) {
        proto::schema::FieldSchema field_schema;
        field_schema.set_data_type(proto::schema::DataType::Array);

        // Map the C++ test type to the schema element type. Every supported
        // type assigns `element_type`; anything else is rejected at compile
        // time instead of reading an uninitialized value.
        proto::schema::DataType element_type;
        if constexpr (std::is_same_v<bool, T>) {
            element_type = proto::schema::DataType::Bool;
        } else if constexpr (std::is_same_v<int8_t, T>) {
            element_type = proto::schema::DataType::Int8;
        } else if constexpr (std::is_same_v<int16_t, T>) {
            element_type = proto::schema::DataType::Int16;
        } else if constexpr (std::is_same_v<int32_t, T>) {
            element_type = proto::schema::DataType::Int32;
        } else if constexpr (std::is_same_v<int64_t, T>) {
            element_type = proto::schema::DataType::Int64;
        } else if constexpr (std::is_same_v<float, T>) {
            element_type = proto::schema::DataType::Float;
        } else if constexpr (std::is_same_v<double, T>) {
            element_type = proto::schema::DataType::Double;
        } else if constexpr (std::is_same_v<std::string, T>) {
            element_type = proto::schema::DataType::String;
        } else {
            static_assert(sizeof(T) == 0,
                          "ArrayBitmapIndexTest: unsupported element type");
        }
        field_schema.set_element_type(element_type);

        auto field_meta = storage::FieldDataMeta{
            collection_id, partition_id, segment_id, field_id, field_schema};
        auto index_meta = storage::IndexMeta{
            segment_id, field_id, index_build_id, index_version};

        // Generate nb_ rows of 10 elements each and serialize them as an
        // insert log that the index builder reads back via "insert_files".
        data_ = GenerateArrayData(element_type, cardinality_, nb_, 10);
        auto field_data = storage::CreateFieldData(DataType::ARRAY);
        field_data->FillFieldData(data_.data(), data_.size());
        storage::InsertData insert_data(field_data);
        insert_data.SetFieldDataMeta(field_meta);
        insert_data.SetTimestamps(0, 100);
        auto serialized_bytes = insert_data.Serialize(storage::Remote);

        auto log_path = fmt::format("{}/{}/{}/{}/{}/{}",
                                    "test_array_bitmap",
                                    collection_id,
                                    partition_id,
                                    segment_id,
                                    field_id,
                                    0);
        chunk_manager_->Write(
            log_path, serialized_bytes.data(), serialized_bytes.size());

        storage::FileManagerContext ctx(field_meta, index_meta, chunk_manager_);
        std::vector<std::string> index_files;

        // Build and upload the index from the insert log.
        Config config;
        config["index_type"] = milvus::index::BITMAP_INDEX_TYPE;
        config["insert_files"] = std::vector<std::string>{log_path};
        config["bitmap_cardinality_limit"] = "1000";
        auto build_index =
            indexbuilder::IndexFactory::GetInstance().CreateIndex(
                DataType::ARRAY, config, ctx);
        build_index->Build();
        auto binary_set = build_index->Upload();
        for (const auto& [key, _] : binary_set.binary_map_) {
            index_files.push_back(key);
        }

        // Load the uploaded files into a fresh index instance.
        index::CreateIndexInfo index_info{};
        index_info.index_type = milvus::index::BITMAP_INDEX_TYPE;
        index_info.field_type = DataType::ARRAY;
        config["index_files"] = index_files;
        index_ =
            index::IndexFactory::GetInstance().CreateIndex(index_info, ctx);
        index_->Load(milvus::tracer::TraceContext{}, config);
    }

    /// Sets the data size and cardinality, creates the local chunk manager
    /// and runs Init() with fixed ids.
    void
    SetUp() override {
        nb_ = 10000;
        cardinality_ = 30;
        // NOTE: the assignment of type_ below is disabled, so type_ keeps its
        // value-initialized default.
        // if constexpr (std::is_same_v<T, int8_t>) {
        //     type_ = DataType::INT8;
        // } else if constexpr (std::is_same_v<T, int16_t>) {
        //     type_ = DataType::INT16;
        // } else if constexpr (std::is_same_v<T, int32_t>) {
        //     type_ = DataType::INT32;
        // } else if constexpr (std::is_same_v<T, int64_t>) {
        //     type_ = DataType::INT64;
        // } else if constexpr (std::is_same_v<T, std::string>) {
        //     type_ = DataType::VARCHAR;
        // }
        int64_t collection_id = 1;
        int64_t partition_id = 2;
        int64_t segment_id = 3;
        int64_t field_id = 101;
        int64_t index_build_id = 1000;
        int64_t index_version = 10000;
        std::string root_path = "/tmp/test-bitmap-index/";

        storage::StorageConfig storage_config;
        storage_config.storage_type = "local";
        storage_config.root_path = root_path;
        chunk_manager_ = storage::CreateChunkManager(storage_config);

        Init(collection_id,
             partition_id,
             segment_id,
             field_id,
             index_build_id,
             index_version);
    }

    /// Removes everything under the chunk manager's root path.
    ~ArrayBitmapIndexTest() override {
        // chunk_manager_ is only assigned in SetUp(); nothing to clean up if
        // that never happened.
        if (chunk_manager_ == nullptr) {
            return;
        }
        // Destructors are implicitly noexcept, so use the non-throwing
        // overload; a failed cleanup must not terminate the test binary.
        boost::system::error_code ec;
        boost::filesystem::remove_all(chunk_manager_->GetRootPath(), ec);
    }

 public:
    /// Placeholder for the IN-filter check; the body is disabled, so calling
    /// it currently verifies nothing.
    void
    TestInFunc() {
        // TODO: enable once an IN check over array elements is written.
        // boost::container::vector<T> test_data;
        // std::unordered_set<T> s;
        // size_t nq = 10;
        // for (size_t i = 0; i < nq; i++) {
        //     test_data.push_back(data_[i]);
        //     s.insert(data_[i]);
        // }
        // auto index_ptr = dynamic_cast<index::ScalarIndex<T>*>(index_.get());
        // auto bitset = index_ptr->In(test_data.size(), test_data.data());
        // for (size_t i = 0; i < bitset.size(); i++) {
        //     ASSERT_EQ(bitset[i], s.find(data_[i]) != s.end());
        // }
    }

 private:
    // Storage backend used for the insert log and the index files.
    std::shared_ptr<storage::ChunkManager> chunk_manager_;

 public:
    // Never assigned by the fixture (see SetUp); value-initialized so that a
    // read is at least well-defined.
    DataType type_{};
    // Index loaded back from the uploaded files.
    IndexBasePtr index_;
    // Number of generated rows.
    size_t nb_{0};
    // Modulus passed to GenerateArrayData for element values.
    size_t cardinality_{0};
    // Generated rows the index was built from.
    std::vector<milvus::Array> data_;
};
// Type-parameterized suite over the ArrayBitmapIndexTest fixture.
TYPED_TEST_SUITE_P(ArrayBitmapIndexTest);
// The loaded index must report as many rows as were generated (nb_).
TYPED_TEST_P(ArrayBitmapIndexTest, CountFuncTest) {
auto count = this->index_->Count();
EXPECT_EQ(count, this->nb_);
}
// Placeholder: the call is commented out, so this test asserts nothing.
TYPED_TEST_P(ArrayBitmapIndexTest, INFuncTest) {
// this->TestInFunc();
}
// Placeholder: the call is commented out, so this test asserts nothing.
// NOTE(review): the fixture in this file defines no TestNotInFunc member.
TYPED_TEST_P(ArrayBitmapIndexTest, NotINFuncTest) {
//this->TestNotInFunc();
}
// Element types the suite is instantiated with; float and double are not
// in this list.
using BitmapType =
testing::Types<int8_t, int16_t, int32_t, int64_t, std::string>;
// Every TYPED_TEST_P above is listed here for registration.
REGISTER_TYPED_TEST_SUITE_P(ArrayBitmapIndexTest,
CountFuncTest,
INFuncTest,
NotINFuncTest);
// Instantiates the registered tests once per type in BitmapType.
INSTANTIATE_TYPED_TEST_SUITE_P(ArrayBitmapE2ECheck,
ArrayBitmapIndexTest,
BitmapType);

View File

@ -24,6 +24,7 @@
#include "index/IndexFactory.h"
#include "test_utils/indexbuilder_test_utils.h"
#include "index/Meta.h"
#include "pb/schema.pb.h"
using namespace milvus::index;
using namespace milvus::indexbuilder;
@ -70,8 +71,24 @@ class HybridIndexTestV1 : public testing::Test {
int64_t field_id,
int64_t index_build_id,
int64_t index_version) {
proto::schema::FieldSchema field_schema;
if constexpr (std::is_same_v<int8_t, T>) {
field_schema.set_data_type(proto::schema::DataType::Int8);
} else if constexpr (std::is_same_v<int16_t, T>) {
field_schema.set_data_type(proto::schema::DataType::Int16);
} else if constexpr (std::is_same_v<int32_t, T>) {
field_schema.set_data_type(proto::schema::DataType::Int32);
} else if constexpr (std::is_same_v<int64_t, T>) {
field_schema.set_data_type(proto::schema::DataType::Int64);
} else if constexpr (std::is_same_v<float, T>) {
field_schema.set_data_type(proto::schema::DataType::Float);
} else if constexpr (std::is_same_v<double, T>) {
field_schema.set_data_type(proto::schema::DataType::Double);
} else if constexpr (std::is_same_v<std::string, T>) {
field_schema.set_data_type(proto::schema::DataType::String);
}
auto field_meta = storage::FieldDataMeta{
collection_id, partition_id, segment_id, field_id};
collection_id, partition_id, segment_id, field_id, field_schema};
auto index_meta = storage::IndexMeta{
segment_id, field_id, index_build_id, index_version};

View File

@ -40,8 +40,9 @@ gen_field_meta(int64_t collection_id = 1,
.segment_id = segment_id,
.field_id = field_id,
};
meta.schema.set_data_type(static_cast<proto::schema::DataType>(data_type));
meta.schema.set_element_type(
meta.field_schema.set_data_type(
static_cast<proto::schema::DataType>(data_type));
meta.field_schema.set_element_type(
static_cast<proto::schema::DataType>(element_type));
return meta;
}

View File

@ -56,7 +56,7 @@ TYPED_TEST_P(TypedScalarIndexTest, Dummy) {
auto
GetTempFileManagerCtx(CDataType data_type) {
auto ctx = milvus::storage::FileManagerContext();
ctx.fieldDataMeta.schema.set_data_type(
ctx.fieldDataMeta.field_schema.set_data_type(
static_cast<milvus::proto::schema::DataType>(data_type));
return ctx;
}
@ -356,60 +356,6 @@ struct TypedScalarIndexTestV2<double>::Helper {
using C = arrow::DoubleType;
};
TYPED_TEST_SUITE_P(TypedScalarIndexTestV2);
TYPED_TEST_P(TypedScalarIndexTestV2, Base) {
using T = TypeParam;
auto dtype = milvus::GetDType<T>();
auto index_types = GetIndexTypesV2<T>();
for (const auto& index_type : index_types) {
milvus::index::CreateIndexInfo create_index_info;
create_index_info.field_type = milvus::DataType(dtype);
create_index_info.index_type = index_type;
create_index_info.field_name = "scalar";
auto storage_config = get_default_local_storage_config();
auto chunk_manager =
milvus::storage::CreateChunkManager(storage_config);
milvus::test::TmpPath tmp_path;
auto temp_path = tmp_path.get();
auto vec_size = DIM * 4;
auto dataset = GenDataset(nb, knowhere::metric::L2, false);
auto scalars = GenSortedArr<T>(nb);
auto space = TestSpace<T>(temp_path, vec_size, dataset, scalars);
milvus::storage::FileManagerContext file_manager_context(
{}, {.field_name = "scalar"}, chunk_manager, space);
file_manager_context.fieldDataMeta.schema.set_data_type(
static_cast<milvus::proto::schema::DataType>(dtype));
auto index =
milvus::index::IndexFactory::GetInstance().CreateScalarIndex(
create_index_info, file_manager_context, space);
auto scalar_index =
dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
milvus::Config config;
if (index_type == "BITMAP") {
config["bitmap_cardinality_limit"] = "1000";
}
scalar_index->BuildV2(config);
scalar_index->UploadV2();
auto new_index =
milvus::index::IndexFactory::GetInstance().CreateScalarIndex(
create_index_info, file_manager_context, space);
auto new_scalar_index =
dynamic_cast<milvus::index::ScalarIndex<T>*>(new_index.get());
new_scalar_index->LoadV2();
ASSERT_EQ(nb, new_scalar_index->Count());
}
}
REGISTER_TYPED_TEST_SUITE_P(TypedScalarIndexTestV2, Base);
INSTANTIATE_TYPED_TEST_SUITE_P(ArithmeticCheck,
TypedScalarIndexTestV2,
ScalarT);
using namespace milvus::index;
template <typename T>
std::vector<T>

View File

@ -123,7 +123,7 @@ TEST_F(StringIndexMarisaTest, Reverse) {
auto index_types = GetIndexTypes<std::string>();
for (const auto& index_type : index_types) {
auto index = milvus::index::IndexFactory::GetInstance()
.CreateScalarIndex<std::string>(index_type);
.CreatePrimitiveScalarIndex<std::string>(index_type);
index->Build(nb, strs.data());
assert_reverse<std::string>(index.get(), strs);
}

View File

@ -491,17 +491,14 @@ GetIndexTypes<std::string>() {
template <typename T>
inline std::vector<std::string>
GetIndexTypesV2() {
return std::vector<std::string>{"sort",
milvus::index::INVERTED_INDEX_TYPE,
milvus::index::BITMAP_INDEX_TYPE};
return std::vector<std::string>{"sort", milvus::index::INVERTED_INDEX_TYPE};
}
template <>
inline std::vector<std::string>
GetIndexTypesV2<std::string>() {
return std::vector<std::string>{"marisa",
milvus::index::INVERTED_INDEX_TYPE,
milvus::index::BITMAP_INDEX_TYPE};
milvus::index::INVERTED_INDEX_TYPE};
}
} // namespace

View File

@ -16,9 +16,9 @@ func Test_BitmapIndexChecker(t *testing.T) {
assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Int64))
assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Float))
assert.NoError(t, c.CheckValidDataType(schemapb.DataType_String))
assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Array))
assert.Error(t, c.CheckValidDataType(schemapb.DataType_JSON))
assert.Error(t, c.CheckValidDataType(schemapb.DataType_Array))
assert.Error(t, c.CheckTrain(map[string]string{}))
assert.Error(t, c.CheckTrain(map[string]string{"bitmap_cardinality_limit": "0"}))
}

View File

@ -21,8 +21,8 @@ func (c *BITMAPChecker) CheckTrain(params map[string]string) error {
}
func (c *BITMAPChecker) CheckValidDataType(dType schemapb.DataType) error {
if !typeutil.IsArithmetic(dType) && !typeutil.IsStringType(dType) {
return fmt.Errorf("bitmap index are only supported on numeric and string field")
if !typeutil.IsArithmetic(dType) && !typeutil.IsStringType(dType) && !typeutil.IsArrayType(dType) {
return fmt.Errorf("bitmap index are only supported on numeric, string and array field")
}
return nil
}