mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 01:28:27 +08:00
enhance: optimize bitmap index (#33358)
#32900 Signed-off-by: luzhang <luzhang@zilliz.com> Co-authored-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
parent
8f46a20957
commit
589d4dfd82
@ -61,3 +61,5 @@ constexpr const char* RANGE_FILTER = knowhere::meta::RANGE_FILTER;
|
|||||||
const int64_t DEFAULT_MAX_OUTPUT_SIZE = 67108864; // bytes, 64MB
|
const int64_t DEFAULT_MAX_OUTPUT_SIZE = 67108864; // bytes, 64MB
|
||||||
|
|
||||||
const int64_t DEFAULT_CHUNK_MANAGER_REQUEST_TIMEOUT_MS = 10000;
|
const int64_t DEFAULT_CHUNK_MANAGER_REQUEST_TIMEOUT_MS = 10000;
|
||||||
|
|
||||||
|
const int64_t DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND = 500;
|
||||||
|
|||||||
@ -15,10 +15,12 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <yaml-cpp/yaml.h>
|
||||||
|
|
||||||
#include "index/BitmapIndex.h"
|
#include "index/BitmapIndex.h"
|
||||||
|
|
||||||
#include "common/Slice.h"
|
#include "common/Slice.h"
|
||||||
|
#include "common/Common.h"
|
||||||
#include "index/Meta.h"
|
#include "index/Meta.h"
|
||||||
#include "index/ScalarIndex.h"
|
#include "index/ScalarIndex.h"
|
||||||
#include "index/Utils.h"
|
#include "index/Utils.h"
|
||||||
@ -105,8 +107,13 @@ BitmapIndex<T>::Build(size_t n, const T* data) {
|
|||||||
}
|
}
|
||||||
total_num_rows_ = n;
|
total_num_rows_ = n;
|
||||||
|
|
||||||
for (auto it = data_.begin(); it != data_.end(); ++it) {
|
if (data_.size() < DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND) {
|
||||||
bitsets_[it->first] = ConvertRoaringToBitset(it->second);
|
for (auto it = data_.begin(); it != data_.end(); ++it) {
|
||||||
|
bitsets_[it->first] = ConvertRoaringToBitset(it->second);
|
||||||
|
}
|
||||||
|
build_mode_ = BitmapIndexBuildMode::BITSET;
|
||||||
|
} else {
|
||||||
|
build_mode_ = BitmapIndexBuildMode::ROARING;
|
||||||
}
|
}
|
||||||
|
|
||||||
is_built_ = true;
|
is_built_ = true;
|
||||||
@ -134,6 +141,13 @@ BitmapIndex<T>::BuildV2(const Config& config) {
|
|||||||
field_datas.push_back(field_data);
|
field_datas.push_back(field_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
BuildWithFieldData(field_datas);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void
|
||||||
|
BitmapIndex<T>::BuildWithFieldData(
|
||||||
|
const std::vector<FieldDataPtr>& field_datas) {
|
||||||
int total_num_rows = 0;
|
int total_num_rows = 0;
|
||||||
for (auto& field_data : field_datas) {
|
for (auto& field_data : field_datas) {
|
||||||
total_num_rows += field_data->get_num_rows();
|
total_num_rows += field_data->get_num_rows();
|
||||||
@ -142,7 +156,6 @@ BitmapIndex<T>::BuildV2(const Config& config) {
|
|||||||
throw SegcoreError(DataIsEmpty,
|
throw SegcoreError(DataIsEmpty,
|
||||||
"scalar bitmap index can not build null values");
|
"scalar bitmap index can not build null values");
|
||||||
}
|
}
|
||||||
|
|
||||||
total_num_rows_ = total_num_rows;
|
total_num_rows_ = total_num_rows;
|
||||||
|
|
||||||
int64_t offset = 0;
|
int64_t offset = 0;
|
||||||
@ -154,6 +167,7 @@ BitmapIndex<T>::BuildV2(const Config& config) {
|
|||||||
offset++;
|
offset++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
is_built_ = true;
|
is_built_ = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -190,6 +204,22 @@ BitmapIndex<T>::SerializeIndexData(uint8_t* data_ptr) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
std::pair<std::shared_ptr<uint8_t[]>, size_t>
|
||||||
|
BitmapIndex<T>::SerializeIndexMeta() {
|
||||||
|
YAML::Node node;
|
||||||
|
node[BITMAP_INDEX_LENGTH] = data_.size();
|
||||||
|
node[BITMAP_INDEX_NUM_ROWS] = total_num_rows_;
|
||||||
|
|
||||||
|
std::stringstream ss;
|
||||||
|
ss << node;
|
||||||
|
auto json_string = ss.str();
|
||||||
|
auto str_size = json_string.size();
|
||||||
|
std::shared_ptr<uint8_t[]> res(new uint8_t[str_size]);
|
||||||
|
memcpy(res.get(), json_string.data(), str_size);
|
||||||
|
return std::make_pair(res, str_size);
|
||||||
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
void
|
void
|
||||||
BitmapIndex<std::string>::SerializeIndexData(uint8_t* data_ptr) {
|
BitmapIndex<std::string>::SerializeIndexData(uint8_t* data_ptr) {
|
||||||
@ -217,21 +247,17 @@ BitmapIndex<T>::Serialize(const Config& config) {
|
|||||||
uint8_t* data_ptr = index_data.get();
|
uint8_t* data_ptr = index_data.get();
|
||||||
SerializeIndexData(data_ptr);
|
SerializeIndexData(data_ptr);
|
||||||
|
|
||||||
std::shared_ptr<uint8_t[]> index_length(new uint8_t[sizeof(size_t)]);
|
auto index_meta = SerializeIndexMeta();
|
||||||
auto index_size = data_.size();
|
|
||||||
memcpy(index_length.get(), &index_size, sizeof(size_t));
|
|
||||||
|
|
||||||
std::shared_ptr<uint8_t[]> num_rows(new uint8_t[sizeof(size_t)]);
|
|
||||||
memcpy(num_rows.get(), &total_num_rows_, sizeof(size_t));
|
|
||||||
|
|
||||||
BinarySet ret_set;
|
BinarySet ret_set;
|
||||||
ret_set.Append(BITMAP_INDEX_DATA, index_data, index_data_size);
|
ret_set.Append(BITMAP_INDEX_DATA, index_data, index_data_size);
|
||||||
ret_set.Append(BITMAP_INDEX_LENGTH, index_length, sizeof(size_t));
|
ret_set.Append(BITMAP_INDEX_META, index_meta.first, index_meta.second);
|
||||||
ret_set.Append(BITMAP_INDEX_NUM_ROWS, num_rows, sizeof(size_t));
|
|
||||||
|
|
||||||
LOG_INFO("build bitmap index with cardinality = {}, num_rows = {}",
|
LOG_INFO("build bitmap index with cardinality = {}, num_rows = {}",
|
||||||
index_size,
|
Cardinality(),
|
||||||
total_num_rows_);
|
total_num_rows_);
|
||||||
|
|
||||||
|
Disassemble(ret_set);
|
||||||
return ret_set;
|
return ret_set;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -283,6 +309,29 @@ BitmapIndex<T>::ConvertRoaringToBitset(const roaring::Roaring& values) {
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
std::pair<size_t, size_t>
|
||||||
|
BitmapIndex<T>::DeserializeIndexMeta(const uint8_t* data_ptr,
|
||||||
|
size_t data_size) {
|
||||||
|
YAML::Node node = YAML::Load(
|
||||||
|
std::string(reinterpret_cast<const char*>(data_ptr), data_size));
|
||||||
|
|
||||||
|
auto index_length = node[BITMAP_INDEX_LENGTH].as<size_t>();
|
||||||
|
auto index_num_rows = node[BITMAP_INDEX_NUM_ROWS].as<size_t>();
|
||||||
|
|
||||||
|
return std::make_pair(index_length, index_num_rows);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void
|
||||||
|
BitmapIndex<T>::ChooseIndexBuildMode() {
|
||||||
|
if (data_.size() <= DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND) {
|
||||||
|
build_mode_ = BitmapIndexBuildMode::BITSET;
|
||||||
|
} else {
|
||||||
|
build_mode_ = BitmapIndexBuildMode::ROARING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void
|
void
|
||||||
BitmapIndex<T>::DeserializeIndexData(const uint8_t* data_ptr,
|
BitmapIndex<T>::DeserializeIndexData(const uint8_t* data_ptr,
|
||||||
@ -296,7 +345,12 @@ BitmapIndex<T>::DeserializeIndexData(const uint8_t* data_ptr,
|
|||||||
value = roaring::Roaring::read(reinterpret_cast<const char*>(data_ptr));
|
value = roaring::Roaring::read(reinterpret_cast<const char*>(data_ptr));
|
||||||
data_ptr += value.getSizeInBytes();
|
data_ptr += value.getSizeInBytes();
|
||||||
|
|
||||||
bitsets_[key] = ConvertRoaringToBitset(value);
|
ChooseIndexBuildMode();
|
||||||
|
|
||||||
|
if (build_mode_ == BitmapIndexBuildMode::BITSET) {
|
||||||
|
bitsets_[key] = ConvertRoaringToBitset(value);
|
||||||
|
data_.erase(key);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -324,21 +378,14 @@ template <typename T>
|
|||||||
void
|
void
|
||||||
BitmapIndex<T>::LoadWithoutAssemble(const BinarySet& binary_set,
|
BitmapIndex<T>::LoadWithoutAssemble(const BinarySet& binary_set,
|
||||||
const Config& config) {
|
const Config& config) {
|
||||||
size_t index_length;
|
auto index_meta_buffer = binary_set.GetByName(BITMAP_INDEX_META);
|
||||||
auto index_length_buffer = binary_set.GetByName(BITMAP_INDEX_LENGTH);
|
auto index_meta = DeserializeIndexMeta(index_meta_buffer->data.get(),
|
||||||
memcpy(&index_length,
|
index_meta_buffer->size);
|
||||||
index_length_buffer->data.get(),
|
auto index_length = index_meta.first;
|
||||||
(size_t)index_length_buffer->size);
|
total_num_rows_ = index_meta.second;
|
||||||
|
|
||||||
auto num_rows_buffer = binary_set.GetByName(BITMAP_INDEX_NUM_ROWS);
|
|
||||||
memcpy(&total_num_rows_,
|
|
||||||
num_rows_buffer->data.get(),
|
|
||||||
(size_t)num_rows_buffer->size);
|
|
||||||
|
|
||||||
auto index_data_buffer = binary_set.GetByName(BITMAP_INDEX_DATA);
|
auto index_data_buffer = binary_set.GetByName(BITMAP_INDEX_DATA);
|
||||||
const uint8_t* data_ptr = index_data_buffer->data.get();
|
DeserializeIndexData(index_data_buffer->data.get(), index_length);
|
||||||
|
|
||||||
DeserializeIndexData(data_ptr, index_length);
|
|
||||||
|
|
||||||
LOG_INFO("load bitmap index with cardinality = {}, num_rows = {}",
|
LOG_INFO("load bitmap index with cardinality = {}, num_rows = {}",
|
||||||
Cardinality(),
|
Cardinality(),
|
||||||
@ -416,26 +463,24 @@ BitmapIndex<T>::In(const size_t n, const T* values) {
|
|||||||
AssertInfo(is_built_, "index has not been built");
|
AssertInfo(is_built_, "index has not been built");
|
||||||
TargetBitmap res(total_num_rows_, false);
|
TargetBitmap res(total_num_rows_, false);
|
||||||
|
|
||||||
#if 0
|
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||||
roaring::Roaring result;
|
for (size_t i = 0; i < n; ++i) {
|
||||||
for (size_t i = 0; i < n; ++i) {
|
auto val = values[i];
|
||||||
auto val = values[i];
|
auto it = data_.find(val);
|
||||||
auto it = data_.find(val);
|
if (it != data_.end()) {
|
||||||
if (it != data_.end()) {
|
for (const auto& v : it->second) {
|
||||||
result |= it->second;
|
res.set(v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (size_t i = 0; i < n; ++i) {
|
||||||
|
auto val = values[i];
|
||||||
|
if (bitsets_.find(val) != bitsets_.end()) {
|
||||||
|
res |= bitsets_.at(val);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (auto& val : result) {
|
|
||||||
res.set(val);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
for (size_t i = 0; i < n; ++i) {
|
|
||||||
auto val = values[i];
|
|
||||||
if (bitsets_.find(val) != bitsets_.end()) {
|
|
||||||
res |= bitsets_.at(val);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -443,36 +488,35 @@ template <typename T>
|
|||||||
// Builds a bitmap of total_num_rows_ bits in which a bit is set unless its
// position is listed under one of the n keys in `values`.
//
// @param n       number of entries readable from `values`.
// @param values  keys to exclude; keys absent from the index are ignored.
const TargetBitmap
BitmapIndex<T>::NotIn(const size_t n, const T* values) {
    AssertInfo(is_built_, "index has not been built");

    if (build_mode_ == BitmapIndexBuildMode::ROARING) {
        // Start from all-ones and clear every position stored under a
        // queried key.
        TargetBitmap res(total_num_rows_, true);
        // size_t index: n is a size_t, so an int counter would be a
        // signed/unsigned comparison.
        for (size_t i = 0; i < n; ++i) {
            auto it = data_.find(values[i]);
            if (it != data_.end()) {
                for (const auto& v : it->second) {
                    res.reset(v);
                }
            }
        }
        return res;
    }

    // BITSET mode: OR together the bitsets of the queried keys, then invert.
    TargetBitmap res(total_num_rows_, false);
    for (size_t i = 0; i < n; ++i) {
        // One lookup per key instead of find() followed by at().
        auto it = bitsets_.find(values[i]);
        if (it != bitsets_.end()) {
            res |= it->second;
        }
    }
    res.flip();
    return res;
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
const TargetBitmap
|
TargetBitmap
|
||||||
BitmapIndex<T>::Range(const T value, const OpType op) {
|
BitmapIndex<T>::RangeForBitset(const T value, const OpType op) {
|
||||||
AssertInfo(is_built_, "index has not been built");
|
AssertInfo(is_built_, "index has not been built");
|
||||||
TargetBitmap res(total_num_rows_, false);
|
TargetBitmap res(total_num_rows_, false);
|
||||||
if (ShouldSkip(value, value, op)) {
|
if (ShouldSkip(value, value, op)) {
|
||||||
@ -532,10 +576,82 @@ BitmapIndex<T>::Range(const T value, const OpType op) {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
const TargetBitmap
|
const TargetBitmap
|
||||||
BitmapIndex<T>::Range(const T lower_value,
|
BitmapIndex<T>::Range(const T value, OpType op) {
|
||||||
bool lb_inclusive,
|
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||||
const T upper_value,
|
return std::move(RangeForRoaring(value, op));
|
||||||
bool ub_inclusive) {
|
} else {
|
||||||
|
return std::move(RangeForBitset(value, op));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
TargetBitmap
|
||||||
|
BitmapIndex<T>::RangeForRoaring(const T value, const OpType op) {
|
||||||
|
AssertInfo(is_built_, "index has not been built");
|
||||||
|
TargetBitmap res(total_num_rows_, false);
|
||||||
|
if (ShouldSkip(value, value, op)) {
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
auto lb = data_.begin();
|
||||||
|
auto ub = data_.end();
|
||||||
|
|
||||||
|
switch (op) {
|
||||||
|
case OpType::LessThan: {
|
||||||
|
ub = std::lower_bound(data_.begin(),
|
||||||
|
data_.end(),
|
||||||
|
std::make_pair(value, TargetBitmap()),
|
||||||
|
[](const auto& lhs, const auto& rhs) {
|
||||||
|
return lhs.first < rhs.first;
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case OpType::LessEqual: {
|
||||||
|
ub = std::upper_bound(data_.begin(),
|
||||||
|
data_.end(),
|
||||||
|
std::make_pair(value, TargetBitmap()),
|
||||||
|
[](const auto& lhs, const auto& rhs) {
|
||||||
|
return lhs.first < rhs.first;
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case OpType::GreaterThan: {
|
||||||
|
lb = std::upper_bound(data_.begin(),
|
||||||
|
data_.end(),
|
||||||
|
std::make_pair(value, TargetBitmap()),
|
||||||
|
[](const auto& lhs, const auto& rhs) {
|
||||||
|
return lhs.first < rhs.first;
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case OpType::GreaterEqual: {
|
||||||
|
lb = std::lower_bound(data_.begin(),
|
||||||
|
data_.end(),
|
||||||
|
std::make_pair(value, TargetBitmap()),
|
||||||
|
[](const auto& lhs, const auto& rhs) {
|
||||||
|
return lhs.first < rhs.first;
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default: {
|
||||||
|
throw SegcoreError(OpTypeInvalid,
|
||||||
|
fmt::format("Invalid OperatorType: {}", op));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; lb != ub; lb++) {
|
||||||
|
for (const auto& v : lb->second) {
|
||||||
|
res.set(v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
TargetBitmap
|
||||||
|
BitmapIndex<T>::RangeForBitset(const T lower_value,
|
||||||
|
bool lb_inclusive,
|
||||||
|
const T upper_value,
|
||||||
|
bool ub_inclusive) {
|
||||||
AssertInfo(is_built_, "index has not been built");
|
AssertInfo(is_built_, "index has not been built");
|
||||||
TargetBitmap res(total_num_rows_, false);
|
TargetBitmap res(total_num_rows_, false);
|
||||||
if (lower_value > upper_value ||
|
if (lower_value > upper_value ||
|
||||||
@ -587,15 +703,99 @@ BitmapIndex<T>::Range(const T lower_value,
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
const TargetBitmap
|
||||||
|
BitmapIndex<T>::Range(const T lower_value,
|
||||||
|
bool lb_inclusive,
|
||||||
|
const T upper_value,
|
||||||
|
bool ub_inclusive) {
|
||||||
|
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||||
|
return RangeForRoaring(
|
||||||
|
lower_value, lb_inclusive, upper_value, ub_inclusive);
|
||||||
|
} else {
|
||||||
|
return RangeForBitset(
|
||||||
|
lower_value, lb_inclusive, upper_value, ub_inclusive);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
TargetBitmap
|
||||||
|
BitmapIndex<T>::RangeForRoaring(const T lower_value,
|
||||||
|
bool lb_inclusive,
|
||||||
|
const T upper_value,
|
||||||
|
bool ub_inclusive) {
|
||||||
|
AssertInfo(is_built_, "index has not been built");
|
||||||
|
TargetBitmap res(total_num_rows_, false);
|
||||||
|
if (lower_value > upper_value ||
|
||||||
|
(lower_value == upper_value && !(lb_inclusive && ub_inclusive))) {
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
if (ShouldSkip(lower_value, upper_value, OpType::Range)) {
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto lb = data_.begin();
|
||||||
|
auto ub = data_.end();
|
||||||
|
|
||||||
|
if (lb_inclusive) {
|
||||||
|
lb = std::lower_bound(data_.begin(),
|
||||||
|
data_.end(),
|
||||||
|
std::make_pair(lower_value, TargetBitmap()),
|
||||||
|
[](const auto& lhs, const auto& rhs) {
|
||||||
|
return lhs.first < rhs.first;
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
lb = std::upper_bound(data_.begin(),
|
||||||
|
data_.end(),
|
||||||
|
std::make_pair(lower_value, TargetBitmap()),
|
||||||
|
[](const auto& lhs, const auto& rhs) {
|
||||||
|
return lhs.first < rhs.first;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ub_inclusive) {
|
||||||
|
ub = std::upper_bound(data_.begin(),
|
||||||
|
data_.end(),
|
||||||
|
std::make_pair(upper_value, TargetBitmap()),
|
||||||
|
[](const auto& lhs, const auto& rhs) {
|
||||||
|
return lhs.first < rhs.first;
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
ub = std::lower_bound(data_.begin(),
|
||||||
|
data_.end(),
|
||||||
|
std::make_pair(upper_value, TargetBitmap()),
|
||||||
|
[](const auto& lhs, const auto& rhs) {
|
||||||
|
return lhs.first < rhs.first;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; lb != ub; lb++) {
|
||||||
|
for (const auto& v : lb->second) {
|
||||||
|
res.set(v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
T
|
T
|
||||||
BitmapIndex<T>::Reverse_Lookup(size_t idx) const {
|
BitmapIndex<T>::Reverse_Lookup(size_t idx) const {
|
||||||
AssertInfo(is_built_, "index has not been built");
|
AssertInfo(is_built_, "index has not been built");
|
||||||
AssertInfo(idx < total_num_rows_, "out of range of total coun");
|
AssertInfo(idx < total_num_rows_, "out of range of total coun");
|
||||||
|
|
||||||
for (auto it = bitsets_.begin(); it != bitsets_.end(); it++) {
|
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||||
if (it->second[idx]) {
|
for (auto it = data_.begin(); it != data_.end(); it++) {
|
||||||
return it->first;
|
for (const auto& v : it->second) {
|
||||||
|
if (v == idx) {
|
||||||
|
return it->first;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (auto it = bitsets_.begin(); it != bitsets_.end(); it++) {
|
||||||
|
if (it->second[idx]) {
|
||||||
|
return it->first;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
throw SegcoreError(
|
throw SegcoreError(
|
||||||
@ -610,9 +810,7 @@ bool
|
|||||||
BitmapIndex<T>::ShouldSkip(const T lower_value,
|
BitmapIndex<T>::ShouldSkip(const T lower_value,
|
||||||
const T upper_value,
|
const T upper_value,
|
||||||
const OpType op) {
|
const OpType op) {
|
||||||
if (!bitsets_.empty()) {
|
auto skip = [&](OpType op, T lower_bound, T upper_bound) -> bool {
|
||||||
auto lower_bound = bitsets_.begin()->first;
|
|
||||||
auto upper_bound = bitsets_.rbegin()->first;
|
|
||||||
bool should_skip = false;
|
bool should_skip = false;
|
||||||
switch (op) {
|
switch (op) {
|
||||||
case OpType::LessThan: {
|
case OpType::LessThan: {
|
||||||
@ -649,6 +847,22 @@ BitmapIndex<T>::ShouldSkip(const T lower_value,
|
|||||||
op));
|
op));
|
||||||
}
|
}
|
||||||
return should_skip;
|
return should_skip;
|
||||||
|
};
|
||||||
|
|
||||||
|
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||||
|
if (!data_.empty()) {
|
||||||
|
auto lower_bound = data_.begin()->first;
|
||||||
|
auto upper_bound = data_.rbegin()->first;
|
||||||
|
bool should_skip = skip(op, lower_bound, upper_bound);
|
||||||
|
return should_skip;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (!bitsets_.empty()) {
|
||||||
|
auto lower_bound = bitsets_.begin()->first;
|
||||||
|
auto upper_bound = bitsets_.rbegin()->first;
|
||||||
|
bool should_skip = skip(op, lower_bound, upper_bound);
|
||||||
|
return should_skip;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -30,6 +30,11 @@
|
|||||||
namespace milvus {
|
namespace milvus {
|
||||||
namespace index {
|
namespace index {
|
||||||
|
|
||||||
|
// Selects which of BitmapIndex's two containers answers queries.
enum class BitmapIndexBuildMode {
    ROARING,  // queries read the per-key roaring bitmaps (data_)
    BITSET,   // queries read the per-key TargetBitmap bitsets (bitsets_)
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* @brief Implementation of Bitmap Index
|
* @brief Implementation of Bitmap Index
|
||||||
* @details This index only for scalar Integral type.
|
* @details This index only for scalar Integral type.
|
||||||
@ -45,6 +50,17 @@ class BitmapIndex : public ScalarIndex<T> {
|
|||||||
const storage::FileManagerContext& file_manager_context,
|
const storage::FileManagerContext& file_manager_context,
|
||||||
std::shared_ptr<milvus_storage::Space> space);
|
std::shared_ptr<milvus_storage::Space> space);
|
||||||
|
|
||||||
|
explicit BitmapIndex(
|
||||||
|
const std::shared_ptr<storage::MemFileManagerImpl>& file_manager)
|
||||||
|
: file_manager_(file_manager) {
|
||||||
|
}
|
||||||
|
|
||||||
|
explicit BitmapIndex(
|
||||||
|
const std::shared_ptr<storage::MemFileManagerImpl>& file_manager,
|
||||||
|
std::shared_ptr<milvus_storage::Space> space)
|
||||||
|
: file_manager_(file_manager), space_(space) {
|
||||||
|
}
|
||||||
|
|
||||||
~BitmapIndex() override = default;
|
~BitmapIndex() override = default;
|
||||||
|
|
||||||
BinarySet
|
BinarySet
|
||||||
@ -61,7 +77,7 @@ class BitmapIndex : public ScalarIndex<T> {
|
|||||||
|
|
||||||
int64_t
|
int64_t
|
||||||
Count() override {
|
Count() override {
|
||||||
return bitsets_.begin()->second.size();
|
return total_num_rows_;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -70,6 +86,9 @@ class BitmapIndex : public ScalarIndex<T> {
|
|||||||
void
|
void
|
||||||
Build(const Config& config = {}) override;
|
Build(const Config& config = {}) override;
|
||||||
|
|
||||||
|
void
|
||||||
|
BuildWithFieldData(const std::vector<FieldDataPtr>& datas) override;
|
||||||
|
|
||||||
void
|
void
|
||||||
BuildV2(const Config& config = {}) override;
|
BuildV2(const Config& config = {}) override;
|
||||||
|
|
||||||
@ -108,9 +127,17 @@ class BitmapIndex : public ScalarIndex<T> {
|
|||||||
|
|
||||||
int64_t
|
int64_t
|
||||||
Cardinality() {
|
Cardinality() {
|
||||||
return bitsets_.size();
|
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||||
|
return data_.size();
|
||||||
|
} else {
|
||||||
|
return bitsets_.size();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
LoadWithoutAssemble(const BinarySet& binary_set,
|
||||||
|
const Config& config) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
size_t
|
size_t
|
||||||
GetIndexDataSize();
|
GetIndexDataSize();
|
||||||
@ -118,24 +145,49 @@ class BitmapIndex : public ScalarIndex<T> {
|
|||||||
void
|
void
|
||||||
SerializeIndexData(uint8_t* index_data_ptr);
|
SerializeIndexData(uint8_t* index_data_ptr);
|
||||||
|
|
||||||
|
std::pair<std::shared_ptr<uint8_t[]>, size_t>
|
||||||
|
SerializeIndexMeta();
|
||||||
|
|
||||||
|
std::pair<size_t, size_t>
|
||||||
|
DeserializeIndexMeta(const uint8_t* data_ptr, size_t data_size);
|
||||||
|
|
||||||
void
|
void
|
||||||
DeserializeIndexData(const uint8_t* data_ptr, size_t index_length);
|
DeserializeIndexData(const uint8_t* data_ptr, size_t index_length);
|
||||||
|
|
||||||
|
void
|
||||||
|
ChooseIndexBuildMode();
|
||||||
|
|
||||||
bool
|
bool
|
||||||
ShouldSkip(const T lower_value, const T upper_value, const OpType op);
|
ShouldSkip(const T lower_value, const T upper_value, const OpType op);
|
||||||
|
|
||||||
TargetBitmap
|
TargetBitmap
|
||||||
ConvertRoaringToBitset(const roaring::Roaring& values);
|
ConvertRoaringToBitset(const roaring::Roaring& values);
|
||||||
|
|
||||||
void
|
TargetBitmap
|
||||||
LoadWithoutAssemble(const BinarySet& binary_set, const Config& config);
|
RangeForRoaring(T value, OpType op);
|
||||||
|
|
||||||
private:
|
TargetBitmap
|
||||||
bool is_built_;
|
RangeForBitset(T value, OpType op);
|
||||||
|
|
||||||
|
TargetBitmap
|
||||||
|
RangeForRoaring(T lower_bound_value,
|
||||||
|
bool lb_inclusive,
|
||||||
|
T upper_bound_value,
|
||||||
|
bool ub_inclusive);
|
||||||
|
|
||||||
|
TargetBitmap
|
||||||
|
RangeForBitset(T lower_bound_value,
|
||||||
|
bool lb_inclusive,
|
||||||
|
T upper_bound_value,
|
||||||
|
bool ub_inclusive);
|
||||||
|
|
||||||
|
public:
|
||||||
|
bool is_built_{false};
|
||||||
Config config_;
|
Config config_;
|
||||||
|
BitmapIndexBuildMode build_mode_;
|
||||||
std::map<T, roaring::Roaring> data_;
|
std::map<T, roaring::Roaring> data_;
|
||||||
std::map<T, TargetBitmap> bitsets_;
|
std::map<T, TargetBitmap> bitsets_;
|
||||||
size_t total_num_rows_;
|
size_t total_num_rows_{0};
|
||||||
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
|
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
|
||||||
std::shared_ptr<milvus_storage::Space> space_;
|
std::shared_ptr<milvus_storage::Space> space_;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -20,6 +20,7 @@ set(INDEX_FILES
|
|||||||
SkipIndex.cpp
|
SkipIndex.cpp
|
||||||
InvertedIndexTantivy.cpp
|
InvertedIndexTantivy.cpp
|
||||||
BitmapIndex.cpp
|
BitmapIndex.cpp
|
||||||
|
HybridScalarIndex.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
milvus_add_pkg_config("milvus_index")
|
milvus_add_pkg_config("milvus_index")
|
||||||
|
|||||||
402
internal/core/src/index/HybridScalarIndex.cpp
Normal file
402
internal/core/src/index/HybridScalarIndex.cpp
Normal file
@ -0,0 +1,402 @@
|
|||||||
|
// Licensed to the LF AI & Data foundation under one
|
||||||
|
// or more contributor license agreements. See the NOTICE file
|
||||||
|
// distributed with this work for additional information
|
||||||
|
// regarding copyright ownership. The ASF licenses this file
|
||||||
|
// to you under the Apache License, Version 2.0 (the
|
||||||
|
// "License"); you may not use this file except in compliance
|
||||||
|
// with the License. You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "index/HybridScalarIndex.h"
|
||||||
|
#include "common/Slice.h"
|
||||||
|
#include "common/Common.h"
|
||||||
|
#include "index/Meta.h"
|
||||||
|
#include "index/ScalarIndex.h"
|
||||||
|
#include "index/Utils.h"
|
||||||
|
#include "storage/Util.h"
|
||||||
|
#include "storage/space.h"
|
||||||
|
|
||||||
|
namespace milvus {
|
||||||
|
namespace index {
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
HybridScalarIndex<T>::HybridScalarIndex(
|
||||||
|
const storage::FileManagerContext& file_manager_context)
|
||||||
|
: is_built_(false),
|
||||||
|
bitmap_index_cardinality_limit_(DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND) {
|
||||||
|
if (file_manager_context.Valid()) {
|
||||||
|
file_manager_ =
|
||||||
|
std::make_shared<storage::MemFileManagerImpl>(file_manager_context);
|
||||||
|
AssertInfo(file_manager_ != nullptr, "create file manager failed!");
|
||||||
|
}
|
||||||
|
internal_index_type_ = InternalIndexType::NONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
HybridScalarIndex<T>::HybridScalarIndex(
|
||||||
|
const storage::FileManagerContext& file_manager_context,
|
||||||
|
std::shared_ptr<milvus_storage::Space> space)
|
||||||
|
: is_built_(false),
|
||||||
|
bitmap_index_cardinality_limit_(DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND),
|
||||||
|
space_(space) {
|
||||||
|
if (file_manager_context.Valid()) {
|
||||||
|
file_manager_ = std::make_shared<storage::MemFileManagerImpl>(
|
||||||
|
file_manager_context, space);
|
||||||
|
AssertInfo(file_manager_ != nullptr, "create file manager failed!");
|
||||||
|
}
|
||||||
|
internal_index_type_ = InternalIndexType::NONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
InternalIndexType
|
||||||
|
HybridScalarIndex<T>::SelectIndexBuildType(size_t n, const T* values) {
|
||||||
|
std::set<T> distinct_vals;
|
||||||
|
for (size_t i = 0; i < n; i++) {
|
||||||
|
distinct_vals.insert(values[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decide whether to select bitmap index or stl sort
|
||||||
|
if (distinct_vals.size() >= bitmap_index_cardinality_limit_) {
|
||||||
|
internal_index_type_ = InternalIndexType::STLSORT;
|
||||||
|
} else {
|
||||||
|
internal_index_type_ = InternalIndexType::BITMAP;
|
||||||
|
}
|
||||||
|
return internal_index_type_;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
InternalIndexType
|
||||||
|
HybridScalarIndex<std::string>::SelectIndexBuildType(
|
||||||
|
size_t n, const std::string* values) {
|
||||||
|
std::set<std::string> distinct_vals;
|
||||||
|
for (size_t i = 0; i < n; i++) {
|
||||||
|
distinct_vals.insert(values[i]);
|
||||||
|
if (distinct_vals.size() >= bitmap_index_cardinality_limit_) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decide whether to select bitmap index or marisa index
|
||||||
|
if (distinct_vals.size() >= bitmap_index_cardinality_limit_) {
|
||||||
|
internal_index_type_ = InternalIndexType::MARISA;
|
||||||
|
} else {
|
||||||
|
internal_index_type_ = InternalIndexType::BITMAP;
|
||||||
|
}
|
||||||
|
return internal_index_type_;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
InternalIndexType
|
||||||
|
HybridScalarIndex<T>::SelectIndexBuildType(
|
||||||
|
const std::vector<FieldDataPtr>& field_datas) {
|
||||||
|
std::set<T> distinct_vals;
|
||||||
|
for (const auto& data : field_datas) {
|
||||||
|
auto slice_row_num = data->get_num_rows();
|
||||||
|
for (size_t i = 0; i < slice_row_num; ++i) {
|
||||||
|
auto val = reinterpret_cast<const T*>(data->RawValue(i));
|
||||||
|
distinct_vals.insert(*val);
|
||||||
|
if (distinct_vals.size() >= bitmap_index_cardinality_limit_) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decide whether to select bitmap index or stl sort
|
||||||
|
if (distinct_vals.size() >= bitmap_index_cardinality_limit_) {
|
||||||
|
internal_index_type_ = InternalIndexType::STLSORT;
|
||||||
|
} else {
|
||||||
|
internal_index_type_ = InternalIndexType::BITMAP;
|
||||||
|
}
|
||||||
|
return internal_index_type_;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
InternalIndexType
|
||||||
|
HybridScalarIndex<std::string>::SelectIndexBuildType(
|
||||||
|
const std::vector<FieldDataPtr>& field_datas) {
|
||||||
|
std::set<std::string> distinct_vals;
|
||||||
|
for (const auto& data : field_datas) {
|
||||||
|
auto slice_row_num = data->get_num_rows();
|
||||||
|
for (size_t i = 0; i < slice_row_num; ++i) {
|
||||||
|
auto val = reinterpret_cast<const std::string*>(data->RawValue(i));
|
||||||
|
distinct_vals.insert(*val);
|
||||||
|
if (distinct_vals.size() >= bitmap_index_cardinality_limit_) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decide whether to select bitmap index or marisa sort
|
||||||
|
if (distinct_vals.size() >= bitmap_index_cardinality_limit_) {
|
||||||
|
internal_index_type_ = InternalIndexType::MARISA;
|
||||||
|
} else {
|
||||||
|
internal_index_type_ = InternalIndexType::BITMAP;
|
||||||
|
}
|
||||||
|
return internal_index_type_;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
std::shared_ptr<ScalarIndex<T>>
|
||||||
|
HybridScalarIndex<T>::GetInternalIndex() {
|
||||||
|
if (internal_index_ != nullptr) {
|
||||||
|
return internal_index_;
|
||||||
|
}
|
||||||
|
if (internal_index_type_ == InternalIndexType::BITMAP) {
|
||||||
|
internal_index_ = std::make_shared<BitmapIndex<T>>(file_manager_);
|
||||||
|
} else if (internal_index_type_ == InternalIndexType::STLSORT) {
|
||||||
|
internal_index_ = std::make_shared<ScalarIndexSort<T>>(file_manager_);
|
||||||
|
} else {
|
||||||
|
PanicInfo(UnexpectedError,
|
||||||
|
"unknown index type when get internal index");
|
||||||
|
}
|
||||||
|
return internal_index_;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
std::shared_ptr<ScalarIndex<std::string>>
|
||||||
|
HybridScalarIndex<std::string>::GetInternalIndex() {
|
||||||
|
if (internal_index_ != nullptr) {
|
||||||
|
return internal_index_;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (internal_index_type_ == InternalIndexType::BITMAP) {
|
||||||
|
internal_index_ =
|
||||||
|
std::make_shared<BitmapIndex<std::string>>(file_manager_);
|
||||||
|
} else if (internal_index_type_ == InternalIndexType::MARISA) {
|
||||||
|
internal_index_ = std::make_shared<StringIndexMarisa>(file_manager_);
|
||||||
|
} else {
|
||||||
|
PanicInfo(UnexpectedError,
|
||||||
|
"unknown index type when get internal index");
|
||||||
|
}
|
||||||
|
return internal_index_;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void
|
||||||
|
HybridScalarIndex<T>::BuildInternal(
|
||||||
|
const std::vector<FieldDataPtr>& field_datas) {
|
||||||
|
auto index = GetInternalIndex();
|
||||||
|
index->BuildWithFieldData(field_datas);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void
|
||||||
|
HybridScalarIndex<T>::Build(const Config& config) {
|
||||||
|
if (is_built_) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
bitmap_index_cardinality_limit_ =
|
||||||
|
GetBitmapCardinalityLimitFromConfig(config);
|
||||||
|
LOG_INFO("config bitmap cardinality limit to {}",
|
||||||
|
bitmap_index_cardinality_limit_);
|
||||||
|
|
||||||
|
auto insert_files =
|
||||||
|
GetValueFromConfig<std::vector<std::string>>(config, "insert_files");
|
||||||
|
AssertInfo(insert_files.has_value(),
|
||||||
|
"insert file paths is empty when build index");
|
||||||
|
|
||||||
|
auto field_datas =
|
||||||
|
file_manager_->CacheRawDataToMemory(insert_files.value());
|
||||||
|
|
||||||
|
SelectIndexBuildType(field_datas);
|
||||||
|
BuildInternal(field_datas);
|
||||||
|
is_built_ = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void
|
||||||
|
HybridScalarIndex<T>::BuildV2(const Config& config) {
|
||||||
|
if (is_built_) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
bitmap_index_cardinality_limit_ =
|
||||||
|
GetBitmapCardinalityLimitFromConfig(config);
|
||||||
|
LOG_INFO("config bitmap cardinality limit to {}",
|
||||||
|
bitmap_index_cardinality_limit_);
|
||||||
|
|
||||||
|
auto field_name = file_manager_->GetIndexMeta().field_name;
|
||||||
|
auto reader = space_->ScanData();
|
||||||
|
std::vector<FieldDataPtr> field_datas;
|
||||||
|
for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
|
||||||
|
if (!rec.ok()) {
|
||||||
|
PanicInfo(DataFormatBroken, "failed to read data");
|
||||||
|
}
|
||||||
|
auto data = rec.ValueUnsafe();
|
||||||
|
auto total_num_rows = data->num_rows();
|
||||||
|
auto col_data = data->GetColumnByName(field_name);
|
||||||
|
auto field_data = storage::CreateFieldData(
|
||||||
|
DataType(GetDType<T>()), 0, total_num_rows);
|
||||||
|
field_data->FillFieldData(col_data);
|
||||||
|
field_datas.push_back(field_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
SelectIndexBuildType(field_datas);
|
||||||
|
BuildInternal(field_datas);
|
||||||
|
is_built_ = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
BinarySet
|
||||||
|
HybridScalarIndex<T>::Serialize(const Config& config) {
|
||||||
|
AssertInfo(is_built_, "index has not been built yet");
|
||||||
|
|
||||||
|
auto ret_set = internal_index_->Serialize(config);
|
||||||
|
|
||||||
|
// Add index type info to storage for future restruct index
|
||||||
|
std::shared_ptr<uint8_t[]> index_type_buf(new uint8_t[sizeof(uint8_t)]);
|
||||||
|
index_type_buf[0] = static_cast<uint8_t>(internal_index_type_);
|
||||||
|
ret_set.Append(INDEX_TYPE, index_type_buf, sizeof(uint8_t));
|
||||||
|
|
||||||
|
return ret_set;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
BinarySet
|
||||||
|
HybridScalarIndex<T>::Upload(const Config& config) {
|
||||||
|
auto binary_set = Serialize(config);
|
||||||
|
file_manager_->AddFile(binary_set);
|
||||||
|
|
||||||
|
auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize();
|
||||||
|
BinarySet ret;
|
||||||
|
for (auto& file : remote_paths_to_size) {
|
||||||
|
ret.Append(file.first, nullptr, file.second);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
BinarySet
|
||||||
|
HybridScalarIndex<T>::UploadV2(const Config& config) {
|
||||||
|
auto binary_set = Serialize(config);
|
||||||
|
file_manager_->AddFileV2(binary_set);
|
||||||
|
|
||||||
|
auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize();
|
||||||
|
BinarySet ret;
|
||||||
|
for (auto& file : remote_paths_to_size) {
|
||||||
|
ret.Append(file.first, nullptr, file.second);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void
|
||||||
|
HybridScalarIndex<T>::DeserializeIndexType(const BinarySet& binary_set) {
|
||||||
|
uint8_t index_type;
|
||||||
|
auto index_type_buffer = binary_set.GetByName(INDEX_TYPE);
|
||||||
|
memcpy(&index_type, index_type_buffer->data.get(), index_type_buffer->size);
|
||||||
|
internal_index_type_ = static_cast<InternalIndexType>(index_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void
|
||||||
|
HybridScalarIndex<T>::LoadInternal(const BinarySet& binary_set,
|
||||||
|
const Config& config) {
|
||||||
|
auto index = GetInternalIndex();
|
||||||
|
index->LoadWithoutAssemble(binary_set, config);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void
|
||||||
|
HybridScalarIndex<T>::Load(const BinarySet& binary_set, const Config& config) {
|
||||||
|
milvus::Assemble(const_cast<BinarySet&>(binary_set));
|
||||||
|
DeserializeIndexType(binary_set);
|
||||||
|
|
||||||
|
LoadInternal(binary_set, config);
|
||||||
|
is_built_ = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void
|
||||||
|
HybridScalarIndex<T>::LoadV2(const Config& config) {
|
||||||
|
auto blobs = space_->StatisticsBlobs();
|
||||||
|
std::vector<std::string> index_files;
|
||||||
|
auto prefix = file_manager_->GetRemoteIndexObjectPrefixV2();
|
||||||
|
for (auto& b : blobs) {
|
||||||
|
if (b.name.rfind(prefix, 0) == 0) {
|
||||||
|
index_files.push_back(b.name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::map<std::string, FieldDataPtr> index_datas{};
|
||||||
|
for (auto& file_name : index_files) {
|
||||||
|
auto res = space_->GetBlobByteSize(file_name);
|
||||||
|
if (!res.ok()) {
|
||||||
|
PanicInfo(S3Error, "unable to read index blob");
|
||||||
|
}
|
||||||
|
auto index_blob_data =
|
||||||
|
std::shared_ptr<uint8_t[]>(new uint8_t[res.value()]);
|
||||||
|
auto status = space_->ReadBlob(file_name, index_blob_data.get());
|
||||||
|
if (!status.ok()) {
|
||||||
|
PanicInfo(S3Error, "unable to read index blob");
|
||||||
|
}
|
||||||
|
auto raw_index_blob =
|
||||||
|
storage::DeserializeFileData(index_blob_data, res.value());
|
||||||
|
auto key = file_name.substr(file_name.find_last_of('/') + 1);
|
||||||
|
index_datas[key] = raw_index_blob->GetFieldData();
|
||||||
|
}
|
||||||
|
AssembleIndexDatas(index_datas);
|
||||||
|
|
||||||
|
BinarySet binary_set;
|
||||||
|
for (auto& [key, data] : index_datas) {
|
||||||
|
auto size = data->Size();
|
||||||
|
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
|
||||||
|
auto buf = std::shared_ptr<uint8_t[]>(
|
||||||
|
(uint8_t*)const_cast<void*>(data->Data()), deleter);
|
||||||
|
binary_set.Append(key, buf, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
DeserializeIndexType(binary_set);
|
||||||
|
|
||||||
|
LoadInternal(binary_set, config);
|
||||||
|
|
||||||
|
is_built_ = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void
|
||||||
|
HybridScalarIndex<T>::Load(milvus::tracer::TraceContext ctx,
|
||||||
|
const Config& config) {
|
||||||
|
auto index_files =
|
||||||
|
GetValueFromConfig<std::vector<std::string>>(config, "index_files");
|
||||||
|
AssertInfo(index_files.has_value(),
|
||||||
|
"index file paths is empty when load bitmap index");
|
||||||
|
auto index_datas = file_manager_->LoadIndexToMemory(index_files.value());
|
||||||
|
AssembleIndexDatas(index_datas);
|
||||||
|
BinarySet binary_set;
|
||||||
|
for (auto& [key, data] : index_datas) {
|
||||||
|
auto size = data->Size();
|
||||||
|
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
|
||||||
|
auto buf = std::shared_ptr<uint8_t[]>(
|
||||||
|
(uint8_t*)const_cast<void*>(data->Data()), deleter);
|
||||||
|
binary_set.Append(key, buf, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
DeserializeIndexType(binary_set);
|
||||||
|
|
||||||
|
LoadInternal(binary_set, config);
|
||||||
|
|
||||||
|
is_built_ = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Explicit instantiations for every scalar type the hybrid index is
// compiled for.
// Boolean and integer types.
template class HybridScalarIndex<bool>;
template class HybridScalarIndex<int8_t>;
template class HybridScalarIndex<int16_t>;
template class HybridScalarIndex<int32_t>;
template class HybridScalarIndex<int64_t>;
// Floating point types.
template class HybridScalarIndex<float>;
template class HybridScalarIndex<double>;
// String type.
template class HybridScalarIndex<std::string>;
|
||||||
|
|
||||||
|
} // namespace index
|
||||||
|
} // namespace milvus
|
||||||
166
internal/core/src/index/HybridScalarIndex.h
Normal file
166
internal/core/src/index/HybridScalarIndex.h
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
// Licensed to the LF AI & Data foundation under one
|
||||||
|
// or more contributor license agreements. See the NOTICE file
|
||||||
|
// distributed with this work for additional information
|
||||||
|
// regarding copyright ownership. The ASF licenses this file
|
||||||
|
// to you under the Apache License, Version 2.0 (the
|
||||||
|
// "License"); you may not use this file except in compliance
|
||||||
|
// with the License. You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "index/ScalarIndex.h"
|
||||||
|
#include "index/BitmapIndex.h"
|
||||||
|
#include "index/ScalarIndexSort.h"
|
||||||
|
#include "index/StringIndexMarisa.h"
|
||||||
|
#include "storage/FileManager.h"
|
||||||
|
#include "storage/DiskFileManagerImpl.h"
|
||||||
|
#include "storage/MemFileManagerImpl.h"
|
||||||
|
#include "storage/space.h"
|
||||||
|
|
||||||
|
namespace milvus {
|
||||||
|
namespace index {
|
||||||
|
|
||||||
|
// Identifies which concrete index implementation backs a HybridScalarIndex.
// The numeric values are spelled out because HybridScalarIndex::Serialize
// stores the value as a single byte in the serialized index.
enum class InternalIndexType {
    NONE = 0,
    BITMAP = 1,
    STLSORT = 2,
    MARISA = 3,
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @brief Implementation of hybrid index
|
||||||
|
* @details This index only for scalar type.
|
||||||
|
* dynamically choose bitmap/stlsort/marisa type index
|
||||||
|
* according to data distribution
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
class HybridScalarIndex : public ScalarIndex<T> {
|
||||||
|
public:
|
||||||
|
explicit HybridScalarIndex(
|
||||||
|
const storage::FileManagerContext& file_manager_context =
|
||||||
|
storage::FileManagerContext());
|
||||||
|
|
||||||
|
explicit HybridScalarIndex(
|
||||||
|
const storage::FileManagerContext& file_manager_context,
|
||||||
|
std::shared_ptr<milvus_storage::Space> space);
|
||||||
|
|
||||||
|
~HybridScalarIndex() override = default;
|
||||||
|
|
||||||
|
BinarySet
|
||||||
|
Serialize(const Config& config) override;
|
||||||
|
|
||||||
|
void
|
||||||
|
Load(const BinarySet& index_binary, const Config& config = {}) override;
|
||||||
|
|
||||||
|
void
|
||||||
|
Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;
|
||||||
|
|
||||||
|
void
|
||||||
|
LoadV2(const Config& config = {}) override;
|
||||||
|
|
||||||
|
int64_t
|
||||||
|
Count() override {
|
||||||
|
return internal_index_->Count();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Build(size_t n, const T* values) override {
|
||||||
|
SelectIndexBuildType(n, values);
|
||||||
|
auto index = GetInternalIndex();
|
||||||
|
index->Build(n, values);
|
||||||
|
is_built_ = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Build(const Config& config = {}) override;
|
||||||
|
|
||||||
|
void
|
||||||
|
BuildV2(const Config& config = {}) override;
|
||||||
|
|
||||||
|
const TargetBitmap
|
||||||
|
In(size_t n, const T* values) override {
|
||||||
|
return internal_index_->In(n, values);
|
||||||
|
}
|
||||||
|
|
||||||
|
const TargetBitmap
|
||||||
|
NotIn(size_t n, const T* values) override {
|
||||||
|
return internal_index_->NotIn(n, values);
|
||||||
|
}
|
||||||
|
|
||||||
|
const TargetBitmap
|
||||||
|
Range(T value, OpType op) override {
|
||||||
|
return internal_index_->Range(value, op);
|
||||||
|
}
|
||||||
|
|
||||||
|
const TargetBitmap
|
||||||
|
Range(T lower_bound_value,
|
||||||
|
bool lb_inclusive,
|
||||||
|
T upper_bound_value,
|
||||||
|
bool ub_inclusive) override {
|
||||||
|
return internal_index_->Range(
|
||||||
|
lower_bound_value, lb_inclusive, upper_bound_value, ub_inclusive);
|
||||||
|
}
|
||||||
|
|
||||||
|
T
|
||||||
|
Reverse_Lookup(size_t offset) const override {
|
||||||
|
return internal_index_->Reverse_Lookup(offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t
|
||||||
|
Size() override {
|
||||||
|
return internal_index_->Size();
|
||||||
|
}
|
||||||
|
|
||||||
|
const bool
|
||||||
|
HasRawData() const override {
|
||||||
|
return internal_index_->HasRawData();
|
||||||
|
}
|
||||||
|
|
||||||
|
BinarySet
|
||||||
|
Upload(const Config& config = {}) override;
|
||||||
|
|
||||||
|
BinarySet
|
||||||
|
UploadV2(const Config& config = {}) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
InternalIndexType
|
||||||
|
SelectIndexBuildType(const std::vector<FieldDataPtr>& field_datas);
|
||||||
|
|
||||||
|
InternalIndexType
|
||||||
|
SelectIndexBuildType(size_t n, const T* values);
|
||||||
|
|
||||||
|
void
|
||||||
|
DeserializeIndexType(const BinarySet& binary_set);
|
||||||
|
|
||||||
|
void
|
||||||
|
BuildInternal(const std::vector<FieldDataPtr>& field_datas);
|
||||||
|
|
||||||
|
void
|
||||||
|
LoadInternal(const BinarySet& binary_set, const Config& config);
|
||||||
|
|
||||||
|
std::shared_ptr<ScalarIndex<T>>
|
||||||
|
GetInternalIndex();
|
||||||
|
|
||||||
|
public:
|
||||||
|
bool is_built_{false};
|
||||||
|
int32_t bitmap_index_cardinality_limit_;
|
||||||
|
InternalIndexType internal_index_type_;
|
||||||
|
std::shared_ptr<ScalarIndex<T>> internal_index_{nullptr};
|
||||||
|
std::shared_ptr<storage::MemFileManagerImpl> file_manager_{nullptr};
|
||||||
|
std::shared_ptr<milvus_storage::Space> space_{nullptr};
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace index
|
||||||
|
} // namespace milvus
|
||||||
@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <boost/dynamic_bitset.hpp>
|
#include <boost/dynamic_bitset.hpp>
|
||||||
|
#include "common/FieldData.h"
|
||||||
#include "common/EasyAssert.h"
|
#include "common/EasyAssert.h"
|
||||||
#include "knowhere/comp/index_param.h"
|
#include "knowhere/comp/index_param.h"
|
||||||
#include "knowhere/dataset.h"
|
#include "knowhere/dataset.h"
|
||||||
|
|||||||
@ -27,7 +27,7 @@
|
|||||||
#include "index/StringIndexMarisa.h"
|
#include "index/StringIndexMarisa.h"
|
||||||
#include "index/BoolIndex.h"
|
#include "index/BoolIndex.h"
|
||||||
#include "index/InvertedIndexTantivy.h"
|
#include "index/InvertedIndexTantivy.h"
|
||||||
#include "index/BitmapIndex.h"
|
#include "index/HybridScalarIndex.h"
|
||||||
|
|
||||||
namespace milvus::index {
|
namespace milvus::index {
|
||||||
|
|
||||||
@ -44,7 +44,7 @@ IndexFactory::CreateScalarIndex(
|
|||||||
file_manager_context);
|
file_manager_context);
|
||||||
}
|
}
|
||||||
if (index_type == BITMAP_INDEX_TYPE) {
|
if (index_type == BITMAP_INDEX_TYPE) {
|
||||||
return std::make_unique<BitmapIndex<T>>(file_manager_context);
|
return std::make_unique<HybridScalarIndex<T>>(file_manager_context);
|
||||||
}
|
}
|
||||||
return CreateScalarIndexSort<T>(file_manager_context);
|
return CreateScalarIndexSort<T>(file_manager_context);
|
||||||
}
|
}
|
||||||
@ -70,7 +70,8 @@ IndexFactory::CreateScalarIndex<std::string>(
|
|||||||
cfg, file_manager_context);
|
cfg, file_manager_context);
|
||||||
}
|
}
|
||||||
if (index_type == BITMAP_INDEX_TYPE) {
|
if (index_type == BITMAP_INDEX_TYPE) {
|
||||||
return std::make_unique<BitmapIndex<std::string>>(file_manager_context);
|
return std::make_unique<HybridScalarIndex<std::string>>(
|
||||||
|
file_manager_context);
|
||||||
}
|
}
|
||||||
return CreateStringIndexMarisa(file_manager_context);
|
return CreateStringIndexMarisa(file_manager_context);
|
||||||
#else
|
#else
|
||||||
@ -92,7 +93,8 @@ IndexFactory::CreateScalarIndex(
|
|||||||
cfg, file_manager_context, space);
|
cfg, file_manager_context, space);
|
||||||
}
|
}
|
||||||
if (index_type == BITMAP_INDEX_TYPE) {
|
if (index_type == BITMAP_INDEX_TYPE) {
|
||||||
return std::make_unique<BitmapIndex<T>>(file_manager_context, space);
|
return std::make_unique<HybridScalarIndex<T>>(file_manager_context,
|
||||||
|
space);
|
||||||
}
|
}
|
||||||
return CreateScalarIndexSort<T>(file_manager_context, space);
|
return CreateScalarIndexSort<T>(file_manager_context, space);
|
||||||
}
|
}
|
||||||
@ -112,8 +114,8 @@ IndexFactory::CreateScalarIndex<std::string>(
|
|||||||
cfg, file_manager_context, space);
|
cfg, file_manager_context, space);
|
||||||
}
|
}
|
||||||
if (index_type == BITMAP_INDEX_TYPE) {
|
if (index_type == BITMAP_INDEX_TYPE) {
|
||||||
return std::make_unique<BitmapIndex<std::string>>(file_manager_context,
|
return std::make_unique<HybridScalarIndex<std::string>>(
|
||||||
space);
|
file_manager_context, space);
|
||||||
}
|
}
|
||||||
return CreateStringIndexMarisa(file_manager_context, space);
|
return CreateStringIndexMarisa(file_manager_context, space);
|
||||||
#else
|
#else
|
||||||
|
|||||||
@ -426,8 +426,34 @@ InvertedIndexTantivy<T>::BuildWithRawData(size_t n,
|
|||||||
const void* values,
|
const void* values,
|
||||||
const Config& config) {
|
const Config& config) {
|
||||||
if constexpr (!std::is_same_v<T, std::string>) {
|
if constexpr (!std::is_same_v<T, std::string>) {
|
||||||
PanicInfo(Unsupported,
|
TantivyConfig cfg;
|
||||||
"InvertedIndex.BuildWithRawData only support string");
|
if constexpr (std::is_same_v<int8_t, T>) {
|
||||||
|
cfg.data_type_ = DataType::INT8;
|
||||||
|
}
|
||||||
|
if constexpr (std::is_same_v<int16_t, T>) {
|
||||||
|
cfg.data_type_ = DataType::INT16;
|
||||||
|
}
|
||||||
|
if constexpr (std::is_same_v<int32_t, T>) {
|
||||||
|
cfg.data_type_ = DataType::INT32;
|
||||||
|
}
|
||||||
|
if constexpr (std::is_same_v<int64_t, T>) {
|
||||||
|
cfg.data_type_ = DataType::INT64;
|
||||||
|
}
|
||||||
|
if constexpr (std::is_same_v<std::string, T>) {
|
||||||
|
cfg.data_type_ = DataType::VARCHAR;
|
||||||
|
}
|
||||||
|
boost::uuids::random_generator generator;
|
||||||
|
auto uuid = generator();
|
||||||
|
auto prefix = boost::uuids::to_string(uuid);
|
||||||
|
path_ = fmt::format("/tmp/{}", prefix);
|
||||||
|
boost::filesystem::create_directories(path_);
|
||||||
|
cfg_ = cfg;
|
||||||
|
d_type_ = cfg_.to_tantivy_data_type();
|
||||||
|
std::string field = "test_inverted_index";
|
||||||
|
wrapper_ = std::make_shared<TantivyIndexWrapper>(
|
||||||
|
field.c_str(), d_type_, path_.c_str());
|
||||||
|
wrapper_->add_data<T>(static_cast<const T*>(values), n);
|
||||||
|
finish();
|
||||||
} else {
|
} else {
|
||||||
boost::uuids::random_generator generator;
|
boost::uuids::random_generator generator;
|
||||||
auto uuid = generator();
|
auto uuid = generator();
|
||||||
|
|||||||
@ -54,6 +54,8 @@ constexpr const char* INDEX_BUILD_ID = "index_build_id";
|
|||||||
constexpr const char* INDEX_ID = "index_id";
|
constexpr const char* INDEX_ID = "index_id";
|
||||||
constexpr const char* INDEX_VERSION = "index_version";
|
constexpr const char* INDEX_VERSION = "index_version";
|
||||||
constexpr const char* INDEX_ENGINE_VERSION = "index_engine_version";
|
constexpr const char* INDEX_ENGINE_VERSION = "index_engine_version";
|
||||||
|
constexpr const char* BITMAP_INDEX_CARDINALITY_LIMIT =
|
||||||
|
"bitmap_cardinality_limit";
|
||||||
|
|
||||||
// VecIndex file metas
|
// VecIndex file metas
|
||||||
constexpr const char* DISK_ANN_PREFIX_PATH = "index_prefix";
|
constexpr const char* DISK_ANN_PREFIX_PATH = "index_prefix";
|
||||||
|
|||||||
@ -80,6 +80,16 @@ class ScalarIndex : public IndexBase {
|
|||||||
RegexQuery(const std::string& pattern) {
|
RegexQuery(const std::string& pattern) {
|
||||||
PanicInfo(Unsupported, "regex query is not supported");
|
PanicInfo(Unsupported, "regex query is not supported");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual void
|
||||||
|
BuildWithFieldData(const std::vector<FieldDataPtr>& field_datas) {
|
||||||
|
PanicInfo(Unsupported, "BuildwithFieldData is not supported");
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void
|
||||||
|
LoadWithoutAssemble(const BinarySet& binary_set, const Config& config) {
|
||||||
|
PanicInfo(Unsupported, "LoadWithoutAssemble is not supported");
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
|||||||
@ -117,6 +117,35 @@ ScalarIndexSort<T>::Build(const Config& config) {
|
|||||||
auto field_datas =
|
auto field_datas =
|
||||||
file_manager_->CacheRawDataToMemory(insert_files.value());
|
file_manager_->CacheRawDataToMemory(insert_files.value());
|
||||||
|
|
||||||
|
BuildWithFieldData(field_datas);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void
|
||||||
|
ScalarIndexSort<T>::Build(size_t n, const T* values) {
|
||||||
|
if (is_built_)
|
||||||
|
return;
|
||||||
|
if (n == 0) {
|
||||||
|
throw SegcoreError(DataIsEmpty,
|
||||||
|
"ScalarIndexSort cannot build null values!");
|
||||||
|
}
|
||||||
|
data_.reserve(n);
|
||||||
|
idx_to_offsets_.resize(n);
|
||||||
|
T* p = const_cast<T*>(values);
|
||||||
|
for (size_t i = 0; i < n; ++i) {
|
||||||
|
data_.emplace_back(IndexStructure(*p++, i));
|
||||||
|
}
|
||||||
|
std::sort(data_.begin(), data_.end());
|
||||||
|
for (size_t i = 0; i < data_.size(); ++i) {
|
||||||
|
idx_to_offsets_[data_[i].idx_] = i;
|
||||||
|
}
|
||||||
|
is_built_ = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void
|
||||||
|
ScalarIndexSort<T>::BuildWithFieldData(
|
||||||
|
const std::vector<milvus::FieldDataPtr>& field_datas) {
|
||||||
int64_t total_num_rows = 0;
|
int64_t total_num_rows = 0;
|
||||||
for (const auto& data : field_datas) {
|
for (const auto& data : field_datas) {
|
||||||
total_num_rows += data->get_num_rows();
|
total_num_rows += data->get_num_rows();
|
||||||
@ -145,28 +174,6 @@ ScalarIndexSort<T>::Build(const Config& config) {
|
|||||||
is_built_ = true;
|
is_built_ = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void
|
|
||||||
ScalarIndexSort<T>::Build(size_t n, const T* values) {
|
|
||||||
if (is_built_)
|
|
||||||
return;
|
|
||||||
if (n == 0) {
|
|
||||||
throw SegcoreError(DataIsEmpty,
|
|
||||||
"ScalarIndexSort cannot build null values!");
|
|
||||||
}
|
|
||||||
data_.reserve(n);
|
|
||||||
idx_to_offsets_.resize(n);
|
|
||||||
T* p = const_cast<T*>(values);
|
|
||||||
for (size_t i = 0; i < n; ++i) {
|
|
||||||
data_.emplace_back(IndexStructure(*p++, i));
|
|
||||||
}
|
|
||||||
std::sort(data_.begin(), data_.end());
|
|
||||||
for (size_t i = 0; i < data_.size(); ++i) {
|
|
||||||
idx_to_offsets_[data_[i].idx_] = i;
|
|
||||||
}
|
|
||||||
is_built_ = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
BinarySet
|
BinarySet
|
||||||
ScalarIndexSort<T>::Serialize(const Config& config) {
|
ScalarIndexSort<T>::Serialize(const Config& config) {
|
||||||
|
|||||||
@ -41,6 +41,17 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
|||||||
const storage::FileManagerContext& file_manager_context,
|
const storage::FileManagerContext& file_manager_context,
|
||||||
std::shared_ptr<milvus_storage::Space> space);
|
std::shared_ptr<milvus_storage::Space> space);
|
||||||
|
|
||||||
|
explicit ScalarIndexSort(
|
||||||
|
const std::shared_ptr<storage::MemFileManagerImpl>& file_manager)
|
||||||
|
: file_manager_(file_manager) {
|
||||||
|
}
|
||||||
|
|
||||||
|
explicit ScalarIndexSort(
|
||||||
|
const std::shared_ptr<storage::MemFileManagerImpl>& file_manager,
|
||||||
|
std::shared_ptr<milvus_storage::Space> space)
|
||||||
|
: file_manager_(file_manager), space_(space) {
|
||||||
|
}
|
||||||
|
|
||||||
BinarySet
|
BinarySet
|
||||||
Serialize(const Config& config) override;
|
Serialize(const Config& config) override;
|
||||||
|
|
||||||
@ -100,6 +111,9 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
BuildWithFieldData(const std::vector<FieldDataPtr>& datas) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool
|
bool
|
||||||
ShouldSkip(const T lower_value, const T upper_value, const OpType op);
|
ShouldSkip(const T lower_value, const T upper_value, const OpType op);
|
||||||
@ -116,7 +130,8 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
LoadWithoutAssemble(const BinarySet& binary_set, const Config& config);
|
LoadWithoutAssemble(const BinarySet& binary_set,
|
||||||
|
const Config& config) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool is_built_;
|
bool is_built_;
|
||||||
|
|||||||
@ -132,6 +132,13 @@ StringIndexMarisa::Build(const Config& config) {
|
|||||||
"insert file paths is empty when build index");
|
"insert file paths is empty when build index");
|
||||||
auto field_datas =
|
auto field_datas =
|
||||||
file_manager_->CacheRawDataToMemory(insert_files.value());
|
file_manager_->CacheRawDataToMemory(insert_files.value());
|
||||||
|
|
||||||
|
BuildWithFieldData(field_datas);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
StringIndexMarisa::BuildWithFieldData(
|
||||||
|
const std::vector<FieldDataPtr>& field_datas) {
|
||||||
int64_t total_num_rows = 0;
|
int64_t total_num_rows = 0;
|
||||||
|
|
||||||
// fill key set.
|
// fill key set.
|
||||||
|
|||||||
@ -37,6 +37,17 @@ class StringIndexMarisa : public StringIndex {
|
|||||||
const storage::FileManagerContext& file_manager_context,
|
const storage::FileManagerContext& file_manager_context,
|
||||||
std::shared_ptr<milvus_storage::Space> space);
|
std::shared_ptr<milvus_storage::Space> space);
|
||||||
|
|
||||||
|
explicit StringIndexMarisa(
|
||||||
|
const std::shared_ptr<storage::MemFileManagerImpl>& file_manager)
|
||||||
|
: file_manager_(file_manager) {
|
||||||
|
}
|
||||||
|
|
||||||
|
explicit StringIndexMarisa(
|
||||||
|
const std::shared_ptr<storage::MemFileManagerImpl>& file_manager,
|
||||||
|
std::shared_ptr<milvus_storage::Space> space)
|
||||||
|
: file_manager_(file_manager), space_(space) {
|
||||||
|
}
|
||||||
|
|
||||||
int64_t
|
int64_t
|
||||||
Size() override;
|
Size() override;
|
||||||
|
|
||||||
@ -63,6 +74,9 @@ class StringIndexMarisa : public StringIndex {
|
|||||||
void
|
void
|
||||||
Build(const Config& config = {}) override;
|
Build(const Config& config = {}) override;
|
||||||
|
|
||||||
|
void
|
||||||
|
BuildWithFieldData(const std::vector<FieldDataPtr>& field_datas) override;
|
||||||
|
|
||||||
void
|
void
|
||||||
BuildV2(const Config& Config = {}) override;
|
BuildV2(const Config& Config = {}) override;
|
||||||
|
|
||||||
@ -113,7 +127,8 @@ class StringIndexMarisa : public StringIndex {
|
|||||||
prefix_match(const std::string_view prefix);
|
prefix_match(const std::string_view prefix);
|
||||||
|
|
||||||
void
|
void
|
||||||
LoadWithoutAssemble(const BinarySet& binary_set, const Config& config);
|
LoadWithoutAssemble(const BinarySet& binary_set,
|
||||||
|
const Config& config) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Config config_;
|
Config config_;
|
||||||
|
|||||||
@ -154,6 +154,15 @@ GetIndexEngineVersionFromConfig(const Config& config) {
|
|||||||
return (std::stoi(index_engine_version.value()));
|
return (std::stoi(index_engine_version.value()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int32_t
|
||||||
|
GetBitmapCardinalityLimitFromConfig(const Config& config) {
|
||||||
|
auto bitmap_limit = GetValueFromConfig<std::string>(
|
||||||
|
config, index::BITMAP_INDEX_CARDINALITY_LIMIT);
|
||||||
|
AssertInfo(bitmap_limit.has_value(),
|
||||||
|
"bitmap cardinality limit not exist in config");
|
||||||
|
return (std::stoi(bitmap_limit.value()));
|
||||||
|
}
|
||||||
|
|
||||||
// TODO :: too ugly
|
// TODO :: too ugly
|
||||||
storage::FieldDataMeta
|
storage::FieldDataMeta
|
||||||
GetFieldDataMetaFromConfig(const Config& config) {
|
GetFieldDataMetaFromConfig(const Config& config) {
|
||||||
|
|||||||
@ -103,6 +103,9 @@ GetIndexTypeFromConfig(const Config& config);
|
|||||||
IndexVersion
|
IndexVersion
|
||||||
GetIndexEngineVersionFromConfig(const Config& config);
|
GetIndexEngineVersionFromConfig(const Config& config);
|
||||||
|
|
||||||
|
int32_t
|
||||||
|
GetBitmapCardinalityLimitFromConfig(const Config& config);
|
||||||
|
|
||||||
storage::FieldDataMeta
|
storage::FieldDataMeta
|
||||||
GetFieldDataMetaFromConfig(const Config& config);
|
GetFieldDataMetaFromConfig(const Config& config);
|
||||||
|
|
||||||
|
|||||||
@ -32,7 +32,7 @@ set(MILVUS_TEST_FILES
|
|||||||
test_growing.cpp
|
test_growing.cpp
|
||||||
test_growing_index.cpp
|
test_growing_index.cpp
|
||||||
test_indexing.cpp
|
test_indexing.cpp
|
||||||
test_bitmap_index.cpp
|
test_hybrid_index.cpp
|
||||||
test_index_c_api.cpp
|
test_index_c_api.cpp
|
||||||
test_index_wrapper.cpp
|
test_index_wrapper.cpp
|
||||||
test_init.cpp
|
test_init.cpp
|
||||||
|
|||||||
@ -10,12 +10,14 @@
|
|||||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||||
|
|
||||||
#include <boost/format.hpp>
|
#include <boost/format.hpp>
|
||||||
|
#include <fstream>
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <regex>
|
#include <regex>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
#include <roaring/roaring.hh>
|
||||||
|
|
||||||
#include "common/Json.h"
|
#include "common/Json.h"
|
||||||
#include "common/Types.h"
|
#include "common/Types.h"
|
||||||
@ -35,6 +37,8 @@
|
|||||||
#include "exec/expression/Expr.h"
|
#include "exec/expression/Expr.h"
|
||||||
#include "exec/Task.h"
|
#include "exec/Task.h"
|
||||||
#include "expr/ITypeExpr.h"
|
#include "expr/ITypeExpr.h"
|
||||||
|
#include "index/BitmapIndex.h"
|
||||||
|
#include "index/InvertedIndexTantivy.h"
|
||||||
|
|
||||||
using namespace milvus;
|
using namespace milvus;
|
||||||
using namespace milvus::query;
|
using namespace milvus::query;
|
||||||
@ -1271,7 +1275,7 @@ TEST(Expr, TestExprPerformance) {
|
|||||||
{DataType::DOUBLE, double_fid}};
|
{DataType::DOUBLE, double_fid}};
|
||||||
|
|
||||||
auto seg = CreateSealedSegment(schema);
|
auto seg = CreateSealedSegment(schema);
|
||||||
int N = 1000000;
|
int N = 10000;
|
||||||
auto raw_data = DataGen(schema, N);
|
auto raw_data = DataGen(schema, N);
|
||||||
|
|
||||||
// load field data
|
// load field data
|
||||||
@ -1678,7 +1682,7 @@ TEST_P(ExprTest, TestSealedSegmentGetBatchSize) {
|
|||||||
schema->set_primary_field_id(str1_fid);
|
schema->set_primary_field_id(str1_fid);
|
||||||
|
|
||||||
auto seg = CreateSealedSegment(schema);
|
auto seg = CreateSealedSegment(schema);
|
||||||
int N = 1000000;
|
int N = 100000;
|
||||||
auto raw_data = DataGen(schema, N);
|
auto raw_data = DataGen(schema, N);
|
||||||
// load field data
|
// load field data
|
||||||
auto fields = schema->get_fields();
|
auto fields = schema->get_fields();
|
||||||
@ -1739,7 +1743,7 @@ TEST_P(ExprTest, TestGrowingSegmentGetBatchSize) {
|
|||||||
schema->set_primary_field_id(str1_fid);
|
schema->set_primary_field_id(str1_fid);
|
||||||
|
|
||||||
auto seg = CreateGrowingSegment(schema, empty_index_meta);
|
auto seg = CreateGrowingSegment(schema, empty_index_meta);
|
||||||
int N = 1000000;
|
int N = 10000;
|
||||||
auto raw_data = DataGen(schema, N);
|
auto raw_data = DataGen(schema, N);
|
||||||
seg->PreInsert(N);
|
seg->PreInsert(N);
|
||||||
seg->Insert(0,
|
seg->Insert(0,
|
||||||
@ -1804,7 +1808,7 @@ TEST_P(ExprTest, TestConjuctExpr) {
|
|||||||
schema->set_primary_field_id(str1_fid);
|
schema->set_primary_field_id(str1_fid);
|
||||||
|
|
||||||
auto seg = CreateSealedSegment(schema);
|
auto seg = CreateSealedSegment(schema);
|
||||||
int N = 1000000;
|
int N = 10000;
|
||||||
auto raw_data = DataGen(schema, N);
|
auto raw_data = DataGen(schema, N);
|
||||||
// load field data
|
// load field data
|
||||||
auto fields = schema->get_fields();
|
auto fields = schema->get_fields();
|
||||||
@ -1871,7 +1875,7 @@ TEST_P(ExprTest, TestUnaryBenchTest) {
|
|||||||
schema->set_primary_field_id(str1_fid);
|
schema->set_primary_field_id(str1_fid);
|
||||||
|
|
||||||
auto seg = CreateSealedSegment(schema);
|
auto seg = CreateSealedSegment(schema);
|
||||||
int N = 1000000;
|
int N = 10000;
|
||||||
auto raw_data = DataGen(schema, N);
|
auto raw_data = DataGen(schema, N);
|
||||||
|
|
||||||
// load field data
|
// load field data
|
||||||
@ -1942,7 +1946,7 @@ TEST_P(ExprTest, TestBinaryRangeBenchTest) {
|
|||||||
schema->set_primary_field_id(str1_fid);
|
schema->set_primary_field_id(str1_fid);
|
||||||
|
|
||||||
auto seg = CreateSealedSegment(schema);
|
auto seg = CreateSealedSegment(schema);
|
||||||
int N = 1000000;
|
int N = 10000;
|
||||||
auto raw_data = DataGen(schema, N);
|
auto raw_data = DataGen(schema, N);
|
||||||
|
|
||||||
// load field data
|
// load field data
|
||||||
@ -2022,7 +2026,7 @@ TEST_P(ExprTest, TestLogicalUnaryBenchTest) {
|
|||||||
schema->set_primary_field_id(str1_fid);
|
schema->set_primary_field_id(str1_fid);
|
||||||
|
|
||||||
auto seg = CreateSealedSegment(schema);
|
auto seg = CreateSealedSegment(schema);
|
||||||
int N = 1000000;
|
int N = 10000;
|
||||||
auto raw_data = DataGen(schema, N);
|
auto raw_data = DataGen(schema, N);
|
||||||
|
|
||||||
// load field data
|
// load field data
|
||||||
@ -2096,7 +2100,7 @@ TEST_P(ExprTest, TestBinaryLogicalBenchTest) {
|
|||||||
schema->set_primary_field_id(str1_fid);
|
schema->set_primary_field_id(str1_fid);
|
||||||
|
|
||||||
auto seg = CreateSealedSegment(schema);
|
auto seg = CreateSealedSegment(schema);
|
||||||
int N = 1000000;
|
int N = 10000;
|
||||||
auto raw_data = DataGen(schema, N);
|
auto raw_data = DataGen(schema, N);
|
||||||
|
|
||||||
// load field data
|
// load field data
|
||||||
@ -2180,7 +2184,7 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeBenchExpr) {
|
|||||||
schema->set_primary_field_id(str1_fid);
|
schema->set_primary_field_id(str1_fid);
|
||||||
|
|
||||||
auto seg = CreateSealedSegment(schema);
|
auto seg = CreateSealedSegment(schema);
|
||||||
int N = 1000000;
|
int N = 10000;
|
||||||
auto raw_data = DataGen(schema, N);
|
auto raw_data = DataGen(schema, N);
|
||||||
|
|
||||||
// load field data
|
// load field data
|
||||||
@ -2263,7 +2267,7 @@ TEST_P(ExprTest, TestCompareExprBenchTest) {
|
|||||||
schema->set_primary_field_id(str1_fid);
|
schema->set_primary_field_id(str1_fid);
|
||||||
|
|
||||||
auto seg = CreateSealedSegment(schema);
|
auto seg = CreateSealedSegment(schema);
|
||||||
int N = 1000000;
|
int N = 10000;
|
||||||
auto raw_data = DataGen(schema, N);
|
auto raw_data = DataGen(schema, N);
|
||||||
|
|
||||||
// load field data
|
// load field data
|
||||||
@ -2333,7 +2337,7 @@ TEST_P(ExprTest, TestRefactorExprs) {
|
|||||||
schema->set_primary_field_id(str1_fid);
|
schema->set_primary_field_id(str1_fid);
|
||||||
|
|
||||||
auto seg = CreateSealedSegment(schema);
|
auto seg = CreateSealedSegment(schema);
|
||||||
int N = 1000000;
|
int N = 10000;
|
||||||
auto raw_data = DataGen(schema, N);
|
auto raw_data = DataGen(schema, N);
|
||||||
|
|
||||||
// load field data
|
// load field data
|
||||||
|
|||||||
@ -17,6 +17,7 @@
|
|||||||
|
|
||||||
#include "common/Tracer.h"
|
#include "common/Tracer.h"
|
||||||
#include "index/BitmapIndex.h"
|
#include "index/BitmapIndex.h"
|
||||||
|
#include "index/HybridScalarIndex.h"
|
||||||
#include "storage/Util.h"
|
#include "storage/Util.h"
|
||||||
#include "storage/InsertData.h"
|
#include "storage/InsertData.h"
|
||||||
#include "indexbuilder/IndexFactory.h"
|
#include "indexbuilder/IndexFactory.h"
|
||||||
@ -60,7 +61,7 @@ GenerateData<std::string>(const size_t size, const size_t cardinality) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
class BitmapIndexTest : public testing::Test {
|
class HybridIndexTestV1 : public testing::Test {
|
||||||
protected:
|
protected:
|
||||||
void
|
void
|
||||||
Init(int64_t collection_id,
|
Init(int64_t collection_id,
|
||||||
@ -88,7 +89,8 @@ class BitmapIndexTest : public testing::Test {
|
|||||||
|
|
||||||
auto serialized_bytes = insert_data.Serialize(storage::Remote);
|
auto serialized_bytes = insert_data.Serialize(storage::Remote);
|
||||||
|
|
||||||
auto log_path = fmt::format("{}/{}/{}/{}/{}",
|
auto log_path = fmt::format("/{}/{}/{}/{}/{}/{}",
|
||||||
|
"/tmp/test_hybrid/",
|
||||||
collection_id,
|
collection_id,
|
||||||
partition_id,
|
partition_id,
|
||||||
segment_id,
|
segment_id,
|
||||||
@ -103,6 +105,7 @@ class BitmapIndexTest : public testing::Test {
|
|||||||
Config config;
|
Config config;
|
||||||
config["index_type"] = milvus::index::BITMAP_INDEX_TYPE;
|
config["index_type"] = milvus::index::BITMAP_INDEX_TYPE;
|
||||||
config["insert_files"] = std::vector<std::string>{log_path};
|
config["insert_files"] = std::vector<std::string>{log_path};
|
||||||
|
config["bitmap_cardinality_limit"] = "1000";
|
||||||
|
|
||||||
auto build_index =
|
auto build_index =
|
||||||
indexbuilder::IndexFactory::GetInstance().CreateIndex(
|
indexbuilder::IndexFactory::GetInstance().CreateIndex(
|
||||||
@ -125,10 +128,14 @@ class BitmapIndexTest : public testing::Test {
|
|||||||
index_->Load(milvus::tracer::TraceContext{}, config);
|
index_->Load(milvus::tracer::TraceContext{}, config);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
virtual void
|
||||||
SetUp() override {
|
SetParam() {
|
||||||
nb_ = 10000;
|
nb_ = 10000;
|
||||||
cardinality_ = 30;
|
cardinality_ = 30;
|
||||||
|
}
|
||||||
|
void
|
||||||
|
SetUp() override {
|
||||||
|
SetParam();
|
||||||
|
|
||||||
if constexpr (std::is_same_v<T, int8_t>) {
|
if constexpr (std::is_same_v<T, int8_t>) {
|
||||||
type_ = DataType::INT8;
|
type_ = DataType::INT8;
|
||||||
@ -162,7 +169,7 @@ class BitmapIndexTest : public testing::Test {
|
|||||||
index_version);
|
index_version);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual ~BitmapIndexTest() override {
|
virtual ~HybridIndexTestV1() override {
|
||||||
boost::filesystem::remove_all(chunk_manager_->GetRootPath());
|
boost::filesystem::remove_all(chunk_manager_->GetRootPath());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -176,7 +183,8 @@ class BitmapIndexTest : public testing::Test {
|
|||||||
test_data.push_back(data_[i]);
|
test_data.push_back(data_[i]);
|
||||||
s.insert(data_[i]);
|
s.insert(data_[i]);
|
||||||
}
|
}
|
||||||
auto index_ptr = dynamic_cast<index::BitmapIndex<T>*>(index_.get());
|
auto index_ptr =
|
||||||
|
dynamic_cast<index::HybridScalarIndex<T>*>(index_.get());
|
||||||
auto bitset = index_ptr->In(test_data.size(), test_data.data());
|
auto bitset = index_ptr->In(test_data.size(), test_data.data());
|
||||||
for (size_t i = 0; i < bitset.size(); i++) {
|
for (size_t i = 0; i < bitset.size(); i++) {
|
||||||
ASSERT_EQ(bitset[i], s.find(data_[i]) != s.end());
|
ASSERT_EQ(bitset[i], s.find(data_[i]) != s.end());
|
||||||
@ -192,7 +200,8 @@ class BitmapIndexTest : public testing::Test {
|
|||||||
test_data.push_back(data_[i]);
|
test_data.push_back(data_[i]);
|
||||||
s.insert(data_[i]);
|
s.insert(data_[i]);
|
||||||
}
|
}
|
||||||
auto index_ptr = dynamic_cast<index::BitmapIndex<T>*>(index_.get());
|
auto index_ptr =
|
||||||
|
dynamic_cast<index::HybridScalarIndex<T>*>(index_.get());
|
||||||
auto bitset = index_ptr->NotIn(test_data.size(), test_data.data());
|
auto bitset = index_ptr->NotIn(test_data.size(), test_data.data());
|
||||||
for (size_t i = 0; i < bitset.size(); i++) {
|
for (size_t i = 0; i < bitset.size(); i++) {
|
||||||
ASSERT_EQ(bitset[i], s.find(data_[i]) == s.end());
|
ASSERT_EQ(bitset[i], s.find(data_[i]) == s.end());
|
||||||
@ -219,7 +228,7 @@ class BitmapIndexTest : public testing::Test {
|
|||||||
};
|
};
|
||||||
for (const auto& [test_value, op, ref] : test_cases) {
|
for (const auto& [test_value, op, ref] : test_cases) {
|
||||||
auto index_ptr =
|
auto index_ptr =
|
||||||
dynamic_cast<index::BitmapIndex<T>*>(index_.get());
|
dynamic_cast<index::HybridScalarIndex<T>*>(index_.get());
|
||||||
auto bitset = index_ptr->Range(test_value, op);
|
auto bitset = index_ptr->Range(test_value, op);
|
||||||
for (size_t i = 0; i < bitset.size(); i++) {
|
for (size_t i = 0; i < bitset.size(); i++) {
|
||||||
auto ans = bitset[i];
|
auto ans = bitset[i];
|
||||||
@ -232,8 +241,65 @@ class BitmapIndexTest : public testing::Test {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
void
|
||||||
std::shared_ptr<storage::ChunkManager> chunk_manager_;
|
TestRangeCompareFunc() {
|
||||||
|
if constexpr (!std::is_same_v<T, std::string>) {
|
||||||
|
using RefFunc = std::function<bool(int64_t)>;
|
||||||
|
struct TestParam {
|
||||||
|
int64_t lower_val;
|
||||||
|
int64_t upper_val;
|
||||||
|
bool lower_inclusive;
|
||||||
|
bool upper_inclusive;
|
||||||
|
RefFunc ref;
|
||||||
|
};
|
||||||
|
std::vector<TestParam> test_cases = {
|
||||||
|
{
|
||||||
|
10,
|
||||||
|
30,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
[&](int64_t i) { return 10 < data_[i] && data_[i] < 30; },
|
||||||
|
},
|
||||||
|
{
|
||||||
|
10,
|
||||||
|
30,
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
[&](int64_t i) { return 10 <= data_[i] && data_[i] < 30; },
|
||||||
|
},
|
||||||
|
{
|
||||||
|
10,
|
||||||
|
30,
|
||||||
|
true,
|
||||||
|
true,
|
||||||
|
[&](int64_t i) { return 10 <= data_[i] && data_[i] <= 30; },
|
||||||
|
},
|
||||||
|
{
|
||||||
|
10,
|
||||||
|
30,
|
||||||
|
false,
|
||||||
|
true,
|
||||||
|
[&](int64_t i) { return 10 < data_[i] && data_[i] <= 30; },
|
||||||
|
}};
|
||||||
|
|
||||||
|
for (const auto& test_case : test_cases) {
|
||||||
|
auto index_ptr =
|
||||||
|
dynamic_cast<index::HybridScalarIndex<T>*>(index_.get());
|
||||||
|
auto bitset = index_ptr->Range(test_case.lower_val,
|
||||||
|
test_case.lower_inclusive,
|
||||||
|
test_case.upper_val,
|
||||||
|
test_case.upper_inclusive);
|
||||||
|
for (size_t i = 0; i < bitset.size(); i++) {
|
||||||
|
auto ans = bitset[i];
|
||||||
|
auto should = test_case.ref(i);
|
||||||
|
ASSERT_EQ(ans, should)
|
||||||
|
<< "lower:" << test_case.lower_val
|
||||||
|
<< "upper:" << test_case.upper_val << ", @" << i
|
||||||
|
<< ", ans: " << ans << ", ref: " << should;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
IndexBasePtr index_;
|
IndexBasePtr index_;
|
||||||
@ -241,34 +307,92 @@ class BitmapIndexTest : public testing::Test {
|
|||||||
size_t nb_;
|
size_t nb_;
|
||||||
size_t cardinality_;
|
size_t cardinality_;
|
||||||
boost::container::vector<T> data_;
|
boost::container::vector<T> data_;
|
||||||
|
std::shared_ptr<storage::ChunkManager> chunk_manager_;
|
||||||
};
|
};
|
||||||
|
|
||||||
TYPED_TEST_SUITE_P(BitmapIndexTest);
|
TYPED_TEST_SUITE_P(HybridIndexTestV1);
|
||||||
|
|
||||||
TYPED_TEST_P(BitmapIndexTest, CountFuncTest) {
|
TYPED_TEST_P(HybridIndexTestV1, CountFuncTest) {
|
||||||
auto count = this->index_->Count();
|
auto count = this->index_->Count();
|
||||||
EXPECT_EQ(count, this->nb_);
|
EXPECT_EQ(count, this->nb_);
|
||||||
}
|
}
|
||||||
|
|
||||||
TYPED_TEST_P(BitmapIndexTest, INFuncTest) {
|
TYPED_TEST_P(HybridIndexTestV1, INFuncTest) {
|
||||||
this->TestInFunc();
|
this->TestInFunc();
|
||||||
}
|
}
|
||||||
|
|
||||||
TYPED_TEST_P(BitmapIndexTest, NotINFuncTest) {
|
TYPED_TEST_P(HybridIndexTestV1, NotINFuncTest) {
|
||||||
this->TestNotInFunc();
|
this->TestNotInFunc();
|
||||||
}
|
}
|
||||||
|
|
||||||
TYPED_TEST_P(BitmapIndexTest, CompareValFuncTest) {
|
TYPED_TEST_P(HybridIndexTestV1, CompareValFuncTest) {
|
||||||
this->TestCompareValueFunc();
|
this->TestCompareValueFunc();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TYPED_TEST_P(HybridIndexTestV1, TestRangeCompareFuncTest) {
|
||||||
|
this->TestRangeCompareFunc();
|
||||||
|
}
|
||||||
|
|
||||||
using BitmapType =
|
using BitmapType =
|
||||||
testing::Types<int8_t, int16_t, int32_t, int64_t, std::string>;
|
testing::Types<int8_t, int16_t, int32_t, int64_t, std::string>;
|
||||||
|
|
||||||
REGISTER_TYPED_TEST_SUITE_P(BitmapIndexTest,
|
REGISTER_TYPED_TEST_SUITE_P(HybridIndexTestV1,
|
||||||
CountFuncTest,
|
CountFuncTest,
|
||||||
INFuncTest,
|
INFuncTest,
|
||||||
NotINFuncTest,
|
NotINFuncTest,
|
||||||
CompareValFuncTest);
|
CompareValFuncTest,
|
||||||
|
TestRangeCompareFuncTest);
|
||||||
|
|
||||||
INSTANTIATE_TYPED_TEST_SUITE_P(BitmapE2ECheck, BitmapIndexTest, BitmapType);
|
INSTANTIATE_TYPED_TEST_SUITE_P(HybridIndexE2ECheck_LowCardinality,
|
||||||
|
HybridIndexTestV1,
|
||||||
|
BitmapType);
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
class HybridIndexTestV2 : public HybridIndexTestV1<T> {
|
||||||
|
public:
|
||||||
|
virtual void
|
||||||
|
SetParam() override {
|
||||||
|
this->nb_ = 10000;
|
||||||
|
this->cardinality_ = 2000;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual ~HybridIndexTestV2() {
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TYPED_TEST_SUITE_P(HybridIndexTestV2);
|
||||||
|
|
||||||
|
TYPED_TEST_P(HybridIndexTestV2, CountFuncTest) {
|
||||||
|
auto count = this->index_->Count();
|
||||||
|
EXPECT_EQ(count, this->nb_);
|
||||||
|
}
|
||||||
|
|
||||||
|
TYPED_TEST_P(HybridIndexTestV2, INFuncTest) {
|
||||||
|
this->TestInFunc();
|
||||||
|
}
|
||||||
|
|
||||||
|
TYPED_TEST_P(HybridIndexTestV2, NotINFuncTest) {
|
||||||
|
this->TestNotInFunc();
|
||||||
|
}
|
||||||
|
|
||||||
|
TYPED_TEST_P(HybridIndexTestV2, CompareValFuncTest) {
|
||||||
|
this->TestCompareValueFunc();
|
||||||
|
}
|
||||||
|
|
||||||
|
TYPED_TEST_P(HybridIndexTestV2, TestRangeCompareFuncTest) {
|
||||||
|
this->TestRangeCompareFunc();
|
||||||
|
}
|
||||||
|
|
||||||
|
using BitmapType =
|
||||||
|
testing::Types<int8_t, int16_t, int32_t, int64_t, std::string>;
|
||||||
|
|
||||||
|
REGISTER_TYPED_TEST_SUITE_P(HybridIndexTestV2,
|
||||||
|
CountFuncTest,
|
||||||
|
INFuncTest,
|
||||||
|
NotINFuncTest,
|
||||||
|
CompareValFuncTest,
|
||||||
|
TestRangeCompareFuncTest);
|
||||||
|
|
||||||
|
INSTANTIATE_TYPED_TEST_SUITE_P(HybridIndexE2ECheck_HighCardinality,
|
||||||
|
HybridIndexTestV2,
|
||||||
|
BitmapType);
|
||||||
@ -15,7 +15,11 @@
|
|||||||
|
|
||||||
#include "gtest/gtest-typed-test.h"
|
#include "gtest/gtest-typed-test.h"
|
||||||
#include "index/IndexFactory.h"
|
#include "index/IndexFactory.h"
|
||||||
|
#include "index/BitmapIndex.h"
|
||||||
|
#include "index/InvertedIndexTantivy.h"
|
||||||
|
#include "index/ScalarIndex.h"
|
||||||
#include "common/CDataType.h"
|
#include "common/CDataType.h"
|
||||||
|
#include "common/Types.h"
|
||||||
#include "knowhere/comp/index_param.h"
|
#include "knowhere/comp/index_param.h"
|
||||||
#include "test_utils/indexbuilder_test_utils.h"
|
#include "test_utils/indexbuilder_test_utils.h"
|
||||||
#include "test_utils/AssertUtils.h"
|
#include "test_utils/AssertUtils.h"
|
||||||
@ -373,7 +377,11 @@ TYPED_TEST_P(TypedScalarIndexTestV2, Base) {
|
|||||||
create_index_info, file_manager_context, space);
|
create_index_info, file_manager_context, space);
|
||||||
auto scalar_index =
|
auto scalar_index =
|
||||||
dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
|
dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
|
||||||
scalar_index->BuildV2();
|
milvus::Config config;
|
||||||
|
if (index_type == "BITMAP") {
|
||||||
|
config["bitmap_cardinality_limit"] = "1000";
|
||||||
|
}
|
||||||
|
scalar_index->BuildV2(config);
|
||||||
scalar_index->UploadV2();
|
scalar_index->UploadV2();
|
||||||
|
|
||||||
auto new_index =
|
auto new_index =
|
||||||
@ -391,3 +399,260 @@ REGISTER_TYPED_TEST_SUITE_P(TypedScalarIndexTestV2, Base);
|
|||||||
INSTANTIATE_TYPED_TEST_SUITE_P(ArithmeticCheck,
|
INSTANTIATE_TYPED_TEST_SUITE_P(ArithmeticCheck,
|
||||||
TypedScalarIndexTestV2,
|
TypedScalarIndexTestV2,
|
||||||
ScalarT);
|
ScalarT);
|
||||||
|
|
||||||
|
using namespace milvus::index;
|
||||||
|
// Generates N pseudo-random values in [0, cardinality) for index tests.
//
// The engine is seeded with a fixed value (60), so every call with the same
// arguments yields the same sequence; the bitmap/sort comparisons below rely
// on both indexes being built from identical data.
//
// `cardinality` must be positive: it is used as a modulus.
template <typename T>
std::vector<T>
GenerateRawData(int N, int cardinality) {
    std::default_random_engine random(60);
    std::vector<T> data(N);
    for (auto& x : data) {
        x = random() % (cardinality);
    }
    return data;
}

// std::string flavour: same seed and modulus as the primary template, with
// each value rendered as its decimal string.
template <>
std::vector<std::string>
GenerateRawData(int N, int cardinality) {
    std::default_random_engine random(60);
    std::vector<std::string> data(N);
    for (auto& x : data) {
        x = std::to_string(random() % (cardinality));
    }
    return data;
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
IndexBasePtr
|
||||||
|
TestBuildIndex(int N, int cardinality, int index_type) {
|
||||||
|
auto raw_data = GenerateRawData<T>(N, cardinality);
|
||||||
|
if (index_type == 0) {
|
||||||
|
auto index = std::make_unique<milvus::index::BitmapIndex<T>>();
|
||||||
|
index->Build(N, raw_data.data());
|
||||||
|
return std::move(index);
|
||||||
|
} else if (index_type == 1) {
|
||||||
|
if constexpr (std::is_same_v<T, std::string>) {
|
||||||
|
auto index = std::make_unique<milvus::index::StringIndexMarisa>();
|
||||||
|
index->Build(N, raw_data.data());
|
||||||
|
return std::move(index);
|
||||||
|
}
|
||||||
|
auto index = milvus::index::CreateScalarIndexSort<T>();
|
||||||
|
index->Build(N, raw_data.data());
|
||||||
|
return std::move(index);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void
|
||||||
|
TestIndexSearchIn() {
|
||||||
|
// low data cardinality
|
||||||
|
{
|
||||||
|
int N = 1000;
|
||||||
|
std::vector<int> data_cardinality = {10, 20, 100};
|
||||||
|
for (auto& card : data_cardinality) {
|
||||||
|
auto bitmap_index = TestBuildIndex<T>(N, card, 0);
|
||||||
|
auto bitmap_index_ptr =
|
||||||
|
dynamic_cast<ScalarIndex<T>*>(bitmap_index.get());
|
||||||
|
auto sort_index = TestBuildIndex<T>(N, card, 1);
|
||||||
|
auto sort_index_ptr =
|
||||||
|
dynamic_cast<ScalarIndex<T>*>(sort_index.get());
|
||||||
|
std::vector<T> terms;
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
terms.push_back(static_cast<T>(i));
|
||||||
|
}
|
||||||
|
auto final1 = bitmap_index_ptr->In(10, terms.data());
|
||||||
|
auto final2 = sort_index_ptr->In(10, terms.data());
|
||||||
|
EXPECT_EQ(final1.size(), final2.size());
|
||||||
|
for (int i = 0; i < final1.size(); i++) {
|
||||||
|
EXPECT_EQ(final1[i], final2[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto final3 = bitmap_index_ptr->NotIn(10, terms.data());
|
||||||
|
auto final4 = sort_index_ptr->NotIn(10, terms.data());
|
||||||
|
EXPECT_EQ(final4.size(), final3.size());
|
||||||
|
for (int i = 0; i < final3.size(); i++) {
|
||||||
|
EXPECT_EQ(final3[i], final4[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// high data cardinality
|
||||||
|
{
|
||||||
|
int N = 10000;
|
||||||
|
std::vector<int> data_cardinality = {1001, 2000};
|
||||||
|
for (auto& card : data_cardinality) {
|
||||||
|
auto bitmap_index = TestBuildIndex<T>(N, card, 0);
|
||||||
|
auto bitmap_index_ptr =
|
||||||
|
dynamic_cast<ScalarIndex<T>*>(bitmap_index.get());
|
||||||
|
auto sort_index = TestBuildIndex<T>(N, card, 1);
|
||||||
|
auto sort_index_ptr =
|
||||||
|
dynamic_cast<ScalarIndex<T>*>(sort_index.get());
|
||||||
|
std::vector<T> terms;
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
terms.push_back(static_cast<T>(i));
|
||||||
|
}
|
||||||
|
auto final1 = bitmap_index_ptr->In(10, terms.data());
|
||||||
|
auto final2 = sort_index_ptr->In(10, terms.data());
|
||||||
|
EXPECT_EQ(final1.size(), final2.size());
|
||||||
|
for (int i = 0; i < final1.size(); i++) {
|
||||||
|
EXPECT_EQ(final1[i], final2[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto final3 = bitmap_index_ptr->NotIn(10, terms.data());
|
||||||
|
auto final4 = sort_index_ptr->NotIn(10, terms.data());
|
||||||
|
EXPECT_EQ(final4.size(), final3.size());
|
||||||
|
for (int i = 0; i < final3.size(); i++) {
|
||||||
|
EXPECT_EQ(final3[i], final4[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
TestIndexSearchIn<std::string>() {
|
||||||
|
// low data cardinality
|
||||||
|
{
|
||||||
|
int N = 1000;
|
||||||
|
std::vector<int> data_cardinality = {10, 20, 100};
|
||||||
|
for (auto& card : data_cardinality) {
|
||||||
|
auto bitmap_index = TestBuildIndex<std::string>(N, card, 0);
|
||||||
|
auto bitmap_index_ptr =
|
||||||
|
dynamic_cast<ScalarIndex<std::string>*>(bitmap_index.get());
|
||||||
|
auto sort_index = TestBuildIndex<std::string>(N, card, 1);
|
||||||
|
auto sort_index_ptr =
|
||||||
|
dynamic_cast<ScalarIndex<std::string>*>(sort_index.get());
|
||||||
|
std::vector<std::string> terms;
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
terms.push_back(std::to_string(i));
|
||||||
|
}
|
||||||
|
auto final1 = bitmap_index_ptr->In(10, terms.data());
|
||||||
|
auto final2 = sort_index_ptr->In(10, terms.data());
|
||||||
|
EXPECT_EQ(final1.size(), final2.size());
|
||||||
|
for (int i = 0; i < final1.size(); i++) {
|
||||||
|
EXPECT_EQ(final1[i], final2[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto final3 = bitmap_index_ptr->NotIn(10, terms.data());
|
||||||
|
auto final4 = sort_index_ptr->NotIn(10, terms.data());
|
||||||
|
EXPECT_EQ(final4.size(), final3.size());
|
||||||
|
for (int i = 0; i < final3.size(); i++) {
|
||||||
|
EXPECT_EQ(final3[i], final4[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// high data cardinality
|
||||||
|
{
|
||||||
|
int N = 10000;
|
||||||
|
std::vector<int> data_cardinality = {1001, 2000};
|
||||||
|
for (auto& card : data_cardinality) {
|
||||||
|
auto bitmap_index = TestBuildIndex<std::string>(N, card, 0);
|
||||||
|
auto bitmap_index_ptr =
|
||||||
|
dynamic_cast<ScalarIndex<std::string>*>(bitmap_index.get());
|
||||||
|
auto sort_index = TestBuildIndex<std::string>(N, card, 1);
|
||||||
|
auto sort_index_ptr =
|
||||||
|
dynamic_cast<ScalarIndex<std::string>*>(sort_index.get());
|
||||||
|
std::vector<std::string> terms;
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
terms.push_back(std::to_string(i));
|
||||||
|
}
|
||||||
|
auto final1 = bitmap_index_ptr->In(10, terms.data());
|
||||||
|
auto final2 = sort_index_ptr->In(10, terms.data());
|
||||||
|
EXPECT_EQ(final1.size(), final2.size());
|
||||||
|
for (int i = 0; i < final1.size(); i++) {
|
||||||
|
EXPECT_EQ(final1[i], final2[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto final3 = bitmap_index_ptr->NotIn(10, terms.data());
|
||||||
|
auto final4 = sort_index_ptr->NotIn(10, terms.data());
|
||||||
|
EXPECT_EQ(final4.size(), final3.size());
|
||||||
|
for (int i = 0; i < final3.size(); i++) {
|
||||||
|
EXPECT_EQ(final3[i], final4[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Runs the bitmap-vs-reference In()/NotIn() comparison for every scalar type
// covered by the helper: the integer widths, both floating-point types, and
// std::string (which uses its own specialization).
TEST(ScalarTest, test_function_In) {
    // integers
    TestIndexSearchIn<int8_t>();
    TestIndexSearchIn<int16_t>();
    TestIndexSearchIn<int32_t>();
    TestIndexSearchIn<int64_t>();
    // floating point
    TestIndexSearchIn<float>();
    TestIndexSearchIn<double>();
    // strings
    TestIndexSearchIn<std::string>();
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void
|
||||||
|
TestIndexSearchRange() {
|
||||||
|
// low data cordinality
|
||||||
|
{
|
||||||
|
int N = 1000;
|
||||||
|
std::vector<int> data_cardinality = {10, 20, 100};
|
||||||
|
for (auto& card : data_cardinality) {
|
||||||
|
auto bitmap_index = TestBuildIndex<T>(N, card, 0);
|
||||||
|
auto bitmap_index_ptr =
|
||||||
|
dynamic_cast<ScalarIndex<T>*>(bitmap_index.get());
|
||||||
|
auto sort_index = TestBuildIndex<T>(N, card, 1);
|
||||||
|
auto sort_index_ptr =
|
||||||
|
dynamic_cast<ScalarIndex<T>*>(sort_index.get());
|
||||||
|
|
||||||
|
auto final1 = bitmap_index_ptr->Range(10, milvus::OpType::LessThan);
|
||||||
|
auto final2 = sort_index_ptr->Range(10, milvus::OpType::LessThan);
|
||||||
|
EXPECT_EQ(final1.size(), final2.size());
|
||||||
|
for (int i = 0; i < final1.size(); i++) {
|
||||||
|
EXPECT_EQ(final1[i], final2[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto final3 = bitmap_index_ptr->Range(10, true, 100, false);
|
||||||
|
auto final4 = sort_index_ptr->Range(10, true, 100, false);
|
||||||
|
EXPECT_EQ(final3.size(), final4.size());
|
||||||
|
for (int i = 0; i < final1.size(); i++) {
|
||||||
|
EXPECT_EQ(final3[i], final4[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// high data cordinality
|
||||||
|
{
|
||||||
|
int N = 10000;
|
||||||
|
std::vector<int> data_cardinality = {1001, 2000};
|
||||||
|
for (auto& card : data_cardinality) {
|
||||||
|
auto bitmap_index = TestBuildIndex<T>(N, card, 0);
|
||||||
|
auto bitmap_index_ptr =
|
||||||
|
dynamic_cast<ScalarIndex<T>*>(bitmap_index.get());
|
||||||
|
auto sort_index = TestBuildIndex<T>(N, card, 1);
|
||||||
|
auto sort_index_ptr =
|
||||||
|
dynamic_cast<ScalarIndex<T>*>(sort_index.get());
|
||||||
|
|
||||||
|
auto final1 = bitmap_index_ptr->Range(10, milvus::OpType::LessThan);
|
||||||
|
auto final2 = sort_index_ptr->Range(10, milvus::OpType::LessThan);
|
||||||
|
EXPECT_EQ(final1.size(), final2.size());
|
||||||
|
for (int i = 0; i < final1.size(); i++) {
|
||||||
|
EXPECT_EQ(final1[i], final2[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto final3 = bitmap_index_ptr->Range(10, true, 100, false);
|
||||||
|
auto final4 = sort_index_ptr->Range(10, true, 100, false);
|
||||||
|
EXPECT_EQ(final3.size(), final4.size());
|
||||||
|
for (int i = 0; i < final1.size(); i++) {
|
||||||
|
EXPECT_EQ(final3[i], final4[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Runs the bitmap-vs-reference Range() comparison for the numeric scalar
// types. std::string is not exercised here.
TEST(ScalarTest, test_function_range) {
    // integers
    TestIndexSearchRange<int8_t>();
    TestIndexSearchRange<int16_t>();
    TestIndexSearchRange<int32_t>();
    TestIndexSearchRange<int64_t>();
    // floating point
    TestIndexSearchRange<float>();
    TestIndexSearchRange<double>();
}
|
||||||
|
|||||||
@ -332,6 +332,13 @@ func fillDimension(field *schemapb.FieldSchema, indexParams map[string]string) e
|
|||||||
func checkTrain(field *schemapb.FieldSchema, indexParams map[string]string) error {
|
func checkTrain(field *schemapb.FieldSchema, indexParams map[string]string) error {
|
||||||
indexType := indexParams[common.IndexTypeKey]
|
indexType := indexParams[common.IndexTypeKey]
|
||||||
|
|
||||||
|
if indexType == indexparamcheck.IndexBitmap {
|
||||||
|
_, exist := indexParams[common.BitmapCardinalityLimitKey]
|
||||||
|
if !exist {
|
||||||
|
indexParams[common.BitmapCardinalityLimitKey] = paramtable.Get().CommonCfg.BitmapIndexCardinalityBound.GetValue()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
checker, err := indexparamcheck.GetIndexCheckerMgrInstance().GetChecker(indexType)
|
checker, err := indexparamcheck.GetIndexCheckerMgrInstance().GetChecker(indexType)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Failed to get index checker", zap.String(common.IndexTypeKey, indexType))
|
log.Warn("Failed to get index checker", zap.String(common.IndexTypeKey, indexType))
|
||||||
|
|||||||
@ -112,6 +112,8 @@ const (
|
|||||||
MaxCapacityKey = "max_capacity"
|
MaxCapacityKey = "max_capacity"
|
||||||
|
|
||||||
DropRatioBuildKey = "drop_ratio_build"
|
DropRatioBuildKey = "drop_ratio_build"
|
||||||
|
|
||||||
|
BitmapCardinalityLimitKey = "bitmap_cardinality_limit"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Collection properties key
|
// Collection properties key
|
||||||
|
|||||||
@ -11,7 +11,7 @@ import (
|
|||||||
func Test_BitmapIndexChecker(t *testing.T) {
|
func Test_BitmapIndexChecker(t *testing.T) {
|
||||||
c := newBITMAPChecker()
|
c := newBITMAPChecker()
|
||||||
|
|
||||||
assert.NoError(t, c.CheckTrain(map[string]string{}))
|
assert.NoError(t, c.CheckTrain(map[string]string{"bitmap_cardinality_limit": "100"}))
|
||||||
|
|
||||||
assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Int64))
|
assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Int64))
|
||||||
assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Float))
|
assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Float))
|
||||||
@ -19,4 +19,6 @@ func Test_BitmapIndexChecker(t *testing.T) {
|
|||||||
|
|
||||||
assert.Error(t, c.CheckValidDataType(schemapb.DataType_JSON))
|
assert.Error(t, c.CheckValidDataType(schemapb.DataType_JSON))
|
||||||
assert.Error(t, c.CheckValidDataType(schemapb.DataType_Array))
|
assert.Error(t, c.CheckValidDataType(schemapb.DataType_Array))
|
||||||
|
assert.Error(t, c.CheckTrain(map[string]string{}))
|
||||||
|
assert.Error(t, c.CheckTrain(map[string]string{"bitmap_cardinality_limit": "0"}))
|
||||||
}
|
}
|
||||||
|
|||||||
@ -2,17 +2,21 @@ package indexparamcheck
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math"
|
||||||
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
|
"github.com/milvus-io/milvus/pkg/common"
|
||||||
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
||||||
)
|
)
|
||||||
|
|
||||||
// STLSORTChecker checks if a STL_SORT index can be built.
|
|
||||||
type BITMAPChecker struct {
|
type BITMAPChecker struct {
|
||||||
scalarIndexChecker
|
scalarIndexChecker
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *BITMAPChecker) CheckTrain(params map[string]string) error {
|
func (c *BITMAPChecker) CheckTrain(params map[string]string) error {
|
||||||
|
if !CheckIntByRange(params, common.BitmapCardinalityLimitKey, 1, math.MaxInt) {
|
||||||
|
return fmt.Errorf("failed to check bitmap cardinality limit, should be larger than 0 and smaller than math.MaxInt")
|
||||||
|
}
|
||||||
return c.scalarIndexChecker.CheckTrain(params)
|
return c.scalarIndexChecker.CheckTrain(params)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -42,13 +42,14 @@ const (
|
|||||||
DefaultSessionTTL = 30 // s
|
DefaultSessionTTL = 30 // s
|
||||||
DefaultSessionRetryTimes = 30
|
DefaultSessionRetryTimes = 30
|
||||||
|
|
||||||
DefaultMaxDegree = 56
|
DefaultMaxDegree = 56
|
||||||
DefaultSearchListSize = 100
|
DefaultSearchListSize = 100
|
||||||
DefaultPQCodeBudgetGBRatio = 0.125
|
DefaultPQCodeBudgetGBRatio = 0.125
|
||||||
DefaultBuildNumThreadsRatio = 1.0
|
DefaultBuildNumThreadsRatio = 1.0
|
||||||
DefaultSearchCacheBudgetGBRatio = 0.10
|
DefaultSearchCacheBudgetGBRatio = 0.10
|
||||||
DefaultLoadNumThreadRatio = 8.0
|
DefaultLoadNumThreadRatio = 8.0
|
||||||
DefaultBeamWidthRatio = 4.0
|
DefaultBeamWidthRatio = 4.0
|
||||||
|
DefaultBitmapIndexCardinalityBound = 500
|
||||||
)
|
)
|
||||||
|
|
||||||
// ComponentParam is used to quickly and easily access all components' configurations.
|
// ComponentParam is used to quickly and easily access all components' configurations.
|
||||||
@ -212,6 +213,7 @@ type commonConfig struct {
|
|||||||
BeamWidthRatio ParamItem `refreshable:"true"`
|
BeamWidthRatio ParamItem `refreshable:"true"`
|
||||||
GracefulTime ParamItem `refreshable:"true"`
|
GracefulTime ParamItem `refreshable:"true"`
|
||||||
GracefulStopTimeout ParamItem `refreshable:"true"`
|
GracefulStopTimeout ParamItem `refreshable:"true"`
|
||||||
|
BitmapIndexCardinalityBound ParamItem `refreshable:"false"`
|
||||||
|
|
||||||
StorageType ParamItem `refreshable:"false"`
|
StorageType ParamItem `refreshable:"false"`
|
||||||
SimdType ParamItem `refreshable:"false"`
|
SimdType ParamItem `refreshable:"false"`
|
||||||
@ -443,6 +445,14 @@ This configuration is only used by querynode and indexnode, it selects CPU instr
|
|||||||
}
|
}
|
||||||
p.IndexSliceSize.Init(base.mgr)
|
p.IndexSliceSize.Init(base.mgr)
|
||||||
|
|
||||||
|
p.BitmapIndexCardinalityBound = ParamItem{
|
||||||
|
Key: "common.bitmapIndexCardinalityBound",
|
||||||
|
Version: "2.5.0",
|
||||||
|
DefaultValue: strconv.Itoa(DefaultBitmapIndexCardinalityBound),
|
||||||
|
Export: true,
|
||||||
|
}
|
||||||
|
p.BitmapIndexCardinalityBound.Init(base.mgr)
|
||||||
|
|
||||||
p.EnableMaterializedView = ParamItem{
|
p.EnableMaterializedView = ParamItem{
|
||||||
Key: "common.materializedView.enabled",
|
Key: "common.materializedView.enabled",
|
||||||
Version: "2.5.0",
|
Version: "2.5.0",
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user