diff --git a/internal/core/src/index/BitmapIndex.cpp b/internal/core/src/index/BitmapIndex.cpp index f578afc9de..ff8d07d7f8 100644 --- a/internal/core/src/index/BitmapIndex.cpp +++ b/internal/core/src/index/BitmapIndex.cpp @@ -34,6 +34,8 @@ namespace milvus { namespace index { +constexpr size_t ALIGNMENT = 32; // 32-byte alignment + template BitmapIndex::BitmapIndex( const storage::FileManagerContext& file_manager_context) @@ -373,7 +375,7 @@ BitmapIndex::BuildOffsetCache() { mmap_offsets_cache_.resize(total_num_rows_); for (auto it = bitmap_info_map_.begin(); it != bitmap_info_map_.end(); ++it) { - for (const auto& v : AccessBitmap(it->second)) { + for (const auto& v : it->second) { mmap_offsets_cache_[v] = it; } } @@ -430,50 +432,26 @@ BitmapIndex::DeserializeIndexData(const uint8_t* data_ptr, } template -void -BitmapIndex::DeserializeIndexDataForMmap(const char* data_ptr, - size_t index_length) { - for (size_t i = 0; i < index_length; ++i) { - T key; - memcpy(&key, data_ptr, sizeof(T)); - data_ptr += sizeof(T); - - roaring::Roaring value; - value = roaring::Roaring::read(reinterpret_cast(data_ptr)); - auto size = value.getSizeInBytes(); - - bitmap_info_map_[key] = {static_cast(data_ptr - mmap_data_), - size}; - data_ptr += size; - for (const auto& v : value) { - valid_bitset_.set(v); - } - } +T +BitmapIndex::ParseKey(const uint8_t** ptr) { + T key; + memcpy(&key, *ptr, sizeof(T)); + *ptr += sizeof(T); + return key; } template <> -void -BitmapIndex::DeserializeIndexDataForMmap(const char* data_ptr, - size_t index_length) { - for (size_t i = 0; i < index_length; ++i) { - size_t key_size; - memcpy(&key_size, data_ptr, sizeof(size_t)); - data_ptr += sizeof(size_t); +std::string +BitmapIndex::ParseKey(const uint8_t** ptr) { + auto data_ptr = *ptr; + size_t key_size; + memcpy(&key_size, data_ptr, sizeof(size_t)); + data_ptr += sizeof(size_t); - std::string key(reinterpret_cast(data_ptr), key_size); - data_ptr += key_size; - - roaring::Roaring value; - value = 
roaring::Roaring::read(reinterpret_cast(data_ptr)); - auto size = value.getSizeInBytes(); - - bitmap_info_map_[key] = {static_cast(data_ptr - mmap_data_), - size}; - data_ptr += size; - for (const auto& v : value) { - valid_bitset_.set(v); - } - } + std::string key(reinterpret_cast(data_ptr), key_size); + data_ptr += key_size; + *ptr = data_ptr; + return key; } template @@ -486,17 +464,42 @@ BitmapIndex::MMapIndexData(const std::string& file_name, std::filesystem::path(file_name).parent_path()); auto file = File::Open(file_name, O_RDWR | O_CREAT | O_TRUNC); - auto written = file.Write(data_ptr, data_size); - if (written != data_size) { - file.Close(); - remove(file_name.c_str()); - PanicInfo(ErrorCode::UnistdError, - fmt::format("write index to fd error: {}", strerror(errno))); + auto file_offset = 0; + std::map> bitmaps; + + for (size_t i = 0; i < index_length; ++i) { + T key = ParseKey(&data_ptr); + + roaring::Roaring value; + value = roaring::Roaring::read(reinterpret_cast(data_ptr)); + for (const auto& v : value) { + valid_bitset_.set(v); + } + + // convert roaring value to frozen mode + int32_t frozen_size = value.getFrozenSizeInBytes(); + auto aligned_size = + ((frozen_size + ALIGNMENT - 1) / ALIGNMENT) * ALIGNMENT; + std::vector buf(aligned_size, 0); + value.writeFrozen(reinterpret_cast(buf.data())); + + auto written = file.Write(buf.data(), aligned_size); + if (written != aligned_size) { + file.Close(); + remove(file_name.c_str()); + PanicInfo( + ErrorCode::UnistdError, + fmt::format("write data to fd error: {}", strerror(errno))); + } + bitmaps[key] = {file_offset, frozen_size}; + + file_offset += aligned_size; + data_ptr += value.getSizeInBytes(); } file.Seek(0, SEEK_SET); mmap_data_ = static_cast( - mmap(NULL, data_size, PROT_READ, MAP_PRIVATE, file.Descriptor(), 0)); + mmap(NULL, file_offset, PROT_READ, MAP_PRIVATE, file.Descriptor(), 0)); if (mmap_data_ == MAP_FAILED) { file.Close(); remove(file_name.c_str()); @@ -504,11 +507,15 @@
BitmapIndex::MMapIndexData(const std::string& file_name, ErrorCode::UnexpectedError, "failed to mmap: {}", strerror(errno)); } - mmap_size_ = data_size; + mmap_size_ = file_offset; unlink(file_name.c_str()); char* ptr = mmap_data_; - DeserializeIndexDataForMmap(ptr, index_length); + for (const auto& [key, value] : bitmaps) { + const auto& [offset, size] = value; + bitmap_info_map_[key] = + roaring::Roaring::frozenView(ptr + offset, size); + } is_mmap_ = true; } @@ -595,7 +602,7 @@ BitmapIndex::In(const size_t n, const T* values) { auto val = values[i]; auto it = bitmap_info_map_.find(val); if (it != bitmap_info_map_.end()) { - for (const auto& v : AccessBitmap(it->second)) { + for (const auto& v : it->second) { res.set(v); } } @@ -634,7 +641,7 @@ BitmapIndex::NotIn(const size_t n, const T* values) { auto val = values[i]; auto it = bitmap_info_map_.find(val); if (it != bitmap_info_map_.end()) { - for (const auto& v : AccessBitmap(it->second)) { + for (const auto& v : it->second) { res.reset(v); } } @@ -818,7 +825,7 @@ BitmapIndex::RangeForMmap(const T value, const OpType op) { } for (; lb != ub; lb++) { - for (const auto& v : AccessBitmap(lb->second)) { + for (const auto& v : lb->second) { res.set(v); } } @@ -1014,7 +1021,7 @@ BitmapIndex::RangeForMmap(const T lower_value, } for (; lb != ub; lb++) { - for (const auto& v : AccessBitmap(lb->second)) { + for (const auto& v : lb->second) { res.set(v); } } @@ -1112,7 +1119,7 @@ BitmapIndex::Reverse_Lookup(size_t idx) const { if (is_mmap_) { for (auto it = bitmap_info_map_.begin(); it != bitmap_info_map_.end(); it++) { - for (const auto& v : AccessBitmap(it->second)) { + for (const auto& v : it->second) { if (v == idx) { return it->first; } @@ -1233,7 +1240,7 @@ BitmapIndex::Query(const DatasetPtr& dataset) { ++it) { const auto& key = it->first; if (milvus::query::Match(key, prefix, op)) { - for (const auto& v : AccessBitmap(it->second)) { + for (const auto& v : it->second) { res.set(v); } } @@ -1282,7 +1289,7 @@ 
BitmapIndex::RegexQuery(const std::string& regex_pattern) { ++it) { const auto& key = it->first; if (matcher(key)) { - for (const auto& v : AccessBitmap(it->second)) { + for (const auto& v : it->second) { res.set(v); } } diff --git a/internal/core/src/index/BitmapIndex.h b/internal/core/src/index/BitmapIndex.h index fb677e6f31..6dcd1733b3 100644 --- a/internal/core/src/index/BitmapIndex.h +++ b/internal/core/src/index/BitmapIndex.h @@ -185,8 +185,8 @@ class BitmapIndex : public ScalarIndex { std::pair DeserializeIndexMeta(const uint8_t* data_ptr, size_t data_size); - void - DeserializeIndexDataForMmap(const char* data_ptr, size_t index_length); + T + ParseKey(const uint8_t** ptr); void DeserializeIndexData(const uint8_t* data_ptr, size_t index_length); @@ -239,11 +239,6 @@ class BitmapIndex : public ScalarIndex { size_t data_size, size_t index_length); - roaring::Roaring - AccessBitmap(const BitmapInfo& info) const { - return roaring::Roaring::read(mmap_data_ + info.offset_, info.size_); - } - void UnmapIndexData(); @@ -255,7 +250,7 @@ class BitmapIndex : public ScalarIndex { bool is_mmap_{false}; char* mmap_data_; int64_t mmap_size_; - std::map bitmap_info_map_; + std::map bitmap_info_map_; size_t total_num_rows_{0}; proto::schema::FieldSchema schema_; bool use_offset_cache_{false}; @@ -263,7 +258,8 @@ class BitmapIndex : public ScalarIndex { data_offsets_cache_; std::vector::iterator> bitsets_offsets_cache_; - std::vector::iterator> mmap_offsets_cache_; + std::vector::iterator> + mmap_offsets_cache_; std::shared_ptr file_manager_; // generate valid_bitset to speed up NotIn and IsNull and IsNotNull operate