milvus/internal/core/src/index/BitmapIndex.h
sparknack 4fae074d56
enhance: add write rate limit for disk file writer (#43912)
issue: #43040

---------

Signed-off-by: Shawn Wang <shawn.wang@zilliz.com>
2025-08-25 10:27:47 +08:00

287 lines
7.3 KiB
C++

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <string>
#include <roaring/roaring.hh>
#include "common/RegexQuery.h"
#include "index/ScalarIndex.h"
#include "pb/common.pb.h"
#include "storage/FileManager.h"
#include "storage/MemFileManagerImpl.h"
namespace milvus {
namespace index {
// Offset/size pair describing a span of bytes.
// NOTE(review): nothing in this header references BitmapInfo; presumably it
// describes where a serialized bitmap sits inside a buffer - confirm against
// the users of this type.
//
// Both fields carry default member initializers so that a default-constructed
// BitmapInfo is well defined (0, 0) instead of holding indeterminate values.
// The struct is still an aggregate (C++14 and later), so `BitmapInfo{off, len}`
// keeps working.
struct BitmapInfo {
    size_t offset_{0};  // start of the span
    size_t size_{0};    // length of the span
};
// Selects which in-memory representation a BitmapIndex uses for its
// per-key bitmaps. The numeric values are spelled out explicitly; they are
// the same values the enumerators would receive implicitly.
enum class BitmapIndexBuildMode {
    // Bitmaps are kept as roaring bitmaps.
    ROARING = 0,
    // Bitmaps are kept as plain bitsets.
    BITSET = 1,
};
/*
 * @brief Bitmap index implementation of ScalarIndex<T>.
 * @details Associates every key of type T with a bitmap. Depending on the
 *          state of the index the bitmaps live in one of three containers
 *          (see Cardinality()):
 *            - bitmap_info_map_ when is_mmap_ is set,
 *            - data_            when build_mode_ is ROARING,
 *            - bitsets_         otherwise (BITSET mode).
 *          T is not restricted to integral types: the class has dedicated
 *          handling for T = std::string (SupportRegexQuery / PatternMatch)
 *          and for Array-typed fields (HasRawData / BuildArrayField).
 */
template <typename T>
class BitmapIndex : public ScalarIndex<T> {
 public:
    explicit BitmapIndex(
        const storage::FileManagerContext& file_manager_context =
            storage::FileManagerContext());

    // Releases the memory mapping, but only if one is active (is_mmap_).
    // NOTE(review): the class declares a destructor but no copy/move
    // operations. If the base class permits copying, a copy would share
    // mmap_data_ and the iterator caches below - confirm copies never happen.
    ~BitmapIndex() {
        if (is_mmap_) {
            UnmapIndexData();
        }
    }

    BinarySet
    Serialize(const Config& config) override;

    void
    Load(const BinarySet& index_binary, const Config& config = {}) override;

    void
    Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;

    // Number of rows covered by the index (total_num_rows_).
    int64_t
    Count() override {
        return total_num_rows_;
    }

    // Always reports ScalarIndexType::BITMAP.
    ScalarIndexType
    GetIndexType() const override {
        return ScalarIndexType::BITMAP;
    }

    void
    Build(size_t n, const T* values, const bool* valid_data = nullptr) override;

    void
    Build(const Config& config = {}) override;

    void
    BuildWithFieldData(const std::vector<FieldDataPtr>& datas) override;

    const TargetBitmap
    In(size_t n, const T* values) override;

    const TargetBitmap
    NotIn(size_t n, const T* values) override;

    const TargetBitmap
    IsNull() override;

    const TargetBitmap
    IsNotNull() override;

    // Single-bound range query; `op` selects the comparison.
    const TargetBitmap
    Range(T value, OpType op) override;

    // Two-bound range query with per-bound inclusiveness flags.
    const TargetBitmap
    Range(T lower_bound_value,
          bool lb_inclusive,
          T upper_bound_value,
          bool ub_inclusive) override;

    std::optional<T>
    Reverse_Lookup(size_t offset) const override;

    // Currently forwards to Count(), i.e. it reports the row count.
    // NOTE(review): this is not a byte size - confirm that is what callers
    // of Size() expect.
    int64_t
    Size() override {
        return Count();
    }

    IndexStatsPtr
    Upload(const Config& config = {}) override;

    // Returns false when the indexed field's schema type is Array and true
    // for every other data type.
    const bool
    HasRawData() const override {
        return schema_.data_type() != proto::schema::DataType::Array;
    }

    void
    LoadWithoutAssemble(const BinarySet& binary_set,
                        const Config& config) override;

    const TargetBitmap
    Query(const DatasetPtr& dataset) override;

    // Pattern matching is offered exactly when regex queries are.
    bool
    SupportPatternMatch() const override {
        return SupportRegexQuery();
    }

    // Dispatches a pattern-match request by operator type:
    //   - PrefixMatch / PostfixMatch / InnerMatch: packs the operator and
    //     the pattern into a Dataset and forwards it to Query().
    //   - Match: translates `pattern` with PatternMatchTranslator and
    //     forwards the result to RegexQuery().
    //   - anything else: reports ErrorCode::OpTypeInvalid via ThrowInfo.
    const TargetBitmap
    PatternMatch(const std::string& pattern, proto::plan::OpType op) override {
        switch (op) {
            case proto::plan::OpType::PrefixMatch:
            case proto::plan::OpType::PostfixMatch:
            case proto::plan::OpType::InnerMatch: {
                auto dataset = std::make_unique<Dataset>();
                dataset->Set(milvus::index::OPERATOR_TYPE, op);
                dataset->Set(milvus::index::MATCH_VALUE, pattern);
                return Query(std::move(dataset));
            }
            case proto::plan::OpType::Match: {
                PatternMatchTranslator translator;
                auto regex_pattern = translator(pattern);
                return RegexQuery(regex_pattern);
            }
            default:
                ThrowInfo(ErrorCode::OpTypeInvalid,
                          "not supported op type: {} for index PatterMatch",
                          op);
        }
    }

    // Regex queries are only available for the std::string instantiation.
    bool
    SupportRegexQuery() const override {
        return std::is_same_v<T, std::string>;
    }

    const TargetBitmap
    RegexQuery(const std::string& regex_pattern) override;

 public:
    // Number of keys held by whichever container is currently in use:
    // bitmap_info_map_ when mmap-backed, otherwise data_ (ROARING mode) or
    // bitsets_ (BITSET mode).
    int64_t
    Cardinality() {
        if (is_mmap_) {
            return bitmap_info_map_.size();
        }
        if (build_mode_ == BitmapIndexBuildMode::ROARING) {
            return data_.size();
        } else {
            return bitsets_.size();
        }
    }

 private:
    // ---- build helpers (defined out of line) ----
    void
    BuildPrimitiveField(const std::vector<FieldDataPtr>& datas);

    void
    BuildArrayField(const std::vector<FieldDataPtr>& datas);

    // ---- serialization / deserialization helpers (defined out of line) ----
    size_t
    GetIndexDataSize();

    void
    SerializeIndexData(uint8_t* index_data_ptr);

    std::pair<std::shared_ptr<uint8_t[]>, size_t>
    SerializeIndexMeta();

    std::pair<size_t, size_t>
    DeserializeIndexMeta(const uint8_t* data_ptr, size_t data_size);

    T
    ParseKey(const uint8_t** ptr);

    void
    DeserializeIndexData(const uint8_t* data_ptr, size_t index_length);

    // ---- reverse-lookup cache helpers (defined out of line) ----
    void
    BuildOffsetCache();

    T
    Reverse_Lookup_InCache(size_t idx) const;

    void
    ChooseIndexLoadMode(int64_t index_length);

    bool
    ShouldSkip(const T lower_value, const T upper_value, const OpType op);

    TargetBitmap
    ConvertRoaringToBitset(const roaring::Roaring& values);

    // ---- per-representation range query helpers ----
    // One single-bound and one two-bound variant exists for each of the
    // three storage representations (roaring, bitset, mmap).
    TargetBitmap
    RangeForRoaring(T value, OpType op);

    TargetBitmap
    RangeForBitset(T value, OpType op);

    TargetBitmap
    RangeForMmap(T value, OpType op);

    TargetBitmap
    RangeForRoaring(T lower_bound_value,
                    bool lb_inclusive,
                    T upper_bound_value,
                    bool ub_inclusive);

    TargetBitmap
    RangeForBitset(T lower_bound_value,
                   bool lb_inclusive,
                   T upper_bound_value,
                   bool ub_inclusive);

    TargetBitmap
    RangeForMmap(T lower_bound_value,
                 bool lb_inclusive,
                 T upper_bound_value,
                 bool ub_inclusive);

    // ---- memory-mapping helpers (defined out of line) ----
    void
    MMapIndexData(const std::string& filepath,
                  const uint8_t* data,
                  size_t data_size,
                  size_t index_length,
                  milvus::proto::common::LoadPriority priority);

    void
    UnmapIndexData();

 public:
    bool is_built_{false};

    // Which container Cardinality() consults when the index is not
    // mmap-backed. Defaults to the first enumerator so the member is never
    // read uninitialized before a build/load assigns it.
    BitmapIndexBuildMode build_mode_{BitmapIndexBuildMode::ROARING};

    // Key -> bitmap, used in ROARING mode.
    std::map<T, roaring::Roaring> data_;
    // Key -> bitmap, used in BITSET mode.
    std::map<T, TargetBitmap> bitsets_;

    // True while the index is backed by a memory mapping; the destructor
    // unmaps only in that case.
    bool is_mmap_{false};
    // Mapping base address and length. Initialized to "no mapping" so they
    // never hold indeterminate values.
    char* mmap_data_{nullptr};
    int64_t mmap_size_{0};
    // Key -> bitmap, used when is_mmap_ is set.
    std::map<T, roaring::Roaring> bitmap_info_map_;

    // Row count reported by Count().
    size_t total_num_rows_{0};
    // Schema of the indexed field; HasRawData() inspects its data type.
    proto::schema::FieldSchema schema_;

    // Offset caches: one vector of map iterators per storage representation.
    // NOTE(review): presumably indexed by row offset and filled by
    // BuildOffsetCache() for Reverse_Lookup_InCache() - confirm in the
    // implementation file.
    bool use_offset_cache_{false};
    std::vector<typename std::map<T, roaring::Roaring>::iterator>
        data_offsets_cache_;
    std::vector<typename std::map<T, TargetBitmap>::iterator>
        bitsets_offsets_cache_;
    std::vector<typename std::map<T, roaring::Roaring>::iterator>
        mmap_offsets_cache_;

    std::shared_ptr<storage::MemFileManagerImpl> file_manager_;

    // Precomputed validity bitset, kept to speed up the NotIn, IsNull and
    // IsNotNull operations.
    TargetBitmap valid_bitset_;
};
} // namespace index
} // namespace milvus