mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
enhance: cachinglayer: add mmap and eviction support for TextMatchIndex (#44806)
issue: #41435, #44502
Signed-off-by: Shawn Wang <shawn.wang@zilliz.com>
This commit is contained in:
parent
9c2aeaa258
commit
4bd30a74ca
@ -101,19 +101,26 @@ class TextMatchIndex : public InvertedIndexTantivy<std::string> {
|
||||
class TextMatchIndexHolder {
|
||||
public:
|
||||
explicit TextMatchIndexHolder(
|
||||
std::unique_ptr<milvus::index::TextMatchIndex> index)
|
||||
: index_(std::move(index)), size_(index_ ? index_->ByteSize() : 0) {
|
||||
if (size_ > 0) {
|
||||
milvus::cachinglayer::Manager::GetInstance().ChargeLoadedResource(
|
||||
{size_, 0});
|
||||
}
|
||||
std::unique_ptr<milvus::index::TextMatchIndex> index, bool mmap_enabled)
|
||||
: index_(std::move(index)), loaded_size_([&]() {
|
||||
if (!index_) {
|
||||
return milvus::cachinglayer::ResourceUsage(0, 0);
|
||||
}
|
||||
if (mmap_enabled) {
|
||||
return milvus::cachinglayer::ResourceUsage(
|
||||
0, index_->ByteSize());
|
||||
} else {
|
||||
return milvus::cachinglayer::ResourceUsage(index_->ByteSize(),
|
||||
0);
|
||||
}
|
||||
}()) {
|
||||
milvus::cachinglayer::Manager::GetInstance().ChargeLoadedResource(
|
||||
loaded_size_);
|
||||
}
|
||||
|
||||
~TextMatchIndexHolder() {
|
||||
if (size_ > 0) {
|
||||
milvus::cachinglayer::Manager::GetInstance().RefundLoadedResource(
|
||||
{size_, 0});
|
||||
}
|
||||
milvus::cachinglayer::Manager::GetInstance().RefundLoadedResource(
|
||||
loaded_size_);
|
||||
}
|
||||
|
||||
milvus::index::TextMatchIndex*
|
||||
@ -123,7 +130,7 @@ class TextMatchIndexHolder {
|
||||
|
||||
private:
|
||||
std::unique_ptr<milvus::index::TextMatchIndex> index_;
|
||||
int64_t size_;
|
||||
const milvus::cachinglayer::ResourceUsage loaded_size_;
|
||||
};
|
||||
|
||||
} // namespace milvus::index
|
||||
|
||||
@ -1739,8 +1739,8 @@ ChunkedSegmentSealedImpl::CreateTextIndex(FieldId field_id) {
|
||||
index->RegisterTokenizer("milvus_tokenizer",
|
||||
field_meta.get_analyzer_params().c_str());
|
||||
|
||||
text_indexes_[field_id] =
|
||||
std::make_shared<index::TextMatchIndexHolder>(std::move(index));
|
||||
text_indexes_[field_id] = std::make_shared<index::TextMatchIndexHolder>(
|
||||
std::move(index), cfg.GetScalarIndexEnableMmap());
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@ -41,7 +41,7 @@ TextMatchIndexTranslator::TextMatchIndexTranslator(
|
||||
/* is_index */ true),
|
||||
milvus::segcore::getCacheWarmupPolicy(/* is_vector */ false,
|
||||
/* is_index */ true),
|
||||
/* support_eviction */ false) {
|
||||
/* support_eviction */ true) {
|
||||
}
|
||||
|
||||
size_t
|
||||
@ -59,7 +59,16 @@ std::pair<milvus::cachinglayer::ResourceUsage,
|
||||
TextMatchIndexTranslator::estimated_byte_size_of_cell(
|
||||
milvus::cachinglayer::cid_t) const {
|
||||
// ignore the cid checking, because there is only one cell
|
||||
return {{load_info_.index_size, 0}, {2 * load_info_.index_size, 0}};
|
||||
if (load_info_.enable_mmap) {
|
||||
return {{0, load_info_.index_size},
|
||||
{load_info_.index_size, load_info_.index_size}};
|
||||
} else {
|
||||
// The reason the maximum disk usage is not zero is that the text match index
|
||||
// is first written to the disk, then loaded into memory. Only after that are
|
||||
// the disk files deleted.
|
||||
return {{load_info_.index_size, 0},
|
||||
{load_info_.index_size, load_info_.index_size}};
|
||||
}
|
||||
}
|
||||
|
||||
int64_t
|
||||
@ -98,7 +107,11 @@ TextMatchIndexTranslator::get_cells(
|
||||
load_info_.field_id,
|
||||
load_info_.segment_id);
|
||||
|
||||
index->SetCellSize({index->ByteSize(), 0});
|
||||
if (load_info_.enable_mmap) {
|
||||
index->SetCellSize({0, index->ByteSize()});
|
||||
} else {
|
||||
index->SetCellSize({index->ByteSize(), 0});
|
||||
}
|
||||
|
||||
std::vector<std::pair<milvus::cachinglayer::cid_t,
|
||||
std::unique_ptr<milvus::index::TextMatchIndex>>>
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user