enhance: cachinglayer: add mmap and eviction support for TextMatchIndex (#44806)

issue: #41435, #44502

Signed-off-by: Shawn Wang <shawn.wang@zilliz.com>
This commit is contained in:
sparknack 2025-10-17 14:42:02 +08:00 committed by GitHub
parent 9c2aeaa258
commit 4bd30a74ca
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 36 additions and 16 deletions

View File

@ -101,19 +101,26 @@ class TextMatchIndex : public InvertedIndexTantivy<std::string> {
class TextMatchIndexHolder { class TextMatchIndexHolder {
public: public:
explicit TextMatchIndexHolder( explicit TextMatchIndexHolder(
std::unique_ptr<milvus::index::TextMatchIndex> index) std::unique_ptr<milvus::index::TextMatchIndex> index, bool mmap_enabled)
: index_(std::move(index)), size_(index_ ? index_->ByteSize() : 0) { : index_(std::move(index)), loaded_size_([&]() {
if (size_ > 0) { if (!index_) {
milvus::cachinglayer::Manager::GetInstance().ChargeLoadedResource( return milvus::cachinglayer::ResourceUsage(0, 0);
{size_, 0}); }
} if (mmap_enabled) {
return milvus::cachinglayer::ResourceUsage(
0, index_->ByteSize());
} else {
return milvus::cachinglayer::ResourceUsage(index_->ByteSize(),
0);
}
}()) {
milvus::cachinglayer::Manager::GetInstance().ChargeLoadedResource(
loaded_size_);
} }
~TextMatchIndexHolder() { ~TextMatchIndexHolder() {
if (size_ > 0) { milvus::cachinglayer::Manager::GetInstance().RefundLoadedResource(
milvus::cachinglayer::Manager::GetInstance().RefundLoadedResource( loaded_size_);
{size_, 0});
}
} }
milvus::index::TextMatchIndex* milvus::index::TextMatchIndex*
@ -123,7 +130,7 @@ class TextMatchIndexHolder {
private: private:
std::unique_ptr<milvus::index::TextMatchIndex> index_; std::unique_ptr<milvus::index::TextMatchIndex> index_;
int64_t size_; const milvus::cachinglayer::ResourceUsage loaded_size_;
}; };
} // namespace milvus::index } // namespace milvus::index

View File

@ -1739,8 +1739,8 @@ ChunkedSegmentSealedImpl::CreateTextIndex(FieldId field_id) {
index->RegisterTokenizer("milvus_tokenizer", index->RegisterTokenizer("milvus_tokenizer",
field_meta.get_analyzer_params().c_str()); field_meta.get_analyzer_params().c_str());
text_indexes_[field_id] = text_indexes_[field_id] = std::make_shared<index::TextMatchIndexHolder>(
std::make_shared<index::TextMatchIndexHolder>(std::move(index)); std::move(index), cfg.GetScalarIndexEnableMmap());
} }
void void

View File

@ -41,7 +41,7 @@ TextMatchIndexTranslator::TextMatchIndexTranslator(
/* is_index */ true), /* is_index */ true),
milvus::segcore::getCacheWarmupPolicy(/* is_vector */ false, milvus::segcore::getCacheWarmupPolicy(/* is_vector */ false,
/* is_index */ true), /* is_index */ true),
/* support_eviction */ false) { /* support_eviction */ true) {
} }
size_t size_t
@ -59,7 +59,16 @@ std::pair<milvus::cachinglayer::ResourceUsage,
TextMatchIndexTranslator::estimated_byte_size_of_cell( TextMatchIndexTranslator::estimated_byte_size_of_cell(
milvus::cachinglayer::cid_t) const { milvus::cachinglayer::cid_t) const {
// ignore the cid checking, because there is only one cell // ignore the cid checking, because there is only one cell
return {{load_info_.index_size, 0}, {2 * load_info_.index_size, 0}}; if (load_info_.enable_mmap) {
return {{0, load_info_.index_size},
{load_info_.index_size, load_info_.index_size}};
} else {
// The reason the maximum disk usage is not zero is that the text match index
// is first written to the disk, then loaded into memory. Only after that are
// the disk files deleted.
return {{load_info_.index_size, 0},
{load_info_.index_size, load_info_.index_size}};
}
} }
int64_t int64_t
@ -98,7 +107,11 @@ TextMatchIndexTranslator::get_cells(
load_info_.field_id, load_info_.field_id,
load_info_.segment_id); load_info_.segment_id);
index->SetCellSize({index->ByteSize(), 0}); if (load_info_.enable_mmap) {
index->SetCellSize({0, index->ByteSize()});
} else {
index->SetCellSize({index->ByteSize(), 0});
}
std::vector<std::pair<milvus::cachinglayer::cid_t, std::vector<std::pair<milvus::cachinglayer::cid_t,
std::unique_ptr<milvus::index::TextMatchIndex>>> std::unique_ptr<milvus::index::TextMatchIndex>>>