feat: manually evict cache after building interim index (#41836)

issue: https://github.com/milvus-io/milvus/issues/41435

This PR also makes HasRawData of ChunkedSegmentSealedImpl answer based on
metadata, without needing to load the cache just to answer this simple
question.
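
A minimal sketch of that metadata-based check (illustrative only: the class
name SegmentSketch is made up, and the map is assumed to be populated whenever
an index finishes loading, as the hunks below do):

#include <cstdint>
#include <unordered_map>

class SegmentSketch {
 public:
    // Answered purely from metadata recorded at index-load time:
    // no cache cell is pinned, no index is touched.
    bool
    HasRawData(int64_t field_id) const {
        auto it = index_has_raw_data_.find(field_id);
        if (it != index_has_raw_data_.end()) {
            return it->second;
        }
        // No index loaded for this field: the raw field data itself is present.
        return true;
    }

 private:
    std::unordered_map<int64_t, bool> index_has_raw_data_;
};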

---------

Signed-off-by: Buqian Zheng <zhengbuqian@gmail.com>
Buqian Zheng committed 2025-05-16 16:34:23 +08:00
parent 4edb1bc6f1
commit b0260d8676
18 changed files with 144 additions and 64 deletions

View File

@ -166,6 +166,25 @@ class CacheSlot final : public std::enable_shared_from_this<CacheSlot<CellT>> {
});
}
// Manually evicts the cell if it is not pinned.
// Returns true if the cell ends up in a state other than LOADED.
bool
ManualEvict(cid_t cid) {
return cells_[cid].manual_evict();
}
// Returns true if any cell is evicted.
bool
ManualEvictAll() {
bool evicted = false;
for (cid_t cid = 0; cid < cells_.size(); ++cid) {
if (cells_[cid].manual_evict()) {
evicted = true;
}
}
return evicted;
}
size_t
num_cells() const {
return translator_->num_cells();
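
For orientation, the manual-eviction call chain this PR wires up, assembled from the hunks in this diff:

// ChunkedSegmentSealedImpl::load_field_data_common()   (segment hunk, below)
//   -> ChunkedColumnInterface::ManualEvictCache()      (virtual; default is a no-op)
//     -> CacheSlot<CellT>::ManualEvictAll()            (above; tries every cell)
//       -> ListNode::manual_evict()                    (below; refuses only pinned LOADED cells)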

View File

@ -25,6 +25,8 @@ struct Meta {
// In actual resource reservation, we use the actual size of the cell to determine the type.
StorageType storage_type;
CacheWarmupPolicy cache_warmup_policy;
// Whether the translator supports strategy-based eviction.
// Does not affect manual eviction.
bool support_eviction;
explicit Meta(StorageType storage_type,
CacheWarmupPolicy cache_warmup_policy,

View File

@ -23,6 +23,7 @@
#include "cachinglayer/lrucache/DList.h"
#include "cachinglayer/Utils.h"
#include "common/EasyAssert.h"
#include "log/Log.h"
namespace milvus::cachinglayer::internal {
@ -64,6 +65,28 @@ ListNode::~ListNode() {
}
}
bool
ListNode::manual_evict() {
std::unique_lock<std::shared_mutex> lock(mtx_);
if (state_ == State::ERROR || state_ == State::LOADING) {
LOG_ERROR("manual_evict() called on a {} cell", state_to_string(state_));
return true;
}
if (state_ == State::NOT_LOADED) {
return true;
}
if (pin_count_.load() > 0) {
LOG_ERROR("manual_evict() called on a LOADED and pinned cell, aborting eviction.");
return false;
}
// cell is LOADED
clear_data();
if (dlist_) {
dlist_->removeItem(this, size_);
}
return true;
}
ResourceUsage&
ListNode::size() {
return size_;

View File

@ -74,6 +74,11 @@ class ListNode {
ResourceUsage&
size();
// Manually evicts the cell if it is not pinned.
// Returns true if the cell ends up in a state other than LOADED.
bool
manual_evict();
// TODO(tiered storage 1): pin on ERROR should re-trigger loading.
// NOT_LOADED ---> LOADING ---> ERROR
// ^ |

View File

@ -69,6 +69,11 @@ class ChunkedColumnBase : public ChunkedColumnInterface {
virtual ~ChunkedColumnBase() = default;
void
ManualEvictCache() const override {
slot_->ManualEvictAll();
}
PinWrapper<const char*>
DataOfChunk(int chunk_id) const override {
auto ca = SemiInlineGet(slot_->PinCells({chunk_id}));

View File

@ -56,6 +56,11 @@ class ChunkedColumnGroup {
virtual ~ChunkedColumnGroup() = default;
void
ManualEvictCache() const {
slot_->ManualEvictAll();
}
// Get the number of group chunks
size_t
num_chunks() const {
@ -94,6 +99,14 @@ class ChunkedColumnGroup {
return meta->num_rows_until_chunk_;
}
size_t
NumFieldsInGroup() const {
auto meta =
static_cast<milvus::segcore::storagev2translator::GroupCTMeta*>(
slot_->meta());
return meta->num_fields_;
}
protected:
mutable std::shared_ptr<CacheSlot<GroupChunk>> slot_;
size_t num_chunks_{0};
@ -111,6 +124,13 @@ class ProxyChunkColumn : public ChunkedColumnInterface {
data_type_(field_meta.get_data_type()) {
}
void
ManualEvictCache() const override {
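// Evicting the shared group would drop the cached data of every field in
// it, so only evict when this field is the group's sole member.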
if (group_->NumFieldsInGroup() == 1) {
group_->ManualEvictCache();
}
}
PinWrapper<const char*>
DataOfChunk(int chunk_id) const override {
auto group_chunk = group_->GetGroupChunk(chunk_id);

View File

@ -26,6 +26,10 @@ class ChunkedColumnInterface {
public:
virtual ~ChunkedColumnInterface() = default;
// Default implementation does nothing.
virtual void
ManualEvictCache() const {}
// Get raw data pointer of a specific chunk
virtual cachinglayer::PinWrapper<const char*>
DataOfChunk(int chunk_id) const = 0;

View File

@ -113,7 +113,16 @@ ChunkedSegmentSealedImpl::LoadVecIndex(const LoadIndexInfo& info) {
"Before setting field_bit for field index, fieldID:{}. segmentID:{}, ",
info.field_id,
id_);
if (get_bit(field_data_ready_bitset_, field_id)) {
auto& field_meta = schema_->operator[](field_id);
LoadResourceRequest request =
milvus::index::IndexFactory::GetInstance().VecIndexLoadResource(
field_meta.get_data_type(),
info.index_engine_version,
info.index_size,
info.index_params,
info.enable_mmap);
if (request.has_raw_data && get_bit(field_data_ready_bitset_, field_id)) {
fields_.erase(field_id);
set_bit(field_data_ready_bitset_, field_id, false);
} else if (get_bit(binlog_index_bitset_, field_id)) {
@ -125,6 +134,7 @@ ChunkedSegmentSealedImpl::LoadVecIndex(const LoadIndexInfo& info) {
metric_type,
std::move(const_cast<LoadIndexInfo&>(info).cache_index));
set_bit(index_ready_bitset_, field_id, true);
index_has_raw_data_[field_id] = request.has_raw_data;
LOG_INFO("Has load vec index done, fieldID:{}. segmentID:{}, ",
info.field_id,
id_);
@ -172,6 +182,7 @@ ChunkedSegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
info.enable_mmap);
set_bit(index_ready_bitset_, field_id, true);
index_has_raw_data_[field_id] = request.has_raw_data;
// release field column if the index contains raw data
// only release non-primary field when in pk sorted mode
if (request.has_raw_data && get_bit(field_data_ready_bitset_, field_id) &&
@ -1047,6 +1058,7 @@ ChunkedSegmentSealedImpl::ClearData() {
field_data_ready_bitset_.reset();
index_ready_bitset_.reset();
binlog_index_bitset_.reset();
index_has_raw_data_.clear();
system_ready_count_ = 0;
num_rows_ = std::nullopt;
scalar_indexings_.clear();
@ -1453,19 +1465,22 @@ ChunkedSegmentSealedImpl::HasRawData(int64_t field_id) const {
get_bit(binlog_index_bitset_, fieldID)) {
AssertInfo(vector_indexings_.is_ready(fieldID),
"vector index is not ready");
auto accessor =
SemiInlineGet(vector_indexings_.get_field_indexing(fieldID)
->indexing_->PinCells({0}));
auto vec_index = accessor->get_cell_of(0);
return vec_index->HasRawData();
AssertInfo(index_has_raw_data_.find(fieldID) !=
index_has_raw_data_.end(),
"index_has_raw_data_ is not set for fieldID: " +
std::to_string(fieldID.get()));
return index_has_raw_data_.at(fieldID);
}
} else if (IsJsonDataType(field_meta.get_data_type())) {
return get_bit(field_data_ready_bitset_, fieldID);
} else {
auto scalar_index = scalar_indexings_.find(fieldID);
if (scalar_index != scalar_indexings_.end()) {
auto accessor = SemiInlineGet(scalar_index->second->PinCells({0}));
return accessor->get_cell_of(0)->HasRawData();
AssertInfo(index_has_raw_data_.find(fieldID) !=
index_has_raw_data_.end(),
"index_has_raw_data_ is not set for fieldID: " +
std::to_string(fieldID.get()));
return index_has_raw_data_.at(fieldID);
}
}
return true;
@ -1726,6 +1741,7 @@ ChunkedSegmentSealedImpl::generate_interim_index(const FieldId field_id) {
vec_binlog_config_[field_id] = std::move(field_binlog_config);
set_bit(binlog_index_bitset_, field_id, true);
index_has_raw_data_[field_id] = true;
LOG_INFO(
"replace binlog with binlog index in segment {}, field {}.",
this->get_segment_id(),
@ -1788,19 +1804,15 @@ ChunkedSegmentSealedImpl::load_field_data_common(
insert_record_.seal_pks();
}
if (generate_interim_index(field_id)) {
std::unique_lock lck(mutex_);
// mmap_fields is useless, no change
fields_.erase(field_id);
set_bit(field_data_ready_bitset_, field_id, false);
} else {
std::unique_lock lck(mutex_);
set_bit(field_data_ready_bitset_, field_id, true);
}
bool generated_interim_index = generate_interim_index(field_id);
{
std::unique_lock lck(mutex_);
update_row_count(num_rows);
std::unique_lock lck(mutex_);
set_bit(field_data_ready_bitset_, field_id, true);
update_row_count(num_rows);
if (generated_interim_index) {
if (auto column = fields_.find(field_id); column != fields_.end()) {
column->second->ManualEvictCache();
}
}
}
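
Since the hunk above renders the removed and added lines interleaved, the resulting load path after this change, condensed:

bool generated_interim_index = generate_interim_index(field_id);
{
    std::unique_lock lck(mutex_);
    // The field stays marked ready; HasRawData() is now answered from metadata.
    set_bit(field_data_ready_bitset_, field_id, true);
    update_row_count(num_rows);
    if (generated_interim_index) {
        // The interim index carries the raw data, so the raw chunks can be
        // evicted from the cache; the column stays registered in fields_.
        if (auto column = fields_.find(field_id); column != fields_.end()) {
            column->second->ManualEvictCache();
        }
    }
}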

View File

@ -408,7 +408,10 @@ class ChunkedSegmentSealedImpl : public SegmentSealed {
BitsetType index_ready_bitset_;
BitsetType binlog_index_bitset_;
std::atomic<int> system_ready_count_ = 0;
// segment data
// Whenever an index becomes ready for a field (index_ready_bitset_/binlog_index_bitset_
// set to true), index_has_raw_data_ must also be set to record whether the loaded
// index contains raw data.
std::unordered_map<FieldId, bool> index_has_raw_data_;
// TODO: generate index for scalar
std::optional<int64_t> num_rows_;

View File

@ -22,11 +22,14 @@ namespace milvus::segcore::storagev2translator {
struct GroupCTMeta : public milvus::cachinglayer::Meta {
std::vector<int64_t> num_rows_until_chunk_;
std::vector<int64_t> chunk_memory_size_;
GroupCTMeta(milvus::cachinglayer::StorageType storage_type,
size_t num_fields_;
GroupCTMeta(size_t num_fields,
milvus::cachinglayer::StorageType storage_type,
CacheWarmupPolicy cache_warmup_policy,
bool support_eviction)
: milvus::cachinglayer::Meta(
storage_type, cache_warmup_policy, support_eviction) {
storage_type, cache_warmup_policy, support_eviction),
num_fields_(num_fields) {
}
};
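
The new num_fields_ member exists so that ChunkedColumnGroup::NumFieldsInGroup() (earlier in this diff) can report whether a ProxyChunkColumn is the sole field in its group before it evicts the shared group cache.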

View File

@ -52,6 +52,7 @@ GroupChunkTranslator::GroupChunkTranslator(
row_group_meta_list_(row_group_meta_list),
field_id_list_(field_id_list),
meta_(
field_id_list.size(),
use_mmap ? milvus::cachinglayer::StorageType::DISK
: milvus::cachinglayer::StorageType::MEMORY,
// TODO(tiered storage 2): vector may be of small size and mixed with scalar, do we force it

View File

@ -14,7 +14,7 @@
# Update KNOWHERE_VERSION for the first occurrence
milvus_add_pkg_config("knowhere")
set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES "")
set( KNOWHERE_VERSION 5734ebe )
set( KNOWHERE_VERSION 725c197 )
set( GIT_REPOSITORY "https://github.com/zilliztech/knowhere.git")
message(STATUS "Knowhere repo: ${GIT_REPOSITORY}")
message(STATUS "Knowhere version: ${KNOWHERE_VERSION}")

View File

@ -188,7 +188,7 @@ TEST_P(BinlogIndexTest, AccuracyWithLoadFieldData) {
//assert segment has been built binlog index
EXPECT_TRUE(segment->HasIndex(vec_field_id));
EXPECT_EQ(segment->get_row_count(), data_n);
EXPECT_FALSE(segment->HasFieldData(vec_field_id));
EXPECT_TRUE(segment->HasFieldData(vec_field_id));
// 2. search binlog index
auto num_queries = 10;
@ -251,11 +251,11 @@ TEST_P(BinlogIndexTest, AccuracyWithLoadFieldData) {
load_info.cache_index =
CreateTestCacheIndex("test", std::move(indexing));
load_info.index_params["metric_type"] = metric_type;
segment->DropFieldData(vec_field_id);
ASSERT_NO_THROW(segment->LoadIndex(load_info));
EXPECT_TRUE(segment->HasIndex(vec_field_id));
EXPECT_EQ(segment->get_row_count(), data_n);
EXPECT_FALSE(segment->HasFieldData(vec_field_id));
// Only INDEX_FAISS_IVFFLAT has raw data, so only in that case is the raw field data released.
EXPECT_EQ(segment->HasFieldData(vec_field_id), index_type != knowhere::IndexEnum::INDEX_FAISS_IVFFLAT);
auto ivf_sr = segment->Search(plan.get(), ph_group.get(), 1L << 63, 0);
auto similary = GetKnnSearchRecall(num_queries,
binlog_index_sr->seg_offsets_.data(),
@ -281,7 +281,7 @@ TEST_P(BinlogIndexTest, AccuracyWithMapFieldData) {
//assert segment has been built binlog index
EXPECT_TRUE(segment->HasIndex(vec_field_id));
EXPECT_EQ(segment->get_row_count(), data_n);
EXPECT_FALSE(segment->HasFieldData(vec_field_id));
EXPECT_TRUE(segment->HasFieldData(vec_field_id));
// 2. search binlog index
auto num_queries = 10;
@ -345,11 +345,10 @@ TEST_P(BinlogIndexTest, AccuracyWithMapFieldData) {
load_info.cache_index =
CreateTestCacheIndex("test", std::move(indexing));
load_info.index_params["metric_type"] = metric_type;
segment->DropFieldData(vec_field_id);
ASSERT_NO_THROW(segment->LoadIndex(load_info));
EXPECT_TRUE(segment->HasIndex(vec_field_id));
EXPECT_EQ(segment->get_row_count(), data_n);
EXPECT_FALSE(segment->HasFieldData(vec_field_id));
EXPECT_EQ(segment->HasFieldData(vec_field_id), index_type != knowhere::IndexEnum::INDEX_FAISS_IVFFLAT);
auto ivf_sr = segment->Search(plan.get(), ph_group.get(), 1L << 63);
auto similary = GetKnnSearchRecall(num_queries,
binlog_index_sr->seg_offsets_.data(),
@ -395,11 +394,11 @@ TEST_P(BinlogIndexTest, DisableInterimIndex) {
load_info.cache_index = CreateTestCacheIndex("test", std::move(indexing));
load_info.index_params["metric_type"] = metric_type;
segment->DropFieldData(vec_field_id);
ASSERT_NO_THROW(segment->LoadIndex(load_info));
EXPECT_TRUE(segment->HasIndex(vec_field_id));
EXPECT_EQ(segment->get_row_count(), data_n);
EXPECT_FALSE(segment->HasFieldData(vec_field_id));
EXPECT_EQ(segment->HasFieldData(vec_field_id),
index_type != knowhere::IndexEnum::INDEX_FAISS_IVFFLAT);
}
TEST_P(BinlogIndexTest, LoadBingLogWihIDMAP) {

View File

@ -100,13 +100,15 @@ class TestChunkTranslator : public Translator<milvus::Chunk> {
class TestGroupChunkTranslator : public Translator<milvus::GroupChunk> {
public:
TestGroupChunkTranslator(std::vector<int64_t> num_rows_per_chunk,
TestGroupChunkTranslator(size_t num_fields,
std::vector<int64_t> num_rows_per_chunk,
std::string key,
std::vector<std::unique_ptr<GroupChunk>>&& chunks)
: Translator<milvus::GroupChunk>(),
num_cells_(num_rows_per_chunk.size()),
chunks_(std::move(chunks)),
meta_(segcore::storagev2translator::GroupCTMeta(
num_fields,
StorageType::MEMORY,
CacheWarmupPolicy::CacheWarmupPolicy_Disable,
true)) {

View File

@ -213,7 +213,7 @@ TEST_F(ChunkedColumnGroupTest, ChunkedColumnGroup) {
std::vector<std::unique_ptr<GroupChunk>> group_chunks;
group_chunks.push_back(std::move(group_chunk));
auto translator = std::make_unique<TestGroupChunkTranslator>(
std::vector<int64_t>{5}, "test_key", std::move(group_chunks));
2, std::vector<int64_t>{5}, "test_key", std::move(group_chunks));
auto column_group =
std::make_shared<ChunkedColumnGroup>(std::move(translator));
@ -250,7 +250,7 @@ TEST_F(ChunkedColumnGroupTest, ProxyChunkColumn) {
std::vector<std::unique_ptr<GroupChunk>> group_chunks;
group_chunks.push_back(std::move(group_chunk));
auto translator = std::make_unique<TestGroupChunkTranslator>(
std::vector<int64_t>{5}, "test_key", std::move(group_chunks));
2, std::vector<int64_t>{5}, "test_key", std::move(group_chunks));
auto column_group =
std::make_shared<ChunkedColumnGroup>(std::move(translator));

View File

@ -38,7 +38,7 @@ using namespace milvus;
using namespace milvus::segcore;
using namespace milvus::segcore::storagev2translator;
class TestGroupChunkTranslator : public ::testing::TestWithParam<bool> {
class GroupChunkTranslatorTest : public ::testing::TestWithParam<bool> {
void
SetUp() override {
auto conf = milvus_storage::ArrowFileSystemConfig();
@ -77,7 +77,7 @@ class TestGroupChunkTranslator : public ::testing::TestWithParam<bool> {
}
protected:
~TestGroupChunkTranslator() {
~GroupChunkTranslatorTest() {
if (GetParam()) { // if use_mmap is true
std::string mmap_dir = std::to_string(segment_id_);
if (std::filesystem::exists(mmap_dir)) {
@ -95,7 +95,7 @@ class TestGroupChunkTranslator : public ::testing::TestWithParam<bool> {
int64_t segment_id_ = 0;
};
TEST_P(TestGroupChunkTranslator, TestWithMmap) {
TEST_P(GroupChunkTranslatorTest, TestWithMmap) {
auto use_mmap = GetParam();
std::unordered_map<FieldId, FieldMeta> field_metas = schema_->get_fields();
auto column_group_info = FieldDataInfo(0, 3000, "");
@ -157,6 +157,6 @@ TEST_P(TestGroupChunkTranslator, TestWithMmap) {
}
}
INSTANTIATE_TEST_SUITE_P(TestGroupChunkTranslator,
TestGroupChunkTranslator,
INSTANTIATE_TEST_SUITE_P(GroupChunkTranslatorTest,
GroupChunkTranslatorTest,
testing::Bool());

View File

@ -248,9 +248,9 @@ class SealedSegmentRegexQueryTest : public ::testing::Test {
*(arr.mutable_data()->Add()) = raw_str[i];
}
auto index = index::CreateStringIndexSort();
std::vector<uint8_t> buffer(arr.ByteSize());
ASSERT_TRUE(arr.SerializeToArray(buffer.data(), arr.ByteSize()));
index->BuildWithRawDataForUT(arr.ByteSize(), buffer.data());
std::vector<uint8_t> buffer(arr.ByteSizeLong());
ASSERT_TRUE(arr.SerializeToArray(buffer.data(), arr.ByteSizeLong()));
index->BuildWithRawDataForUT(arr.ByteSizeLong(), buffer.data());
LoadIndexInfo info{
.field_id = schema->get_field_id(FieldName("str")).get(),
.index_params = GenIndexParams(index.get()),
@ -291,9 +291,9 @@ class SealedSegmentRegexQueryTest : public ::testing::Test {
*(arr.mutable_data()->Add()) = raw_str[i];
}
auto index = std::make_unique<MockStringIndex>();
std::vector<uint8_t> buffer(arr.ByteSize());
ASSERT_TRUE(arr.SerializeToArray(buffer.data(), arr.ByteSize()));
index->BuildWithRawDataForUT(arr.ByteSize(), buffer.data());
std::vector<uint8_t> buffer(arr.ByteSizeLong());
ASSERT_TRUE(arr.SerializeToArray(buffer.data(), arr.ByteSizeLong()));
index->BuildWithRawDataForUT(arr.ByteSizeLong(), buffer.data());
LoadIndexInfo info{
.field_id = schema->get_field_id(FieldName("str")).get(),
.index_params = GenIndexParams(index.get()),

View File

@ -60,7 +60,6 @@ import (
"github.com/milvus-io/milvus/pkg/v2/util/indexparams"
"github.com/milvus-io/milvus/pkg/v2/util/merr"
"github.com/milvus-io/milvus/pkg/v2/util/metautil"
"github.com/milvus-io/milvus/pkg/v2/util/metric"
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
"github.com/milvus-io/milvus/pkg/v2/util/timerecord"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
@ -1069,23 +1068,6 @@ func (s *LocalSegment) innerLoadIndex(ctx context.Context,
return err
}
updateIndexInfoSpan := tr.RecordSpan()
// Skip warnup chunk cache when
// . scalar data
// . index has row data
// . vector was bm25 function output
if !typeutil.IsVectorType(fieldType) || s.HasRawData(indexInfo.GetFieldID()) {
return nil
}
metricType, err := funcutil.GetAttrByKeyFromRepeatedKV(common.MetricTypeKey, indexInfo.IndexParams)
if err != nil {
return errors.New("metric type not exist in index params")
}
if metricType == metric.BM25 {
return nil
}
log.Info("Finish loading index",
zap.Duration("newLoadIndexInfoSpan", newLoadIndexInfoSpan),