From dce238e93955dcee0407be60949082e4910380d8 Mon Sep 17 00:00:00 2001 From: cqy123456 <39671710+cqy123456@users.noreply.github.com> Date: Tue, 18 May 2021 11:02:10 +0800 Subject: [PATCH] Dirty BloomFilter is left in the cache (#5252) Multi threads accessing will leave dirty BloomFilter in the cache. Now, reading threads will not overwrite the cache. And it should be solved. Issue 4897 involves BloomFilter and Blacklist. This commit try to fix BloomFilter firstly. Resolves: #4897 Signed-off-by: cqy123456 --- core/src/cache/Cache.h | 3 +++ core/src/cache/Cache.inl | 10 ++++++++++ core/src/cache/CacheMgr.h | 3 +++ core/src/cache/CacheMgr.inl | 11 +++++++++++ core/src/db/DBImpl.cpp | 2 +- core/src/db/insert/MemTable.cpp | 2 +- core/src/scheduler/JobMgr.cpp | 2 +- core/src/segment/SegmentReader.cpp | 8 ++++++-- core/src/segment/SegmentReader.h | 2 +- 9 files changed, 37 insertions(+), 6 deletions(-) diff --git a/core/src/cache/Cache.h b/core/src/cache/Cache.h index 34594c4573..94102ad0ac 100644 --- a/core/src/cache/Cache.h +++ b/core/src/cache/Cache.h @@ -66,6 +66,9 @@ class Cache { void insert(const std::string& key, const ItemObj& item); + void + insert_if_not_exist(const std::string& key, const ItemObj& item); + void erase(const std::string& key); diff --git a/core/src/cache/Cache.inl b/core/src/cache/Cache.inl index c371efe515..e24c699f5e 100644 --- a/core/src/cache/Cache.inl +++ b/core/src/cache/Cache.inl @@ -64,6 +64,16 @@ Cache::insert(const std::string& key, const ItemObj& item) { insert_internal(key, item); } +template +void +Cache::insert_if_not_exist(const std::string& key, const ItemObj& item) { + std::lock_guard lock(mutex_); + if (lru_.exists(key) == true) { + return ; + } + insert_internal(key, item); +} + template void Cache::erase(const std::string& key) { diff --git a/core/src/cache/CacheMgr.h b/core/src/cache/CacheMgr.h index 114b089c8f..00fe79b569 100644 --- a/core/src/cache/CacheMgr.h +++ b/core/src/cache/CacheMgr.h @@ -36,6 +36,9 @@ class CacheMgr { virtual void InsertItem(const std::string& key, const ItemObj& data); + virtual void + InsertItemIfNotExist(const std::string& key, const ItemObj& data); + virtual void EraseItem(const std::string& key); diff --git a/core/src/cache/CacheMgr.inl b/core/src/cache/CacheMgr.inl index 887e2a47f7..7ff248cf09 100644 --- a/core/src/cache/CacheMgr.inl +++ b/core/src/cache/CacheMgr.inl @@ -62,6 +62,17 @@ CacheMgr::InsertItem(const std::string& key, const ItemObj& data) { server::Metrics::GetInstance().CacheAccessTotalIncrement(); } +template +void +CacheMgr::InsertItemIfNotExist(const std::string& key, const ItemObj& data) { + if (cache_ == nullptr) { + LOG_SERVER_ERROR_ << "Cache doesn't exist"; + return; + } + cache_->insert_if_not_exist(key, data); + server::Metrics::GetInstance().CacheAccessTotalIncrement(); +} + template void CacheMgr::EraseItem(const std::string& key) { diff --git a/core/src/db/DBImpl.cpp b/core/src/db/DBImpl.cpp index 78103e9dcc..26c9979c6c 100644 --- a/core/src/db/DBImpl.cpp +++ b/core/src/db/DBImpl.cpp @@ -1502,7 +1502,7 @@ DBImpl::GetVectorsByIdHelper(const IDNumbers& id_array, std::vectorCheck(id)) { diff --git a/core/src/scheduler/JobMgr.cpp b/core/src/scheduler/JobMgr.cpp index 0856c749be..c3d2276545 100644 --- a/core/src/scheduler/JobMgr.cpp +++ b/core/src/scheduler/JobMgr.cpp @@ -98,7 +98,7 @@ JobMgr::worker_function() { engine::utils::GetParentPath(location, segment_dir); segment::SegmentReader segment_reader(segment_dir); segment::IdBloomFilterPtr id_bloom_filter_ptr; - segment_reader.LoadBloomFilter(id_bloom_filter_ptr); + segment_reader.LoadBloomFilter(id_bloom_filter_ptr, false); // Check if the id is present. bool pass = true; diff --git a/core/src/segment/SegmentReader.cpp b/core/src/segment/SegmentReader.cpp index 21f9f31820..9f29fe477f 100644 --- a/core/src/segment/SegmentReader.cpp +++ b/core/src/segment/SegmentReader.cpp @@ -104,7 +104,7 @@ SegmentReader::LoadVectorIndex(const std::string& location, segment::VectorIndex } Status -SegmentReader::LoadBloomFilter(segment::IdBloomFilterPtr& id_bloom_filter_ptr) { +SegmentReader::LoadBloomFilter(segment::IdBloomFilterPtr& id_bloom_filter_ptr, bool cache_force) { codec::DefaultCodec default_codec; try { // load id_bloom_filter from cache @@ -117,7 +117,11 @@ SegmentReader::LoadBloomFilter(segment::IdBloomFilterPtr& id_bloom_filter_ptr) { default_codec.GetIdBloomFilterFormat()->read(fs_ptr_, id_bloom_filter_ptr); // add id_bloom_filter into cache - cache::CpuCacheMgr::GetInstance()->InsertItem(cache_key, id_bloom_filter_ptr); + if (cache_force) { + cache::CpuCacheMgr::GetInstance()->InsertItem(cache_key, id_bloom_filter_ptr); + } else { + cache::CpuCacheMgr::GetInstance()->InsertItemIfNotExist(cache_key, id_bloom_filter_ptr); + } } } catch (std::exception& e) { std::string err_msg = "Failed to load bloom filter: " + std::string(e.what()); diff --git a/core/src/segment/SegmentReader.h b/core/src/segment/SegmentReader.h index 1bc83de72d..5bf7174478 100644 --- a/core/src/segment/SegmentReader.h +++ b/core/src/segment/SegmentReader.h @@ -49,7 +49,7 @@ class SegmentReader { LoadVectorIndex(const std::string& location, segment::VectorIndexPtr& vector_index_ptr); Status - LoadBloomFilter(segment::IdBloomFilterPtr& id_bloom_filter_ptr); + LoadBloomFilter(segment::IdBloomFilterPtr& id_bloom_filter_ptr, bool cache_force); Status LoadDeletedDocs(segment::DeletedDocsPtr& deleted_docs_ptr);