From e7bc57e7a02802b87678c0588fecd98ec0c2856e Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 15 Jul 2025 17:05:32 +0800 Subject: [PATCH] fix: fix text match index / json key stats index leak when segment released (#43317) PR: https://github.com/milvus-io/milvus/pull/42655 issue: https://github.com/milvus-io/milvus/issues/42626 --------- Signed-off-by: SpadeA --- .../core/src/storage/DiskFileManagerImpl.cpp | 4 ++ internal/core/src/storage/Util.cpp | 40 +++++++++++++------ internal/core/src/storage/Util.h | 12 ++++-- .../unittest/test_disk_file_manager_test.cpp | 37 +++++++++++++++++ tests/python_client/Dockerfile | 2 +- 5 files changed, 79 insertions(+), 16 deletions(-) diff --git a/internal/core/src/storage/DiskFileManagerImpl.cpp b/internal/core/src/storage/DiskFileManagerImpl.cpp index 049a58f8f6..956e2cd9e6 100644 --- a/internal/core/src/storage/DiskFileManagerImpl.cpp +++ b/internal/core/src/storage/DiskFileManagerImpl.cpp @@ -57,6 +57,10 @@ DiskFileManagerImpl::~DiskFileManagerImpl() { LocalChunkManagerSingleton::GetInstance().GetChunkManager(); local_chunk_manager->RemoveDir(GetIndexPathPrefixWithBuildID( local_chunk_manager, index_meta_.build_id)); + local_chunk_manager->RemoveDir(GetTextIndexPathPrefixWithBuildID( + local_chunk_manager, index_meta_.build_id)); + local_chunk_manager->RemoveDir(GetJsonKeyIndexPathPrefixWithBuildID( + local_chunk_manager, index_meta_.build_id)); } bool diff --git a/internal/core/src/storage/Util.cpp b/internal/core/src/storage/Util.cpp index 85ec72152c..e1bb95579a 100644 --- a/internal/core/src/storage/Util.cpp +++ b/internal/core/src/storage/Util.cpp @@ -488,16 +488,6 @@ GenIndexPathIdentifier(int64_t build_id, int64_t index_version) { return std::to_string(build_id) + "/" + std::to_string(index_version) + "/"; } -std::string -GenTextIndexPathIdentifier(int64_t build_id, - int64_t index_version, - int64_t segment_id, - int64_t field_id) { - return std::to_string(build_id) + "/" + std::to_string(index_version) + - "/" + std::to_string(segment_id) + "/" + std::to_string(field_id) + - "/"; -} - std::string GenIndexPathPrefix(ChunkManagerPtr cm, int64_t build_id, @@ -509,6 +499,24 @@ GenIndexPathPrefix(ChunkManagerPtr cm, return (prefix / path / path1).string(); } +std::string +GetIndexPathPrefixWithBuildID(ChunkManagerPtr cm, int64_t build_id) { + boost::filesystem::path prefix = cm->GetRootPath(); + boost::filesystem::path path = std::string(INDEX_ROOT_PATH); + boost::filesystem::path path1 = std::to_string(build_id); + return (prefix / path / path1).string(); +} + +std::string +GenTextIndexPathIdentifier(int64_t build_id, + int64_t index_version, + int64_t segment_id, + int64_t field_id) { + return std::to_string(build_id) + "/" + std::to_string(index_version) + + "/" + std::to_string(segment_id) + "/" + std::to_string(field_id) + + "/"; +} + std::string GenTextIndexPathPrefix(ChunkManagerPtr cm, int64_t build_id, @@ -522,6 +530,14 @@ GenTextIndexPathPrefix(ChunkManagerPtr cm, return (prefix / path / path1).string(); } +std::string +GetTextIndexPathPrefixWithBuildID(ChunkManagerPtr cm, int64_t build_id) { + boost::filesystem::path prefix = cm->GetRootPath(); + boost::filesystem::path path = std::string(TEXT_LOG_ROOT_PATH); + boost::filesystem::path path1 = std::to_string(build_id); + return (prefix / path / path1).string(); +} + std::string GenJsonKeyIndexPathIdentifier(int64_t build_id, int64_t index_version, @@ -554,9 +570,9 @@ GenJsonKeyIndexPathPrefix(ChunkManagerPtr cm, } std::string -GetIndexPathPrefixWithBuildID(ChunkManagerPtr cm, int64_t build_id) { +GetJsonKeyIndexPathPrefixWithBuildID(ChunkManagerPtr cm, int64_t build_id) { boost::filesystem::path prefix = cm->GetRootPath(); - boost::filesystem::path path = std::string(INDEX_ROOT_PATH); + boost::filesystem::path path = std::string(JSON_KEY_INDEX_LOG_ROOT_PATH); boost::filesystem::path path1 = std::to_string(build_id); return (prefix / path / path1).string(); } diff --git a/internal/core/src/storage/Util.h b/internal/core/src/storage/Util.h index 511656220d..c62e2cb1ac 100644 --- a/internal/core/src/storage/Util.h +++ b/internal/core/src/storage/Util.h @@ -71,10 +71,13 @@ GetDimensionFromArrowArray(std::shared_ptr array, DataType data_type); std::string -GetIndexPathPrefixWithBuildID(ChunkManagerPtr cm, int64_t build_id); +GenIndexPathIdentifier(int64_t build_id, int64_t index_version); std::string -GenIndexPathIdentifier(int64_t build_id, int64_t index_version); +GenIndexPathPrefix(ChunkManagerPtr cm, int64_t build_id, int64_t index_version); + +std::string +GetIndexPathPrefixWithBuildID(ChunkManagerPtr cm, int64_t build_id); std::string GenTextIndexPathIdentifier(int64_t build_id, @@ -83,7 +86,7 @@ GenTextIndexPathIdentifier(int64_t build_id, int64_t field_id); std::string -GenIndexPathPrefix(ChunkManagerPtr cm, int64_t build_id, int64_t index_version); +GetTextIndexPathPrefixWithBuildID(ChunkManagerPtr cm, int64_t build_id); std::string GenTextIndexPathPrefix(ChunkManagerPtr cm, @@ -100,6 +103,9 @@ GenJsonKeyIndexPathIdentifier(int64_t build_id, int64_t segment_id, int64_t field_id); +std::string +GetJsonKeyIndexPathPrefixWithBuildID(ChunkManagerPtr cm, int64_t build_id); + std::string GenJsonKeyIndexPathPrefix(ChunkManagerPtr cm, int64_t build_id, diff --git a/internal/core/unittest/test_disk_file_manager_test.cpp b/internal/core/unittest/test_disk_file_manager_test.cpp index ba0800a9a0..628de1dd63 100644 --- a/internal/core/unittest/test_disk_file_manager_test.cpp +++ b/internal/core/unittest/test_disk_file_manager_test.cpp @@ -398,3 +398,40 @@ TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskOnlyOneCategory) { ASSERT_TRUE(res.empty()); } } + +TEST_F(DiskAnnFileManagerTest, FileCleanup) { + std::string local_index_file_path; + std::string local_text_index_file_path; + std::string local_json_key_index_file_path; + + auto local_chunk_manager = + LocalChunkManagerSingleton::GetInstance().GetChunkManager(); + + { + auto file_manager = CreateFileManager(cm_); + + auto random_file_suffix = std::to_string(rand()); + local_text_index_file_path = + file_manager->GetLocalTextIndexPrefix() + random_file_suffix; + local_json_key_index_file_path = + file_manager->GetLocalJsonKeyIndexPrefix() + random_file_suffix; + local_index_file_path = + file_manager->GetLocalIndexObjectPrefix() + random_file_suffix; + + local_chunk_manager->CreateFile(local_text_index_file_path); + local_chunk_manager->CreateFile(local_json_key_index_file_path); + local_chunk_manager->CreateFile(local_index_file_path); + + // verify these files exist + EXPECT_TRUE( + file_manager->IsExisted(local_text_index_file_path).value()); + EXPECT_TRUE( + file_manager->IsExisted(local_json_key_index_file_path).value()); + EXPECT_TRUE(file_manager->IsExisted(local_index_file_path).value()); + } + + // verify these files not exist + EXPECT_FALSE(local_chunk_manager->Exist(local_text_index_file_path)); + EXPECT_FALSE(local_chunk_manager->Exist(local_json_key_index_file_path)); + EXPECT_FALSE(local_chunk_manager->Exist(local_index_file_path)); +} \ No newline at end of file diff --git a/tests/python_client/Dockerfile b/tests/python_client/Dockerfile index 24fad77b31..531067eab3 100644 --- a/tests/python_client/Dockerfile +++ b/tests/python_client/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.10-buster +FROM python:3.10-bullseye RUN apt-get update && apt-get install -y jq