fix: fix text match index / json key stats index leak when segment released (#43317)

PR: https://github.com/milvus-io/milvus/pull/42655
issue: https://github.com/milvus-io/milvus/issues/42626

---------

Signed-off-by: SpadeA <tangchenjie1210@gmail.com>
This commit is contained in:
Spade A 2025-07-15 17:05:32 +08:00 committed by GitHub
parent 8591b3913e
commit e7bc57e7a0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 79 additions and 16 deletions

View File

@ -57,6 +57,10 @@ DiskFileManagerImpl::~DiskFileManagerImpl() {
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
local_chunk_manager->RemoveDir(GetIndexPathPrefixWithBuildID(
local_chunk_manager, index_meta_.build_id));
local_chunk_manager->RemoveDir(GetTextIndexPathPrefixWithBuildID(
local_chunk_manager, index_meta_.build_id));
local_chunk_manager->RemoveDir(GetJsonKeyIndexPathPrefixWithBuildID(
local_chunk_manager, index_meta_.build_id));
}
bool

View File

@ -488,16 +488,6 @@ GenIndexPathIdentifier(int64_t build_id, int64_t index_version) {
return std::to_string(build_id) + "/" + std::to_string(index_version) + "/";
}
std::string
GenTextIndexPathIdentifier(int64_t build_id,
int64_t index_version,
int64_t segment_id,
int64_t field_id) {
return std::to_string(build_id) + "/" + std::to_string(index_version) +
"/" + std::to_string(segment_id) + "/" + std::to_string(field_id) +
"/";
}
std::string
GenIndexPathPrefix(ChunkManagerPtr cm,
int64_t build_id,
@ -509,6 +499,24 @@ GenIndexPathPrefix(ChunkManagerPtr cm,
return (prefix / path / path1).string();
}
std::string
GetIndexPathPrefixWithBuildID(ChunkManagerPtr cm, int64_t build_id) {
boost::filesystem::path prefix = cm->GetRootPath();
boost::filesystem::path path = std::string(INDEX_ROOT_PATH);
boost::filesystem::path path1 = std::to_string(build_id);
return (prefix / path / path1).string();
}
std::string
GenTextIndexPathIdentifier(int64_t build_id,
int64_t index_version,
int64_t segment_id,
int64_t field_id) {
return std::to_string(build_id) + "/" + std::to_string(index_version) +
"/" + std::to_string(segment_id) + "/" + std::to_string(field_id) +
"/";
}
std::string
GenTextIndexPathPrefix(ChunkManagerPtr cm,
int64_t build_id,
@ -522,6 +530,14 @@ GenTextIndexPathPrefix(ChunkManagerPtr cm,
return (prefix / path / path1).string();
}
std::string
GetTextIndexPathPrefixWithBuildID(ChunkManagerPtr cm, int64_t build_id) {
boost::filesystem::path prefix = cm->GetRootPath();
boost::filesystem::path path = std::string(TEXT_LOG_ROOT_PATH);
boost::filesystem::path path1 = std::to_string(build_id);
return (prefix / path / path1).string();
}
std::string
GenJsonKeyIndexPathIdentifier(int64_t build_id,
int64_t index_version,
@ -554,9 +570,9 @@ GenJsonKeyIndexPathPrefix(ChunkManagerPtr cm,
}
std::string
GetIndexPathPrefixWithBuildID(ChunkManagerPtr cm, int64_t build_id) {
GetJsonKeyIndexPathPrefixWithBuildID(ChunkManagerPtr cm, int64_t build_id) {
boost::filesystem::path prefix = cm->GetRootPath();
boost::filesystem::path path = std::string(INDEX_ROOT_PATH);
boost::filesystem::path path = std::string(JSON_KEY_INDEX_LOG_ROOT_PATH);
boost::filesystem::path path1 = std::to_string(build_id);
return (prefix / path / path1).string();
}

View File

@ -71,10 +71,13 @@ GetDimensionFromArrowArray(std::shared_ptr<arrow::Array> array,
DataType data_type);
std::string
GetIndexPathPrefixWithBuildID(ChunkManagerPtr cm, int64_t build_id);
GenIndexPathIdentifier(int64_t build_id, int64_t index_version);
std::string
GenIndexPathIdentifier(int64_t build_id, int64_t index_version);
GenIndexPathPrefix(ChunkManagerPtr cm, int64_t build_id, int64_t index_version);
std::string
GetIndexPathPrefixWithBuildID(ChunkManagerPtr cm, int64_t build_id);
std::string
GenTextIndexPathIdentifier(int64_t build_id,
@ -83,7 +86,7 @@ GenTextIndexPathIdentifier(int64_t build_id,
int64_t field_id);
std::string
GenIndexPathPrefix(ChunkManagerPtr cm, int64_t build_id, int64_t index_version);
GetTextIndexPathPrefixWithBuildID(ChunkManagerPtr cm, int64_t build_id);
std::string
GenTextIndexPathPrefix(ChunkManagerPtr cm,
@ -100,6 +103,9 @@ GenJsonKeyIndexPathIdentifier(int64_t build_id,
int64_t segment_id,
int64_t field_id);
std::string
GetJsonKeyIndexPathPrefixWithBuildID(ChunkManagerPtr cm, int64_t build_id);
std::string
GenJsonKeyIndexPathPrefix(ChunkManagerPtr cm,
int64_t build_id,

View File

@ -398,3 +398,40 @@ TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskOnlyOneCategory) {
ASSERT_TRUE(res.empty());
}
}
TEST_F(DiskAnnFileManagerTest, FileCleanup) {
std::string local_index_file_path;
std::string local_text_index_file_path;
std::string local_json_key_index_file_path;
auto local_chunk_manager =
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
{
auto file_manager = CreateFileManager(cm_);
auto random_file_suffix = std::to_string(rand());
local_text_index_file_path =
file_manager->GetLocalTextIndexPrefix() + random_file_suffix;
local_json_key_index_file_path =
file_manager->GetLocalJsonKeyIndexPrefix() + random_file_suffix;
local_index_file_path =
file_manager->GetLocalIndexObjectPrefix() + random_file_suffix;
local_chunk_manager->CreateFile(local_text_index_file_path);
local_chunk_manager->CreateFile(local_json_key_index_file_path);
local_chunk_manager->CreateFile(local_index_file_path);
// verify these files exist
EXPECT_TRUE(
file_manager->IsExisted(local_text_index_file_path).value());
EXPECT_TRUE(
file_manager->IsExisted(local_json_key_index_file_path).value());
EXPECT_TRUE(file_manager->IsExisted(local_index_file_path).value());
}
// verify these files not exist
EXPECT_FALSE(local_chunk_manager->Exist(local_text_index_file_path));
EXPECT_FALSE(local_chunk_manager->Exist(local_json_key_index_file_path));
EXPECT_FALSE(local_chunk_manager->Exist(local_index_file_path));
}

View File

@ -1,4 +1,4 @@
FROM python:3.10-buster
FROM python:3.10-bullseye
RUN apt-get update && apt-get install -y jq