mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
fix: Include fieldID in raw data cleanup to prevent deleting other fields (#46688)
issue: #46687 <!-- This is an auto-generated comment: release notes by coderabbit.ai --> - Core invariant: raw-data cleanup must be scoped to (segment_id, field_id) so deleting temporary raw files for one field never removes raw files for other fields in the same segment (prevents cross-field deletion during index builds). - Root cause and fix (bug): VectorDiskAnnIndex<T>::Build() and BuildWithDataset() called RemoveDir on the segment-level path; this removed rawdata/{segment_id}/. The fix changes both calls to remove storage::GenFieldRawDataPathPrefix(local_chunk_manager, segment_id, field_id) instead, limiting cleanup to rawdata/{segment_id}_{field_id}/ (field-scoped). - Logic removed/simplified: the old helper GetSegmentRawDataPathPrefix was removed and callers were switched to GenFieldRawDataPathPrefix; cleanup logic is simplified from segment-level to field-level path generation and removal, eliminating redundant broad deletions. - Why this does NOT cause data loss or regress behavior: the change narrows RemoveDir() to the exact field path used when caching raw data and offsets earlier in Build (offsets_path and CacheRawDataToDisk produce field-scoped local paths). Build still writes/reads offsets and raw data from GenFieldRawDataPathPrefix(...) and then removes that same prefix after successful index.Build(); therefore only temporary files for the built field are deleted and other fields’ raw files under the same segment are preserved. This fixes issue #46687 by preventing accidental deletion of other fields’ raw data. <!-- end of auto-generated comment: release notes by coderabbit.ai --> Signed-off-by: Cai Zhang <cai.zhang@zilliz.com>
This commit is contained in:
parent
26c86ec221
commit
b13aac5164
@ -227,8 +227,8 @@ VectorDiskAnnIndex<T>::Build(const Config& config) {
|
||||
ThrowInfo(ErrorCode::IndexBuildError,
|
||||
"failed to build disk index, " + KnowhereStatusString(stat));
|
||||
|
||||
local_chunk_manager->RemoveDir(
|
||||
storage::GetSegmentRawDataPathPrefix(local_chunk_manager, segment_id));
|
||||
local_chunk_manager->RemoveDir(storage::GenFieldRawDataPathPrefix(
|
||||
local_chunk_manager, segment_id, field_id));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -335,8 +335,8 @@ VectorDiskAnnIndex<T>::BuildWithDataset(const DatasetPtr& dataset,
|
||||
file_manager_->AddFile(valid_data_path);
|
||||
}
|
||||
|
||||
local_chunk_manager->RemoveDir(
|
||||
storage::GetSegmentRawDataPathPrefix(local_chunk_manager, segment_id));
|
||||
local_chunk_manager->RemoveDir(storage::GenFieldRawDataPathPrefix(
|
||||
local_chunk_manager, segment_id, field_id));
|
||||
|
||||
// TODO ::
|
||||
// SetDim(index_->Dim());
|
||||
|
||||
@ -942,15 +942,7 @@ GenFieldRawDataPathPrefix(ChunkManagerPtr cm,
|
||||
boost::filesystem::path prefix = cm->GetRootPath();
|
||||
boost::filesystem::path path = std::string(RAWDATA_ROOT_PATH);
|
||||
boost::filesystem::path path1 =
|
||||
std::to_string(segment_id) + "/" + std::to_string(field_id) + "/";
|
||||
return NormalizePath(prefix / path / path1);
|
||||
}
|
||||
|
||||
std::string
|
||||
GetSegmentRawDataPathPrefix(ChunkManagerPtr cm, int64_t segment_id) {
|
||||
boost::filesystem::path prefix = cm->GetRootPath();
|
||||
boost::filesystem::path path = std::string(RAWDATA_ROOT_PATH);
|
||||
boost::filesystem::path path1 = std::to_string(segment_id);
|
||||
std::to_string(segment_id) + "_" + std::to_string(field_id) + "/";
|
||||
return NormalizePath(prefix / path / path1);
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user