From b9b554676c93b5cfcff6acd303f11c56661f231e Mon Sep 17 00:00:00 2001 From: sthuang <167743503+shaoting-huang@users.noreply.github.com> Date: Tue, 27 May 2025 15:26:28 +0800 Subject: [PATCH] fix: storage v2 get field data with correct column group files (#42107) related: #39173 Signed-off-by: shaoting-huang --- internal/core/src/storage/Util.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/internal/core/src/storage/Util.cpp b/internal/core/src/storage/Util.cpp index 7ddd43626b..e4cebc94e3 100644 --- a/internal/core/src/storage/Util.cpp +++ b/internal/core/src/storage/Util.cpp @@ -1023,10 +1023,24 @@ GetFieldDatasFromStorageV2(std::vector>& remote_files, AssertInfo(remote_files.size() > 0, "remote files size is 0"); std::vector field_data_list; + // remote files might not follow the sequence of column group ids, + // so we need to put them into a map + std::unordered_map> column_group_files; for (int i = 0; i < remote_files.size(); i++) { auto& remote_chunk_files = remote_files[i]; AssertInfo(remote_chunk_files.size() > 0, "remote files size is 0"); + // find the second-to-last '/' to get group_id + std::string path = remote_chunk_files[0]; + size_t last_slash = path.find_last_of("/"); + size_t second_last_slash = path.find_last_of("/", last_slash - 1); + int64_t group_id = std::stol(path.substr( + second_last_slash + 1, last_slash - second_last_slash - 1)); + + column_group_files[group_id] = remote_chunk_files; + } + + for (auto& [column_group_id, remote_chunk_files] : column_group_files) { auto fs = milvus_storage::ArrowFileSystemSingleton::GetInstance() .GetArrowFileSystem(); // read first file to get path and column offset of the field id @@ -1045,9 +1059,9 @@ GetFieldDatasFromStorageV2(std::vector>& remote_files, AssertInfo(column_offset.path_index < remote_files.size(), "column offset path index {} is out of range", column_offset.path_index); - if (column_offset.path_index != i) { + if (column_offset.path_index != column_group_id) 
{ LOG_INFO("Skip group id {} since target field shall be in group {}", - i, + column_group_id, column_offset.path_index); continue; }