fix: storage v2 write mmap file per field per cell (#42180)

Each cell of a field should be written to its own mmap file, rather than
writing all cells of the field into a single mmap file.
related: #39173

---------

Signed-off-by: shaoting-huang <shaoting.huang@zilliz.com>
This commit is contained in:
sthuang 2025-06-09 11:48:33 +08:00 committed by GitHub
parent 6e16653597
commit b136f85ca0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 48 additions and 99 deletions

View File

@ -82,12 +82,6 @@ GroupChunkTranslator::GroupChunkTranslator(
}
GroupChunkTranslator::~GroupChunkTranslator() {
for (auto chunk : group_chunks_) {
if (chunk != nullptr) {
// let the GroupChunk to be deleted by the unique_ptr
auto chunk_ptr = std::unique_ptr<GroupChunk>(chunk);
}
}
}
size_t
@ -166,77 +160,31 @@ GroupChunkTranslator::get_cells(const std::vector<cachinglayer::cid_t>& cids) {
LOG_INFO("segment {} submits load fields {} task to thread pool",
segment_id_,
field_id_list_.ToString());
if (!use_mmap_) {
load_column_group_in_memory();
} else {
load_column_group_in_mmap();
}
for (auto cid : cids) {
AssertInfo(group_chunks_[cid] != nullptr,
"GroupChunkTranslator::get_cells failed to load cell {} of "
"CacheSlot {}.",
cid,
key_);
cells.emplace_back(
cid, std::unique_ptr<milvus::GroupChunk>(group_chunks_[cid]));
group_chunks_[cid] = nullptr;
std::shared_ptr<milvus::ArrowDataWrapper> r;
int64_t cid_idx = 0;
int64_t total_tables = 0;
while (column_group_info_.arrow_reader_channel->pop(r)) {
for (const auto& table : r->arrow_tables) {
AssertInfo(cid_idx < cids.size(),
"Number of tables exceed number of cids ({})",
cids.size());
auto cid = cids[cid_idx++];
cells.emplace_back(cid, load_group_chunk(table, cid));
total_tables++;
}
}
AssertInfo(total_tables == cids.size(),
"Number of tables ({}) does not match number of cids ({})",
total_tables,
cids.size());
return cells;
}
void
GroupChunkTranslator::load_column_group_in_memory() {
std::vector<size_t> row_counts(field_id_list_.size(), 0);
std::shared_ptr<milvus::ArrowDataWrapper> r;
std::vector<std::string> files;
std::vector<size_t> file_offsets;
while (column_group_info_.arrow_reader_channel->pop(r)) {
for (const auto& table : r->arrow_tables) {
process_batch(table, files, file_offsets, row_counts);
}
}
}
void
GroupChunkTranslator::load_column_group_in_mmap() {
std::vector<std::string> files;
std::vector<size_t> file_offsets;
std::vector<size_t> row_counts;
// Initialize files and offsets
for (size_t i = 0; i < field_id_list_.size(); ++i) {
auto field_id = field_id_list_.Get(i);
auto filepath =
std::filesystem::path(column_group_info_.mmap_dir_path) /
std::to_string(segment_id_) / std::to_string(field_id);
auto dir = filepath.parent_path();
std::filesystem::create_directories(dir);
files.push_back(filepath.string());
file_offsets.push_back(0);
row_counts.push_back(0);
}
std::shared_ptr<milvus::ArrowDataWrapper> r;
while (column_group_info_.arrow_reader_channel->pop(r)) {
for (const auto& table : r->arrow_tables) {
process_batch(table, files, file_offsets, row_counts);
}
}
for (size_t i = 0; i < files.size(); ++i) {
auto ok = unlink(files[i].c_str());
AssertInfo(ok == 0,
fmt::format("failed to unlink mmap data file {}, err: {}",
files[i].c_str(),
strerror(errno)));
}
}
void
GroupChunkTranslator::process_batch(const std::shared_ptr<arrow::Table>& table,
const std::vector<std::string>& files,
std::vector<size_t>& file_offsets,
std::vector<size_t>& row_counts) {
std::unique_ptr<milvus::GroupChunk>
GroupChunkTranslator::load_group_chunk(
const std::shared_ptr<arrow::Table>& table,
const milvus::cachinglayer::cid_t cid) {
// Create chunks for each field in this batch
std::unordered_map<FieldId, std::shared_ptr<Chunk>> chunks;
// Iterate through field_id_list to get field_id and create chunk
@ -263,25 +211,35 @@ GroupChunkTranslator::process_batch(const std::shared_ptr<arrow::Table>& table,
chunk = create_chunk(field_meta, dim, array_vec);
} else {
// Mmap mode
int flags = O_RDWR;
if (file_offsets[i] == 0) {
// First write to this file, create and truncate
flags |= O_CREAT | O_TRUNC;
}
auto file = File::Open(files[i], flags);
// should seek to the file offset before writing
file.Seek(file_offsets[i], SEEK_SET);
chunk =
create_chunk(field_meta, dim, file, file_offsets[i], array_vec);
file_offsets[i] += chunk->Size();
auto filepath =
std::filesystem::path(column_group_info_.mmap_dir_path) /
std::to_string(segment_id_) / std::to_string(field_id) /
std::to_string(cid);
LOG_INFO(
"storage v2 segment {} mmaping field {} chunk {} to path {}",
segment_id_,
field_id,
cid,
filepath.string());
std::filesystem::create_directories(filepath.parent_path());
auto file =
File::Open(filepath.string(), O_CREAT | O_TRUNC | O_RDWR);
auto chunk = create_chunk(field_meta, dim, file, 0, array_vec);
auto ok = unlink(filepath.c_str());
AssertInfo(
ok == 0,
fmt::format(
"storage v2 failed to unlink mmap data file {}, err: {}",
filepath.c_str(),
strerror(errno)));
}
row_counts[i] += chunk->RowNums();
chunks[fid] = std::move(chunk);
}
// Create GroupChunk from chunks and store in results
auto group_chunk = std::make_unique<milvus::GroupChunk>(chunks);
group_chunks_.emplace_back(group_chunk.release());
return std::make_unique<milvus::GroupChunk>(chunks);
}
} // namespace milvus::segcore::storagev2translator

View File

@ -72,17 +72,9 @@ class GroupChunkTranslator
}
private:
void
load_column_group_in_memory();
void
load_column_group_in_mmap();
void
process_batch(const std::shared_ptr<arrow::Table>& table,
const std::vector<std::string>& files,
std::vector<size_t>& file_offsets,
std::vector<size_t>& row_counts);
std::unique_ptr<milvus::GroupChunk>
load_group_chunk(const std::shared_ptr<arrow::Table>& table,
const milvus::cachinglayer::cid_t cid);
int64_t segment_id_;
std::string key_;
@ -96,7 +88,6 @@ class GroupChunkTranslator
ChunkedSegmentSealedImpl* chunked_segment_;
std::unique_ptr<milvus::segcore::InsertRecord<true>> ir_;
GroupCTMeta meta_;
std::vector<milvus::GroupChunk*> group_chunks_;
int64_t timestamp_offet_;
bool use_mmap_;
};