enhance: Add MAP_POPULATE flag for mmap to reduce page faults (#46761)

Add configurable MAP_POPULATE flag support for mmap operations to reduce
page faults and improve first read performance.

Key changes:
- Add `queryNode.mmap.populate` config (default: true) to control
MAP_POPULATE flag usage
- Add `mmap_populate` parameter to MmapChunkTarget, ChunkTranslator,
GroupChunkTranslator, and ManifestGroupTranslator
- Apply MAP_POPULATE to both MmapChunkTarget and MemChunkTarget
- Propagate mmap_populate setting through chunk creation pipeline

When enabled, MAP_POPULATE pre-faults the mapped pages into memory when the
mapping is created, eliminating page faults during subsequent access and
improving query performance on first reads.

issue: #46760

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
congqixia 2026-01-05 18:57:24 +08:00 committed by GitHub
parent d0e6a624a7
commit 01da5010f5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 85 additions and 19 deletions

View File

@ -58,7 +58,11 @@ MmapChunkTarget::release() {
flush();
auto file = File::Open(file_path_, O_RDWR);
auto m = mmap(nullptr, cap_, PROT_READ, MAP_SHARED, file.Descriptor(), 0);
auto mmap_flag = MAP_SHARED;
if (populate_) {
mmap_flag |= MAP_POPULATE;
}
auto m = mmap(nullptr, cap_, PROT_READ, mmap_flag, file.Descriptor(), 0);
AssertInfo(m != MAP_FAILED,
"failed to map: {}, map_size={}",
strerror(errno),

View File

@ -12,6 +12,10 @@
#pragma once
#include <memory>
#include <sys/mman.h>
// If <sys/mman.h> did not define MAP_POPULATE, define it as 0 so that OR-ing
// it into a set of mmap flags leaves those flags unchanged.
#if !defined(MAP_POPULATE)
#define MAP_POPULATE 0
#endif
#include <sys/types.h>
#include <unistd.h>
#include <cstddef>
@ -54,9 +58,10 @@ class ChunkTarget {
class MmapChunkTarget : public ChunkTarget {
public:
explicit MmapChunkTarget(std::string file_path,
bool populate,
size_t cap,
storage::io::Priority io_prio)
: file_path_(std::move(file_path)), cap_(cap) {
: file_path_(std::move(file_path)), cap_(cap), populate_(populate) {
file_writer_ =
std::make_unique<storage::FileWriter>(file_path_, io_prio);
}
@ -78,17 +83,17 @@ class MmapChunkTarget : public ChunkTarget {
std::string file_path_{};
size_t cap_{0};
size_t size_{0};
bool populate_{false};
};
class MemChunkTarget : public ChunkTarget {
public:
explicit MemChunkTarget(size_t cap) : cap_(cap) {
auto m = mmap(nullptr,
cap,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANON,
-1,
0);
explicit MemChunkTarget(size_t cap, bool populate = true) : cap_(cap) {
auto mmap_flag = MAP_PRIVATE | MAP_ANON;
if (populate) {
mmap_flag |= MAP_POPULATE;
}
auto m = mmap(nullptr, cap, PROT_READ | PROT_WRITE, mmap_flag, -1, 0);
AssertInfo(m != MAP_FAILED,
"failed to map: {}, map_size={}",
strerror(errno),

View File

@ -760,6 +760,7 @@ std::unique_ptr<Chunk>
create_chunk(const FieldMeta& field_meta,
const arrow::ArrayVector& array_vec,
const std::string& file_path,
bool mmap_populate,
proto::common::LoadPriority load_priority) {
auto cw = create_chunk_writer(field_meta);
auto [size, row_nums] = cw->calculate_size(array_vec);
@ -767,11 +768,11 @@ create_chunk(const FieldMeta& field_meta,
~(ChunkTarget::ALIGNED_SIZE - 1);
std::shared_ptr<ChunkTarget> target;
if (file_path.empty()) {
target = std::make_shared<MemChunkTarget>(aligned_size);
target = std::make_shared<MemChunkTarget>(aligned_size, mmap_populate);
} else {
auto io_prio = storage::io::GetPriorityFromLoadPriority(load_priority);
target =
std::make_shared<MmapChunkTarget>(file_path, aligned_size, io_prio);
target = std::make_shared<MmapChunkTarget>(
file_path, mmap_populate, aligned_size, io_prio);
}
cw->write_to_target(array_vec, target);
auto data = target->release();
@ -791,6 +792,7 @@ create_group_chunk(const std::vector<FieldId>& field_ids,
const std::vector<FieldMeta>& field_metas,
const std::vector<arrow::ArrayVector>& array_vec,
const std::string& file_path,
bool mmap_populate,
proto::common::LoadPriority load_priority) {
std::vector<std::shared_ptr<ChunkWriterBase>> cws;
cws.reserve(field_ids.size());
@ -825,10 +827,12 @@ create_group_chunk(const std::vector<FieldId>& field_ids,
}
std::shared_ptr<ChunkTarget> target;
if (file_path.empty()) {
target = std::make_shared<MemChunkTarget>(total_aligned_size);
target =
std::make_shared<MemChunkTarget>(total_aligned_size, mmap_populate);
} else {
target = std::make_shared<MmapChunkTarget>(
file_path,
mmap_populate,
total_aligned_size,
storage::io::GetPriorityFromLoadPriority(load_priority));
}

View File

@ -294,6 +294,7 @@ std::unique_ptr<Chunk>
create_chunk(const FieldMeta& field_meta,
             const arrow::ArrayVector& array_vec,
             // An empty path selects the anonymous in-memory target;
             // otherwise the chunk is written to this file and mapped from it.
             const std::string& file_path = "",
             // When true, MAP_POPULATE is OR-ed into the mmap flags of the
             // chosen target. Named without a trailing underscore to match
             // the definition; the underscore suffix is kept for members.
             bool mmap_populate = true,
             proto::common::LoadPriority load_priority =
                 proto::common::LoadPriority::HIGH);
@ -302,6 +303,7 @@ create_group_chunk(const std::vector<FieldId>& field_ids,
const std::vector<FieldMeta>& field_metas,
const std::vector<arrow::ArrayVector>& array_vec,
const std::string& file_path = "",
bool mmap_populate_ = true,
proto::common::LoadPriority load_priority =
proto::common::LoadPriority::HIGH);

View File

@ -120,6 +120,7 @@ typedef struct CMmapConfig {
bool scalar_field_enable_mmap;
bool vector_index_enable_mmap;
bool vector_field_enable_mmap;
bool mmap_populate;
} CMmapConfig;
typedef struct CTraceConfig {

View File

@ -1004,6 +1004,8 @@ JsonKeyStats::LoadColumnGroup(int64_t column_group_id,
std::move(field_meta)));
}
auto& mmap_config = storage::MmapManager::GetInstance().GetMmapConfig();
auto translator = std::make_unique<
milvus::segcore::storagev2translator::GroupChunkTranslator>(
segment_id_,
@ -1012,6 +1014,7 @@ JsonKeyStats::LoadColumnGroup(int64_t column_group_id,
column_group_info,
files,
enable_mmap,
mmap_config.GetMmapPopulate(),
milvus_field_ids.size(),
load_priority_);

View File

@ -302,6 +302,7 @@ ChunkedSegmentSealedImpl::load_column_group_data_internal(
const LoadFieldDataInfo& load_info) {
size_t num_rows = storage::GetNumRowsForLoadInfo(load_info);
ArrowSchemaPtr arrow_schema = schema_->ConvertToArrowSchema();
auto& mmap_config = storage::MmapManager::GetInstance().GetMmapConfig();
for (auto& [id, info] : load_info.field_infos) {
AssertInfo(info.row_count > 0,
@ -364,6 +365,7 @@ ChunkedSegmentSealedImpl::load_column_group_data_internal(
column_group_info,
insert_files,
info.enable_mmap,
mmap_config.GetMmapPopulate(),
milvus_field_ids.size(),
load_info.load_priority);
@ -438,6 +440,8 @@ ChunkedSegmentSealedImpl::load_field_data_internal(
const LoadFieldDataInfo& load_info) {
SCOPE_CGO_CALL_METRIC();
auto& mmap_config = storage::MmapManager::GetInstance().GetMmapConfig();
size_t num_rows = storage::GetNumRowsForLoadInfo(load_info);
AssertInfo(
!num_rows_.has_value() || num_rows_ == num_rows,
@ -507,6 +511,7 @@ ChunkedSegmentSealedImpl::load_field_data_internal(
field_data_info,
std::move(file_infos),
info.enable_mmap,
mmap_config.GetMmapPopulate(),
load_info.load_priority);
auto data_type = field_meta.get_data_type();
@ -2930,6 +2935,7 @@ ChunkedSegmentSealedImpl::LoadColumnGroup(
std::move(chunk_reader),
field_metas,
use_mmap,
mmap_config.GetMmapPopulate(),
mmap_dir_path,
column_group->columns.size(),
segment_load_info_.GetPriority());

View File

@ -73,12 +73,14 @@ ChunkTranslator::ChunkTranslator(
FieldDataInfo field_data_info,
std::vector<FileInfo>&& file_infos,
bool use_mmap,
bool mmap_populate,
milvus::proto::common::LoadPriority load_priority)
: segment_id_(segment_id),
field_id_(field_data_info.field_id),
field_meta_(field_meta),
key_(fmt::format("seg_{}_f_{}", segment_id, field_meta.get_id().get())),
use_mmap_(use_mmap),
mmap_populate_(mmap_populate),
file_infos_(std::move(file_infos)),
mmap_dir_path_(field_data_info.mmap_dir_path),
meta_(use_mmap ? milvus::cachinglayer::StorageType::DISK
@ -201,8 +203,11 @@ ChunkTranslator::get_cells(
AssertInfo(popped, "failed to pop arrow reader from channel");
arrow::ArrayVector array_vec =
read_single_column_batches(r->reader);
chunk = create_chunk(
field_meta_, array_vec, filepath.string(), load_priority_);
chunk = create_chunk(field_meta_,
array_vec,
filepath.string(),
mmap_populate_,
load_priority_);
}
cells.emplace_back(cid, std::move(chunk));
}

View File

@ -64,6 +64,7 @@ class ChunkTranslator : public milvus::cachinglayer::Translator<milvus::Chunk> {
FieldDataInfo field_data_info,
std::vector<FileInfo>&& file_infos,
bool use_mmap,
bool mmap_populate,
milvus::proto::common::LoadPriority load_priority);
size_t
@ -102,6 +103,7 @@ class ChunkTranslator : public milvus::cachinglayer::Translator<milvus::Chunk> {
int64_t field_id_;
std::string key_;
bool use_mmap_;
bool mmap_populate_;
CTMeta meta_;
FieldMeta field_meta_;
std::string mmap_dir_path_;

View File

@ -50,6 +50,7 @@ GroupChunkTranslator::GroupChunkTranslator(
FieldDataInfo column_group_info,
std::vector<std::string> insert_files,
bool use_mmap,
bool mmap_populate,
int64_t num_fields,
milvus::proto::common::LoadPriority load_priority)
: segment_id_(segment_id),
@ -74,6 +75,7 @@ GroupChunkTranslator::GroupChunkTranslator(
column_group_info_(column_group_info),
insert_files_(insert_files),
use_mmap_(use_mmap),
mmap_populate_(mmap_populate),
load_priority_(load_priority),
meta_(num_fields,
use_mmap ? milvus::cachinglayer::StorageType::DISK
@ -462,8 +464,11 @@ GroupChunkTranslator::load_group_chunk(
static_cast<uint8_t>(group_chunk_type_));
}
std::filesystem::create_directories(filepath.parent_path());
chunks = create_group_chunk(
field_ids, field_metas, array_vecs, filepath.string());
chunks = create_group_chunk(field_ids,
field_metas,
array_vecs,
filepath.string(),
mmap_populate_);
}
return std::make_unique<milvus::GroupChunk>(chunks);
}

View File

@ -43,6 +43,7 @@ class GroupChunkTranslator
FieldDataInfo column_group_info,
std::vector<std::string> insert_files,
bool use_mmap,
bool mmap_populate,
int64_t num_fields,
milvus::proto::common::LoadPriority load_priority);
@ -119,6 +120,7 @@ class GroupChunkTranslator
GroupCTMeta meta_;
int64_t timestamp_offet_;
bool use_mmap_;
bool mmap_populate_;
milvus::proto::common::LoadPriority load_priority_{
milvus::proto::common::LoadPriority::HIGH};
std::vector<std::shared_ptr<parquet::FileMetaData>> parquet_file_metadata_;

View File

@ -116,6 +116,7 @@ TEST_P(GroupChunkTranslatorTest, TestWithMmap) {
column_group_info,
paths_,
use_mmap,
true,
schema_->get_field_ids().size(),
milvus::proto::common::LoadPriority::LOW);
@ -279,6 +280,7 @@ TEST_P(GroupChunkTranslatorTest, TestMultipleFiles) {
column_group_info,
multi_file_paths,
use_mmap,
true,
schema_->get_field_ids().size(),
milvus::proto::common::LoadPriority::LOW);

View File

@ -54,6 +54,7 @@ ManifestGroupTranslator::ManifestGroupTranslator(
std::unique_ptr<milvus_storage::api::ChunkReader> chunk_reader,
const std::unordered_map<FieldId, FieldMeta>& field_metas,
bool use_mmap,
bool mmap_populate,
const std::string& mmap_dir_path,
int64_t num_fields,
milvus::proto::common::LoadPriority load_priority)
@ -92,6 +93,7 @@ ManifestGroupTranslator::ManifestGroupTranslator(
/* is_index */ false),
/* support_eviction */ true),
use_mmap_(use_mmap),
mmap_populate_(mmap_populate),
load_priority_(load_priority) {
auto chunk_size_result = chunk_reader_->get_chunk_size();
if (!chunk_size_result.ok()) {
@ -318,8 +320,11 @@ ManifestGroupTranslator::load_group_chunk(
static_cast<uint8_t>(group_chunk_type_));
}
std::filesystem::create_directories(filepath.parent_path());
chunks = create_group_chunk(
field_ids, field_metas, array_vecs, filepath.string());
chunks = create_group_chunk(field_ids,
field_metas,
array_vecs,
filepath.string(),
mmap_populate_);
}
return std::make_unique<milvus::GroupChunk>(chunks);

View File

@ -52,6 +52,7 @@ class ManifestGroupTranslator
* @param chunk_reader Reader for accessing chunks from storage
* @param field_metas Metadata for all fields in this column group
* @param use_mmap Whether to use memory mapping for data loading
* @param mmap_populate Whether to populate data into memory mapping
* @param mmap_dir_path Directory path for memory mapping
* @param num_fields Total number of fields in the column group
* @param load_priority Priority level for loading operations
@ -63,6 +64,7 @@ class ManifestGroupTranslator
std::unique_ptr<milvus_storage::api::ChunkReader> chunk_reader,
const std::unordered_map<FieldId, FieldMeta>& field_metas,
bool use_mmap,
bool mmap_populate,
const std::string& mmap_dir_path,
int64_t num_fields,
milvus::proto::common::LoadPriority load_priority);
@ -179,6 +181,7 @@ class ManifestGroupTranslator
GroupCTMeta meta_;
bool use_mmap_;
bool mmap_populate_;
std::string mmap_dir_path_;
milvus::proto::common::LoadPriority load_priority_{
milvus::proto::common::LoadPriority::HIGH};

View File

@ -136,6 +136,7 @@ struct MmapConfig {
bool scalar_field_enable_mmap;
bool vector_index_enable_mmap;
bool vector_field_enable_mmap;
bool mmap_populate;
bool
GetEnableGrowingMmap() const {
return growing_enable_mmap;
@ -176,6 +177,10 @@ struct MmapConfig {
SetVectorFieldEnableMmap(bool flag) {
this->vector_field_enable_mmap = flag;
}
[[nodiscard]] bool
GetMmapPopulate() const {
return mmap_populate;
}
std::string
GetMmapPath() {

View File

@ -113,6 +113,7 @@ InitMmapManager(CMmapConfig c_mmap_config) {
c_mmap_config.vector_index_enable_mmap;
mmap_config.vector_field_enable_mmap =
c_mmap_config.vector_field_enable_mmap;
mmap_config.mmap_populate = c_mmap_config.mmap_populate;
milvus::storage::MmapManager::GetInstance().Init(mmap_config);
return milvus::SuccessCStatus();
} catch (std::exception& e) {

View File

@ -278,6 +278,7 @@ func InitMmapManager(params *paramtable.ComponentParam, nodeID int64) error {
scalar_field_enable_mmap: C.bool(params.QueryNodeCfg.MmapScalarField.GetAsBool()),
vector_index_enable_mmap: C.bool(params.QueryNodeCfg.MmapVectorIndex.GetAsBool()),
vector_field_enable_mmap: C.bool(params.QueryNodeCfg.MmapVectorField.GetAsBool()),
mmap_populate: C.bool(params.QueryNodeCfg.MmapPopulate.GetAsBool()),
}
status := C.InitMmapManager(mmapConfig)
return HandleCStatus(&status, "InitMmapManager failed")

View File

@ -3253,6 +3253,7 @@ type queryNodeConfig struct {
MmapVectorIndex ParamItem `refreshable:"false"`
MmapScalarField ParamItem `refreshable:"false"`
MmapScalarIndex ParamItem `refreshable:"false"`
MmapPopulate ParamItem `refreshable:"false"`
MmapJSONStats ParamItem `refreshable:"false"`
GrowingMmapEnabled ParamItem `refreshable:"false"`
FixedFileSizeForMmapManager ParamItem `refreshable:"false"`
@ -3992,6 +3993,15 @@ This defaults to true, indicating that Milvus creates temporary index for growin
}
p.MmapScalarIndex.Init(base.mgr)
// MmapPopulate is read from "queryNode.mmap.populate" (default "true") and is
// forwarded to the C mmap config as mmap_populate by InitMmapManager.
p.MmapPopulate = ParamItem{
Key: "queryNode.mmap.populate",
Version: "2.6.9",
DefaultValue: "true",
Doc: "MAP_POPULATE flag for mmap",
Export: false,
}
p.MmapPopulate.Init(base.mgr)
p.MmapJSONStats = ParamItem{
Key: "queryNode.mmap.jsonShredding",
Version: "2.6.5",