mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
enhance: Add MAP_POPULATE flag for mmap to reduce page faults (#46761)
Add configurable MAP_POPULATE flag support for mmap operations to reduce page faults and improve first-read performance.

Key changes:
- Add `queryNode.mmap.populate` config (default: true) to control MAP_POPULATE flag usage
- Add `mmap_populate` parameter to MmapChunkTarget, ChunkTranslator, GroupChunkTranslator, and ManifestGroupTranslator
- Apply MAP_POPULATE to both MmapChunkTarget and MemChunkTarget
- Propagate the `mmap_populate` setting through the chunk creation pipeline

When enabled, MAP_POPULATE pre-faults the mapped pages into memory, eliminating page faults during subsequent access and improving query performance for the first read operations.

issue: #46760

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
parent
d0e6a624a7
commit
01da5010f5
@ -58,7 +58,11 @@ MmapChunkTarget::release() {
|
||||
flush();
|
||||
|
||||
auto file = File::Open(file_path_, O_RDWR);
|
||||
auto m = mmap(nullptr, cap_, PROT_READ, MAP_SHARED, file.Descriptor(), 0);
|
||||
auto mmap_flag = MAP_SHARED;
|
||||
if (populate_) {
|
||||
mmap_flag |= MAP_POPULATE;
|
||||
}
|
||||
auto m = mmap(nullptr, cap_, PROT_READ, mmap_flag, file.Descriptor(), 0);
|
||||
AssertInfo(m != MAP_FAILED,
|
||||
"failed to map: {}, map_size={}",
|
||||
strerror(errno),
|
||||
|
||||
@ -12,6 +12,10 @@
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#ifndef MAP_POPULATE
|
||||
#define MAP_POPULATE 0
|
||||
#endif
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <cstddef>
|
||||
@ -54,9 +58,10 @@ class ChunkTarget {
|
||||
class MmapChunkTarget : public ChunkTarget {
|
||||
public:
|
||||
explicit MmapChunkTarget(std::string file_path,
|
||||
bool populate,
|
||||
size_t cap,
|
||||
storage::io::Priority io_prio)
|
||||
: file_path_(std::move(file_path)), cap_(cap) {
|
||||
: file_path_(std::move(file_path)), cap_(cap), populate_(populate) {
|
||||
file_writer_ =
|
||||
std::make_unique<storage::FileWriter>(file_path_, io_prio);
|
||||
}
|
||||
@ -78,17 +83,17 @@ class MmapChunkTarget : public ChunkTarget {
|
||||
std::string file_path_{};
|
||||
size_t cap_{0};
|
||||
size_t size_{0};
|
||||
bool populate_{false};
|
||||
};
|
||||
|
||||
class MemChunkTarget : public ChunkTarget {
|
||||
public:
|
||||
explicit MemChunkTarget(size_t cap) : cap_(cap) {
|
||||
auto m = mmap(nullptr,
|
||||
cap,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANON,
|
||||
-1,
|
||||
0);
|
||||
explicit MemChunkTarget(size_t cap, bool populate = true) : cap_(cap) {
|
||||
auto mmap_flag = MAP_PRIVATE | MAP_ANON;
|
||||
if (populate) {
|
||||
mmap_flag |= MAP_POPULATE;
|
||||
}
|
||||
auto m = mmap(nullptr, cap, PROT_READ | PROT_WRITE, mmap_flag, -1, 0);
|
||||
AssertInfo(m != MAP_FAILED,
|
||||
"failed to map: {}, map_size={}",
|
||||
strerror(errno),
|
||||
|
||||
@ -760,6 +760,7 @@ std::unique_ptr<Chunk>
|
||||
create_chunk(const FieldMeta& field_meta,
|
||||
const arrow::ArrayVector& array_vec,
|
||||
const std::string& file_path,
|
||||
bool mmap_populate,
|
||||
proto::common::LoadPriority load_priority) {
|
||||
auto cw = create_chunk_writer(field_meta);
|
||||
auto [size, row_nums] = cw->calculate_size(array_vec);
|
||||
@ -767,11 +768,11 @@ create_chunk(const FieldMeta& field_meta,
|
||||
~(ChunkTarget::ALIGNED_SIZE - 1);
|
||||
std::shared_ptr<ChunkTarget> target;
|
||||
if (file_path.empty()) {
|
||||
target = std::make_shared<MemChunkTarget>(aligned_size);
|
||||
target = std::make_shared<MemChunkTarget>(aligned_size, mmap_populate);
|
||||
} else {
|
||||
auto io_prio = storage::io::GetPriorityFromLoadPriority(load_priority);
|
||||
target =
|
||||
std::make_shared<MmapChunkTarget>(file_path, aligned_size, io_prio);
|
||||
target = std::make_shared<MmapChunkTarget>(
|
||||
file_path, mmap_populate, aligned_size, io_prio);
|
||||
}
|
||||
cw->write_to_target(array_vec, target);
|
||||
auto data = target->release();
|
||||
@ -791,6 +792,7 @@ create_group_chunk(const std::vector<FieldId>& field_ids,
|
||||
const std::vector<FieldMeta>& field_metas,
|
||||
const std::vector<arrow::ArrayVector>& array_vec,
|
||||
const std::string& file_path,
|
||||
bool mmap_populate,
|
||||
proto::common::LoadPriority load_priority) {
|
||||
std::vector<std::shared_ptr<ChunkWriterBase>> cws;
|
||||
cws.reserve(field_ids.size());
|
||||
@ -825,10 +827,12 @@ create_group_chunk(const std::vector<FieldId>& field_ids,
|
||||
}
|
||||
std::shared_ptr<ChunkTarget> target;
|
||||
if (file_path.empty()) {
|
||||
target = std::make_shared<MemChunkTarget>(total_aligned_size);
|
||||
target =
|
||||
std::make_shared<MemChunkTarget>(total_aligned_size, mmap_populate);
|
||||
} else {
|
||||
target = std::make_shared<MmapChunkTarget>(
|
||||
file_path,
|
||||
mmap_populate,
|
||||
total_aligned_size,
|
||||
storage::io::GetPriorityFromLoadPriority(load_priority));
|
||||
}
|
||||
|
||||
@ -294,6 +294,7 @@ std::unique_ptr<Chunk>
|
||||
create_chunk(const FieldMeta& field_meta,
|
||||
const arrow::ArrayVector& array_vec,
|
||||
const std::string& file_path = "",
|
||||
bool mmap_populate_ = true,
|
||||
proto::common::LoadPriority load_priority =
|
||||
proto::common::LoadPriority::HIGH);
|
||||
|
||||
@ -302,6 +303,7 @@ create_group_chunk(const std::vector<FieldId>& field_ids,
|
||||
const std::vector<FieldMeta>& field_metas,
|
||||
const std::vector<arrow::ArrayVector>& array_vec,
|
||||
const std::string& file_path = "",
|
||||
bool mmap_populate_ = true,
|
||||
proto::common::LoadPriority load_priority =
|
||||
proto::common::LoadPriority::HIGH);
|
||||
|
||||
|
||||
@ -120,6 +120,7 @@ typedef struct CMmapConfig {
|
||||
bool scalar_field_enable_mmap;
|
||||
bool vector_index_enable_mmap;
|
||||
bool vector_field_enable_mmap;
|
||||
bool mmap_populate;
|
||||
} CMmapConfig;
|
||||
|
||||
typedef struct CTraceConfig {
|
||||
|
||||
@ -1004,6 +1004,8 @@ JsonKeyStats::LoadColumnGroup(int64_t column_group_id,
|
||||
std::move(field_meta)));
|
||||
}
|
||||
|
||||
auto& mmap_config = storage::MmapManager::GetInstance().GetMmapConfig();
|
||||
|
||||
auto translator = std::make_unique<
|
||||
milvus::segcore::storagev2translator::GroupChunkTranslator>(
|
||||
segment_id_,
|
||||
@ -1012,6 +1014,7 @@ JsonKeyStats::LoadColumnGroup(int64_t column_group_id,
|
||||
column_group_info,
|
||||
files,
|
||||
enable_mmap,
|
||||
mmap_config.GetMmapPopulate(),
|
||||
milvus_field_ids.size(),
|
||||
load_priority_);
|
||||
|
||||
|
||||
@ -302,6 +302,7 @@ ChunkedSegmentSealedImpl::load_column_group_data_internal(
|
||||
const LoadFieldDataInfo& load_info) {
|
||||
size_t num_rows = storage::GetNumRowsForLoadInfo(load_info);
|
||||
ArrowSchemaPtr arrow_schema = schema_->ConvertToArrowSchema();
|
||||
auto& mmap_config = storage::MmapManager::GetInstance().GetMmapConfig();
|
||||
|
||||
for (auto& [id, info] : load_info.field_infos) {
|
||||
AssertInfo(info.row_count > 0,
|
||||
@ -364,6 +365,7 @@ ChunkedSegmentSealedImpl::load_column_group_data_internal(
|
||||
column_group_info,
|
||||
insert_files,
|
||||
info.enable_mmap,
|
||||
mmap_config.GetMmapPopulate(),
|
||||
milvus_field_ids.size(),
|
||||
load_info.load_priority);
|
||||
|
||||
@ -438,6 +440,8 @@ ChunkedSegmentSealedImpl::load_field_data_internal(
|
||||
const LoadFieldDataInfo& load_info) {
|
||||
SCOPE_CGO_CALL_METRIC();
|
||||
|
||||
auto& mmap_config = storage::MmapManager::GetInstance().GetMmapConfig();
|
||||
|
||||
size_t num_rows = storage::GetNumRowsForLoadInfo(load_info);
|
||||
AssertInfo(
|
||||
!num_rows_.has_value() || num_rows_ == num_rows,
|
||||
@ -507,6 +511,7 @@ ChunkedSegmentSealedImpl::load_field_data_internal(
|
||||
field_data_info,
|
||||
std::move(file_infos),
|
||||
info.enable_mmap,
|
||||
mmap_config.GetMmapPopulate(),
|
||||
load_info.load_priority);
|
||||
|
||||
auto data_type = field_meta.get_data_type();
|
||||
@ -2930,6 +2935,7 @@ ChunkedSegmentSealedImpl::LoadColumnGroup(
|
||||
std::move(chunk_reader),
|
||||
field_metas,
|
||||
use_mmap,
|
||||
mmap_config.GetMmapPopulate(),
|
||||
mmap_dir_path,
|
||||
column_group->columns.size(),
|
||||
segment_load_info_.GetPriority());
|
||||
|
||||
@ -73,12 +73,14 @@ ChunkTranslator::ChunkTranslator(
|
||||
FieldDataInfo field_data_info,
|
||||
std::vector<FileInfo>&& file_infos,
|
||||
bool use_mmap,
|
||||
bool mmap_populate,
|
||||
milvus::proto::common::LoadPriority load_priority)
|
||||
: segment_id_(segment_id),
|
||||
field_id_(field_data_info.field_id),
|
||||
field_meta_(field_meta),
|
||||
key_(fmt::format("seg_{}_f_{}", segment_id, field_meta.get_id().get())),
|
||||
use_mmap_(use_mmap),
|
||||
mmap_populate_(mmap_populate),
|
||||
file_infos_(std::move(file_infos)),
|
||||
mmap_dir_path_(field_data_info.mmap_dir_path),
|
||||
meta_(use_mmap ? milvus::cachinglayer::StorageType::DISK
|
||||
@ -201,8 +203,11 @@ ChunkTranslator::get_cells(
|
||||
AssertInfo(popped, "failed to pop arrow reader from channel");
|
||||
arrow::ArrayVector array_vec =
|
||||
read_single_column_batches(r->reader);
|
||||
chunk = create_chunk(
|
||||
field_meta_, array_vec, filepath.string(), load_priority_);
|
||||
chunk = create_chunk(field_meta_,
|
||||
array_vec,
|
||||
filepath.string(),
|
||||
mmap_populate_,
|
||||
load_priority_);
|
||||
}
|
||||
cells.emplace_back(cid, std::move(chunk));
|
||||
}
|
||||
|
||||
@ -64,6 +64,7 @@ class ChunkTranslator : public milvus::cachinglayer::Translator<milvus::Chunk> {
|
||||
FieldDataInfo field_data_info,
|
||||
std::vector<FileInfo>&& file_infos,
|
||||
bool use_mmap,
|
||||
bool mmap_populate,
|
||||
milvus::proto::common::LoadPriority load_priority);
|
||||
|
||||
size_t
|
||||
@ -102,6 +103,7 @@ class ChunkTranslator : public milvus::cachinglayer::Translator<milvus::Chunk> {
|
||||
int64_t field_id_;
|
||||
std::string key_;
|
||||
bool use_mmap_;
|
||||
bool mmap_populate_;
|
||||
CTMeta meta_;
|
||||
FieldMeta field_meta_;
|
||||
std::string mmap_dir_path_;
|
||||
|
||||
@ -50,6 +50,7 @@ GroupChunkTranslator::GroupChunkTranslator(
|
||||
FieldDataInfo column_group_info,
|
||||
std::vector<std::string> insert_files,
|
||||
bool use_mmap,
|
||||
bool mmap_populate,
|
||||
int64_t num_fields,
|
||||
milvus::proto::common::LoadPriority load_priority)
|
||||
: segment_id_(segment_id),
|
||||
@ -74,6 +75,7 @@ GroupChunkTranslator::GroupChunkTranslator(
|
||||
column_group_info_(column_group_info),
|
||||
insert_files_(insert_files),
|
||||
use_mmap_(use_mmap),
|
||||
mmap_populate_(mmap_populate),
|
||||
load_priority_(load_priority),
|
||||
meta_(num_fields,
|
||||
use_mmap ? milvus::cachinglayer::StorageType::DISK
|
||||
@ -462,8 +464,11 @@ GroupChunkTranslator::load_group_chunk(
|
||||
static_cast<uint8_t>(group_chunk_type_));
|
||||
}
|
||||
std::filesystem::create_directories(filepath.parent_path());
|
||||
chunks = create_group_chunk(
|
||||
field_ids, field_metas, array_vecs, filepath.string());
|
||||
chunks = create_group_chunk(field_ids,
|
||||
field_metas,
|
||||
array_vecs,
|
||||
filepath.string(),
|
||||
mmap_populate_);
|
||||
}
|
||||
return std::make_unique<milvus::GroupChunk>(chunks);
|
||||
}
|
||||
|
||||
@ -43,6 +43,7 @@ class GroupChunkTranslator
|
||||
FieldDataInfo column_group_info,
|
||||
std::vector<std::string> insert_files,
|
||||
bool use_mmap,
|
||||
bool mmap_populate,
|
||||
int64_t num_fields,
|
||||
milvus::proto::common::LoadPriority load_priority);
|
||||
|
||||
@ -119,6 +120,7 @@ class GroupChunkTranslator
|
||||
GroupCTMeta meta_;
|
||||
int64_t timestamp_offet_;
|
||||
bool use_mmap_;
|
||||
bool mmap_populate_;
|
||||
milvus::proto::common::LoadPriority load_priority_{
|
||||
milvus::proto::common::LoadPriority::HIGH};
|
||||
std::vector<std::shared_ptr<parquet::FileMetaData>> parquet_file_metadata_;
|
||||
|
||||
@ -116,6 +116,7 @@ TEST_P(GroupChunkTranslatorTest, TestWithMmap) {
|
||||
column_group_info,
|
||||
paths_,
|
||||
use_mmap,
|
||||
true,
|
||||
schema_->get_field_ids().size(),
|
||||
milvus::proto::common::LoadPriority::LOW);
|
||||
|
||||
@ -279,6 +280,7 @@ TEST_P(GroupChunkTranslatorTest, TestMultipleFiles) {
|
||||
column_group_info,
|
||||
multi_file_paths,
|
||||
use_mmap,
|
||||
true,
|
||||
schema_->get_field_ids().size(),
|
||||
milvus::proto::common::LoadPriority::LOW);
|
||||
|
||||
|
||||
@ -54,6 +54,7 @@ ManifestGroupTranslator::ManifestGroupTranslator(
|
||||
std::unique_ptr<milvus_storage::api::ChunkReader> chunk_reader,
|
||||
const std::unordered_map<FieldId, FieldMeta>& field_metas,
|
||||
bool use_mmap,
|
||||
bool mmap_populate,
|
||||
const std::string& mmap_dir_path,
|
||||
int64_t num_fields,
|
||||
milvus::proto::common::LoadPriority load_priority)
|
||||
@ -92,6 +93,7 @@ ManifestGroupTranslator::ManifestGroupTranslator(
|
||||
/* is_index */ false),
|
||||
/* support_eviction */ true),
|
||||
use_mmap_(use_mmap),
|
||||
mmap_populate_(mmap_populate),
|
||||
load_priority_(load_priority) {
|
||||
auto chunk_size_result = chunk_reader_->get_chunk_size();
|
||||
if (!chunk_size_result.ok()) {
|
||||
@ -318,8 +320,11 @@ ManifestGroupTranslator::load_group_chunk(
|
||||
static_cast<uint8_t>(group_chunk_type_));
|
||||
}
|
||||
std::filesystem::create_directories(filepath.parent_path());
|
||||
chunks = create_group_chunk(
|
||||
field_ids, field_metas, array_vecs, filepath.string());
|
||||
chunks = create_group_chunk(field_ids,
|
||||
field_metas,
|
||||
array_vecs,
|
||||
filepath.string(),
|
||||
mmap_populate_);
|
||||
}
|
||||
|
||||
return std::make_unique<milvus::GroupChunk>(chunks);
|
||||
|
||||
@ -52,6 +52,7 @@ class ManifestGroupTranslator
|
||||
* @param chunk_reader Reader for accessing chunks from storage
|
||||
* @param field_metas Metadata for all fields in this column group
|
||||
* @param use_mmap Whether to use memory mapping for data loading
|
||||
* @param mmap_populate Whether to pass MAP_POPULATE to mmap so the mapped pages are pre-faulted into memory
|
||||
* @param mmap_dir_path Directory path for memory mapping
|
||||
* @param num_fields Total number of fields in the column group
|
||||
* @param load_priority Priority level for loading operations
|
||||
@ -63,6 +64,7 @@ class ManifestGroupTranslator
|
||||
std::unique_ptr<milvus_storage::api::ChunkReader> chunk_reader,
|
||||
const std::unordered_map<FieldId, FieldMeta>& field_metas,
|
||||
bool use_mmap,
|
||||
bool mmap_populate,
|
||||
const std::string& mmap_dir_path,
|
||||
int64_t num_fields,
|
||||
milvus::proto::common::LoadPriority load_priority);
|
||||
@ -179,6 +181,7 @@ class ManifestGroupTranslator
|
||||
|
||||
GroupCTMeta meta_;
|
||||
bool use_mmap_;
|
||||
bool mmap_populate_;
|
||||
std::string mmap_dir_path_;
|
||||
milvus::proto::common::LoadPriority load_priority_{
|
||||
milvus::proto::common::LoadPriority::HIGH};
|
||||
|
||||
@ -136,6 +136,7 @@ struct MmapConfig {
|
||||
bool scalar_field_enable_mmap;
|
||||
bool vector_index_enable_mmap;
|
||||
bool vector_field_enable_mmap;
|
||||
bool mmap_populate;
|
||||
bool
|
||||
GetEnableGrowingMmap() const {
|
||||
return growing_enable_mmap;
|
||||
@ -176,6 +177,10 @@ struct MmapConfig {
|
||||
SetVectorFieldEnableMmap(bool flag) {
|
||||
this->vector_field_enable_mmap = flag;
|
||||
}
|
||||
[[nodiscard]] bool
|
||||
GetMmapPopulate() const {
|
||||
return mmap_populate;
|
||||
}
|
||||
|
||||
std::string
|
||||
GetMmapPath() {
|
||||
|
||||
@ -113,6 +113,7 @@ InitMmapManager(CMmapConfig c_mmap_config) {
|
||||
c_mmap_config.vector_index_enable_mmap;
|
||||
mmap_config.vector_field_enable_mmap =
|
||||
c_mmap_config.vector_field_enable_mmap;
|
||||
mmap_config.mmap_populate = c_mmap_config.mmap_populate;
|
||||
milvus::storage::MmapManager::GetInstance().Init(mmap_config);
|
||||
return milvus::SuccessCStatus();
|
||||
} catch (std::exception& e) {
|
||||
|
||||
@ -278,6 +278,7 @@ func InitMmapManager(params *paramtable.ComponentParam, nodeID int64) error {
|
||||
scalar_field_enable_mmap: C.bool(params.QueryNodeCfg.MmapScalarField.GetAsBool()),
|
||||
vector_index_enable_mmap: C.bool(params.QueryNodeCfg.MmapVectorIndex.GetAsBool()),
|
||||
vector_field_enable_mmap: C.bool(params.QueryNodeCfg.MmapVectorField.GetAsBool()),
|
||||
mmap_populate: C.bool(params.QueryNodeCfg.MmapPopulate.GetAsBool()),
|
||||
}
|
||||
status := C.InitMmapManager(mmapConfig)
|
||||
return HandleCStatus(&status, "InitMmapManager failed")
|
||||
|
||||
@ -3253,6 +3253,7 @@ type queryNodeConfig struct {
|
||||
MmapVectorIndex ParamItem `refreshable:"false"`
|
||||
MmapScalarField ParamItem `refreshable:"false"`
|
||||
MmapScalarIndex ParamItem `refreshable:"false"`
|
||||
MmapPopulate ParamItem `refreshable:"false"`
|
||||
MmapJSONStats ParamItem `refreshable:"false"`
|
||||
GrowingMmapEnabled ParamItem `refreshable:"false"`
|
||||
FixedFileSizeForMmapManager ParamItem `refreshable:"false"`
|
||||
@ -3992,6 +3993,15 @@ This defaults to true, indicating that Milvus creates temporary index for growin
|
||||
}
|
||||
p.MmapScalarIndex.Init(base.mgr)
|
||||
|
||||
p.MmapPopulate = ParamItem{
|
||||
Key: "queryNode.mmap.populate",
|
||||
Version: "2.6.9",
|
||||
DefaultValue: "true",
|
||||
Doc: "MAP_POPULATE flag for mmap",
|
||||
Export: false,
|
||||
}
|
||||
p.MmapPopulate.Init(base.mgr)
|
||||
|
||||
p.MmapJSONStats = ParamItem{
|
||||
Key: "queryNode.mmap.jsonShredding",
|
||||
Version: "2.6.5",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user