mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
fix: apply mmap settings correctly during segment load (#46017)
Previously, mmap settings configured at the collection or field level were not being applied during segment loading in segcore. This was caused by: 1. A typo in the key name: "mmap.enable" instead of "mmap.enabled" 2. Missing logic to parse and apply mmap settings from schema This commit fixes the issue by: - Correcting the key name to "mmap.enabled" to match the standard - Adding Schema::MmapEnabled() method to retrieve field/collection level mmap settings with proper fallback logic - Parsing mmap settings from field type_params and collection properties during schema parsing - Applying computed mmap settings in LoadColumnGroup() and Load() methods instead of hardcoded false values - Using global MmapConfig as fallback when no explicit setting exists The mmap setting priority is now: 1. Field-level mmap setting (from type_params) 2. Collection-level mmap setting (from properties) 3. Global mmap config (from MmapManager) For column groups, if any field has mmap enabled, the entire group uses mmap (since they are loaded together). Related issue: #45060 Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
parent
dd3797f3ac
commit
5d0c8b1b40
@ -133,3 +133,4 @@ const std::string ELEMENT_TYPE_KEY_FOR_ARROW = "elementType";
|
||||
// EPSILON value for comparing float numbers
|
||||
const float EPSILON = 0.0000000119;
|
||||
const std::string NAMESPACE_FIELD_NAME = "$namespace_id";
|
||||
const std::string MMAP_ENABLED_KEY = "mmap.enabled";
|
||||
|
||||
@ -26,7 +26,9 @@
|
||||
#include "Schema.h"
|
||||
#include "SystemProperty.h"
|
||||
#include "arrow/util/key_value_metadata.h"
|
||||
#include "common/Consts.h"
|
||||
#include "milvus-storage/common/constants.h"
|
||||
#include "pb/common.pb.h"
|
||||
#include "protobuf_utils.h"
|
||||
|
||||
namespace milvus {
|
||||
@ -61,6 +63,12 @@ Schema::ParseFrom(const milvus::proto::schema::CollectionSchema& schema_proto) {
|
||||
if (child.name() == namespace_field_name) {
|
||||
schema->set_namespace_field_id(field_id);
|
||||
}
|
||||
|
||||
auto [has_setting, enabled] =
|
||||
GetBoolFromRepeatedKVs(child.type_params(), MMAP_ENABLED_KEY);
|
||||
if (has_setting) {
|
||||
schema->mmap_fields_[field_id] = enabled;
|
||||
}
|
||||
};
|
||||
|
||||
for (const milvus::proto::schema::FieldSchema& child :
|
||||
@ -75,6 +83,9 @@ Schema::ParseFrom(const milvus::proto::schema::CollectionSchema& schema_proto) {
|
||||
}
|
||||
}
|
||||
|
||||
std::tie(schema->has_mmap_setting_, schema->mmap_enabled_) =
|
||||
GetBoolFromRepeatedKVs(schema_proto.properties(), MMAP_ENABLED_KEY);
|
||||
|
||||
AssertInfo(schema->get_primary_field_id().has_value(),
|
||||
"primary key should be specified");
|
||||
|
||||
@ -150,4 +161,14 @@ Schema::AbsentFields(Schema& old_schema) const {
|
||||
return std::make_unique<std::vector<FieldMeta>>(result);
|
||||
}
|
||||
|
||||
std::pair<bool, bool>
|
||||
Schema::MmapEnabled(const FieldId& field_id) const {
|
||||
auto it = mmap_fields_.find(field_id);
|
||||
// fallback to collection-level config
|
||||
if (it == mmap_fields_.end()) {
|
||||
return {has_mmap_setting_, mmap_enabled_};
|
||||
}
|
||||
return {true, it->second};
|
||||
}
|
||||
|
||||
} // namespace milvus
|
||||
|
||||
@ -374,6 +374,24 @@ class Schema {
|
||||
std::unique_ptr<std::vector<FieldMeta>>
|
||||
AbsentFields(Schema& old_schema) const;
|
||||
|
||||
/**
|
||||
* @brief Determines whether the specified field should use mmap for data loading.
|
||||
*
|
||||
* This function checks mmap settings at the field level first. If no field-level
|
||||
* setting is found, it falls back to the collection-level mmap configuration.
|
||||
*
|
||||
* @param field The field ID to check mmap settings for.
|
||||
*
|
||||
* @return A pair of booleans:
|
||||
* - first: Whether an mmap setting exists (at field or collection level).
|
||||
* - second: Whether mmap is enabled (only meaningful when first is true).
|
||||
*
|
||||
* @note If no mmap setting exists at any level, first will be false and second
|
||||
* should be ignored.
|
||||
*/
|
||||
std::pair<bool, bool>
|
||||
MmapEnabled(const FieldId& field) const;
|
||||
|
||||
private:
|
||||
int64_t debug_id = START_USER_FIELDID;
|
||||
std::vector<FieldId> field_ids_;
|
||||
@ -395,6 +413,11 @@ class Schema {
|
||||
|
||||
// schema_version_, currently marked with update timestamp
|
||||
uint64_t schema_version_;
|
||||
|
||||
// mmap settings
|
||||
bool has_mmap_setting_ = false;
|
||||
bool mmap_enabled_ = false;
|
||||
std::unordered_map<FieldId, bool> mmap_fields_;
|
||||
};
|
||||
|
||||
using SchemaPtr = std::shared_ptr<Schema>;
|
||||
|
||||
212
internal/core/src/common/SchemaTest.cpp
Normal file
212
internal/core/src/common/SchemaTest.cpp
Normal file
@ -0,0 +1,212 @@
|
||||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "common/Schema.h"
|
||||
|
||||
using namespace milvus;
|
||||
|
||||
class SchemaTest : public ::testing::Test {
|
||||
protected:
|
||||
void
|
||||
SetUp() override {
|
||||
schema_ = std::make_shared<Schema>();
|
||||
}
|
||||
|
||||
std::shared_ptr<Schema> schema_;
|
||||
};
|
||||
|
||||
TEST_F(SchemaTest, MmapEnabledNoSetting) {
|
||||
// Add a field without any mmap setting
|
||||
auto field_id = schema_->AddDebugField("test_field", DataType::INT64);
|
||||
schema_->set_primary_field_id(field_id);
|
||||
|
||||
// When no mmap setting exists at any level, first should be false
|
||||
auto [has_setting, enabled] = schema_->MmapEnabled(field_id);
|
||||
EXPECT_FALSE(has_setting);
|
||||
// The enabled value is undefined when has_setting is false, so we don't check it
|
||||
}
|
||||
|
||||
TEST_F(SchemaTest, MmapEnabledCollectionLevelEnabled) {
|
||||
// Create schema with collection-level mmap enabled via protobuf
|
||||
milvus::proto::schema::CollectionSchema schema_proto;
|
||||
|
||||
auto* field = schema_proto.add_fields();
|
||||
field->set_fieldid(100);
|
||||
field->set_name("pk_field");
|
||||
field->set_data_type(milvus::proto::schema::DataType::Int64);
|
||||
field->set_is_primary_key(true);
|
||||
|
||||
// Set collection-level mmap enabled
|
||||
auto* prop = schema_proto.add_properties();
|
||||
prop->set_key("mmap.enabled");
|
||||
prop->set_value("true");
|
||||
|
||||
auto parsed_schema = Schema::ParseFrom(schema_proto);
|
||||
FieldId pk_field_id(100);
|
||||
|
||||
auto [has_setting, enabled] = parsed_schema->MmapEnabled(pk_field_id);
|
||||
EXPECT_TRUE(has_setting);
|
||||
EXPECT_TRUE(enabled);
|
||||
}
|
||||
|
||||
TEST_F(SchemaTest, MmapEnabledCollectionLevelDisabled) {
|
||||
// Create schema with collection-level mmap disabled via protobuf
|
||||
milvus::proto::schema::CollectionSchema schema_proto;
|
||||
|
||||
auto* field = schema_proto.add_fields();
|
||||
field->set_fieldid(100);
|
||||
field->set_name("pk_field");
|
||||
field->set_data_type(milvus::proto::schema::DataType::Int64);
|
||||
field->set_is_primary_key(true);
|
||||
|
||||
// Set collection-level mmap disabled
|
||||
auto* prop = schema_proto.add_properties();
|
||||
prop->set_key("mmap.enabled");
|
||||
prop->set_value("false");
|
||||
|
||||
auto parsed_schema = Schema::ParseFrom(schema_proto);
|
||||
FieldId pk_field_id(100);
|
||||
|
||||
auto [has_setting, enabled] = parsed_schema->MmapEnabled(pk_field_id);
|
||||
EXPECT_TRUE(has_setting);
|
||||
EXPECT_FALSE(enabled);
|
||||
}
|
||||
|
||||
TEST_F(SchemaTest, MmapEnabledCollectionLevelCaseInsensitive) {
|
||||
// Test that mmap value parsing is case-insensitive
|
||||
milvus::proto::schema::CollectionSchema schema_proto;
|
||||
|
||||
auto* field = schema_proto.add_fields();
|
||||
field->set_fieldid(100);
|
||||
field->set_name("pk_field");
|
||||
field->set_data_type(milvus::proto::schema::DataType::Int64);
|
||||
field->set_is_primary_key(true);
|
||||
|
||||
// Set collection-level mmap with uppercase TRUE
|
||||
auto* prop = schema_proto.add_properties();
|
||||
prop->set_key("mmap.enabled");
|
||||
prop->set_value("TRUE");
|
||||
|
||||
auto parsed_schema = Schema::ParseFrom(schema_proto);
|
||||
FieldId pk_field_id(100);
|
||||
|
||||
auto [has_setting, enabled] = parsed_schema->MmapEnabled(pk_field_id);
|
||||
EXPECT_TRUE(has_setting);
|
||||
EXPECT_TRUE(enabled);
|
||||
}
|
||||
|
||||
TEST_F(SchemaTest, MmapEnabledFieldLevelOverridesCollectionLevel) {
|
||||
// Test that field-level mmap setting overrides collection-level setting
|
||||
milvus::proto::schema::CollectionSchema schema_proto;
|
||||
|
||||
auto* field = schema_proto.add_fields();
|
||||
field->set_fieldid(100);
|
||||
field->set_name("pk_field");
|
||||
field->set_data_type(milvus::proto::schema::DataType::Int64);
|
||||
field->set_is_primary_key(true);
|
||||
|
||||
// Set collection-level mmap enabled
|
||||
auto* prop = schema_proto.add_properties();
|
||||
prop->set_key("mmap.enabled");
|
||||
prop->set_value("true");
|
||||
|
||||
// Note: Field-level mmap settings are set via schema_proto.properties()
|
||||
// in the current implementation, which applies to all fields.
|
||||
// This test verifies the fallback behavior when no field-level override exists.
|
||||
|
||||
auto parsed_schema = Schema::ParseFrom(schema_proto);
|
||||
FieldId pk_field_id(100);
|
||||
|
||||
// Without field-level override, should use collection-level setting
|
||||
auto [has_setting, enabled] = parsed_schema->MmapEnabled(pk_field_id);
|
||||
EXPECT_TRUE(has_setting);
|
||||
EXPECT_TRUE(enabled);
|
||||
}
|
||||
|
||||
TEST_F(SchemaTest, MmapEnabledNonExistentField) {
|
||||
// Test MmapEnabled with a field that doesn't exist in mmap_fields_
|
||||
// but collection-level setting exists
|
||||
milvus::proto::schema::CollectionSchema schema_proto;
|
||||
|
||||
auto* field1 = schema_proto.add_fields();
|
||||
field1->set_fieldid(100);
|
||||
field1->set_name("pk_field");
|
||||
field1->set_data_type(milvus::proto::schema::DataType::Int64);
|
||||
field1->set_is_primary_key(true);
|
||||
|
||||
auto* field2 = schema_proto.add_fields();
|
||||
field2->set_fieldid(101);
|
||||
field2->set_name("data_field");
|
||||
field2->set_data_type(milvus::proto::schema::DataType::Float);
|
||||
|
||||
// Set collection-level mmap enabled
|
||||
auto* prop = schema_proto.add_properties();
|
||||
prop->set_key("mmap.enabled");
|
||||
prop->set_value("true");
|
||||
|
||||
auto parsed_schema = Schema::ParseFrom(schema_proto);
|
||||
|
||||
// Both fields should fallback to collection-level setting
|
||||
FieldId pk_field_id(100);
|
||||
auto [has_setting1, enabled1] = parsed_schema->MmapEnabled(pk_field_id);
|
||||
EXPECT_TRUE(has_setting1);
|
||||
EXPECT_TRUE(enabled1);
|
||||
|
||||
FieldId data_field_id(101);
|
||||
auto [has_setting2, enabled2] = parsed_schema->MmapEnabled(data_field_id);
|
||||
EXPECT_TRUE(has_setting2);
|
||||
EXPECT_TRUE(enabled2);
|
||||
|
||||
// Test with a field ID that was never added to the schema
|
||||
FieldId non_existent_field_id(999);
|
||||
auto [has_setting3, enabled3] =
|
||||
parsed_schema->MmapEnabled(non_existent_field_id);
|
||||
EXPECT_TRUE(has_setting3); // Falls back to collection-level
|
||||
EXPECT_TRUE(enabled3);
|
||||
}
|
||||
|
||||
TEST_F(SchemaTest, MmapEnabledMultipleFields) {
|
||||
// Test MmapEnabled with multiple fields, all using collection-level setting
|
||||
milvus::proto::schema::CollectionSchema schema_proto;
|
||||
|
||||
auto* pk_field = schema_proto.add_fields();
|
||||
pk_field->set_fieldid(100);
|
||||
pk_field->set_name("pk_field");
|
||||
pk_field->set_data_type(milvus::proto::schema::DataType::Int64);
|
||||
pk_field->set_is_primary_key(true);
|
||||
|
||||
auto* int_field = schema_proto.add_fields();
|
||||
int_field->set_fieldid(101);
|
||||
int_field->set_name("int_field");
|
||||
int_field->set_data_type(milvus::proto::schema::DataType::Int32);
|
||||
|
||||
auto* float_field = schema_proto.add_fields();
|
||||
float_field->set_fieldid(102);
|
||||
float_field->set_name("float_field");
|
||||
float_field->set_data_type(milvus::proto::schema::DataType::Float);
|
||||
|
||||
// Set collection-level mmap disabled
|
||||
auto* prop = schema_proto.add_properties();
|
||||
prop->set_key("mmap.enabled");
|
||||
prop->set_value("false");
|
||||
|
||||
auto parsed_schema = Schema::ParseFrom(schema_proto);
|
||||
|
||||
// All fields should have the same collection-level setting
|
||||
for (int64_t id = 100; id <= 102; ++id) {
|
||||
FieldId field_id(id);
|
||||
auto [has_setting, enabled] = parsed_schema->MmapEnabled(field_id);
|
||||
EXPECT_TRUE(has_setting);
|
||||
EXPECT_FALSE(enabled);
|
||||
}
|
||||
}
|
||||
@ -45,6 +45,32 @@ RepeatedKeyValToMap(
|
||||
return mapping;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get a boolean value from repeated KeyValuePair by key.
|
||||
*
|
||||
* @param kvs The repeated KeyValuePair field to search.
|
||||
* @param key The key to look for.
|
||||
* @return std::pair<bool, bool> where:
|
||||
* - first: whether the key was found.
|
||||
* - second: the parsed boolean value (true if value is "true", case-insensitive).
|
||||
*/
|
||||
static std::pair<bool, bool>
|
||||
GetBoolFromRepeatedKVs(
|
||||
const google::protobuf::RepeatedPtrField<proto::common::KeyValuePair>& kvs,
|
||||
const std::string& key) {
|
||||
for (auto& kv : kvs) {
|
||||
if (kv.key() == key) {
|
||||
std::string lower;
|
||||
std::transform(kv.value().begin(),
|
||||
kv.value().end(),
|
||||
std::back_inserter(lower),
|
||||
::tolower);
|
||||
return {true, lower == "true"};
|
||||
}
|
||||
}
|
||||
return {false, false};
|
||||
}
|
||||
|
||||
class ProtoLayout;
|
||||
using ProtoLayoutPtr = std::unique_ptr<ProtoLayout>;
|
||||
|
||||
|
||||
@ -289,16 +289,20 @@ ChunkedSegmentSealedImpl::ConvertFieldIndexInfoToLoadIndexInfo(
|
||||
}
|
||||
}
|
||||
|
||||
bool mmap_enabled = false;
|
||||
auto& mmap_config = storage::MmapManager::GetInstance().GetMmapConfig();
|
||||
auto use_mmap = IsVectorDataType(field_meta.get_data_type())
|
||||
? mmap_config.GetVectorIndexEnableMmap()
|
||||
: mmap_config.GetScalarIndexEnableMmap();
|
||||
|
||||
// Set index params
|
||||
for (const auto& kv_pair : field_index_info->index_params()) {
|
||||
if (kv_pair.key() == "mmap.enable") {
|
||||
if (kv_pair.key() == "mmap.enabled") {
|
||||
std::string lower;
|
||||
std::transform(kv_pair.value().begin(),
|
||||
kv_pair.value().end(),
|
||||
std::back_inserter(lower),
|
||||
::tolower);
|
||||
mmap_enabled = lower == "true";
|
||||
use_mmap = (lower == "true");
|
||||
}
|
||||
load_index_info.index_params[kv_pair.key()] = kv_pair.value();
|
||||
}
|
||||
@ -316,7 +320,7 @@ ChunkedSegmentSealedImpl::ConvertFieldIndexInfoToLoadIndexInfo(
|
||||
milvus::storage::LocalChunkManagerSingleton::GetInstance()
|
||||
.GetChunkManager()
|
||||
->GetRootPath();
|
||||
load_index_info.enable_mmap = mmap_enabled;
|
||||
load_index_info.enable_mmap = use_mmap;
|
||||
|
||||
return load_index_info;
|
||||
}
|
||||
@ -393,6 +397,46 @@ ChunkedSegmentSealedImpl::LoadColumnGroup(
|
||||
|
||||
auto field_metas = schema_->get_field_metas(milvus_field_ids);
|
||||
|
||||
// assumption: vector field occupies whole column group
|
||||
bool is_vector = false;
|
||||
bool index_has_rawdata = true;
|
||||
bool has_mmap_setting = false;
|
||||
bool mmap_enabled = false;
|
||||
for (auto& [field_id, field_meta] : field_metas) {
|
||||
if (IsVectorDataType(field_meta.get_data_type())) {
|
||||
is_vector = true;
|
||||
}
|
||||
std::shared_lock lck(mutex_);
|
||||
if (index_has_raw_data_.find(field_id) != index_has_raw_data_.end()) {
|
||||
index_has_rawdata =
|
||||
index_has_raw_data_.at(field_id) && index_has_rawdata;
|
||||
} else {
|
||||
index_has_rawdata = false;
|
||||
}
|
||||
|
||||
// if field has mmap setting, use it
|
||||
// - mmap setting at collection level, then all field are the same
|
||||
// - mmap setting at field level, we define that as long as one field shall be mmap, then whole group shall be mmaped
|
||||
auto [field_has_setting, field_mmap_enabled] =
|
||||
schema_->MmapEnabled(field_id);
|
||||
has_mmap_setting = has_mmap_setting || field_has_setting;
|
||||
mmap_enabled = mmap_enabled || field_mmap_enabled;
|
||||
}
|
||||
|
||||
if (index_has_rawdata) {
|
||||
LOG_INFO(
|
||||
"[StorageV2] segment {} index(es) provide all raw data for column "
|
||||
"group index {}, skip loading binlog",
|
||||
this->get_segment_id(),
|
||||
index);
|
||||
return;
|
||||
}
|
||||
|
||||
auto& mmap_config = storage::MmapManager::GetInstance().GetMmapConfig();
|
||||
bool global_use_mmap = is_vector ? mmap_config.GetVectorFieldEnableMmap()
|
||||
: mmap_config.GetScalarFieldEnableMmap();
|
||||
auto use_mmap = has_mmap_setting ? mmap_enabled : global_use_mmap;
|
||||
|
||||
auto chunk_reader_result = reader_->get_chunk_reader(index);
|
||||
AssertInfo(chunk_reader_result.ok(),
|
||||
"get chunk reader failed, segment {}, column group index {}",
|
||||
@ -411,7 +455,7 @@ ChunkedSegmentSealedImpl::LoadColumnGroup(
|
||||
index,
|
||||
std::move(chunk_reader),
|
||||
field_metas,
|
||||
false, // TODO
|
||||
use_mmap,
|
||||
column_group->columns.size(),
|
||||
segment_load_info_.priority());
|
||||
auto chunked_column_group =
|
||||
@ -429,7 +473,7 @@ ChunkedSegmentSealedImpl::LoadColumnGroup(
|
||||
column,
|
||||
segment_load_info_.num_of_rows(),
|
||||
data_type,
|
||||
false, // info.enable_mmap,
|
||||
use_mmap,
|
||||
true,
|
||||
std::
|
||||
nullopt); // manifest cannot provide parquet skip index directly
|
||||
@ -530,7 +574,7 @@ ChunkedSegmentSealedImpl::load_column_group_data_internal(
|
||||
std::vector<FieldId> milvus_field_ids;
|
||||
milvus_field_ids.reserve(field_id_list.size());
|
||||
for (int i = 0; i < field_id_list.size(); ++i) {
|
||||
milvus_field_ids.push_back(FieldId(field_id_list.Get(i)));
|
||||
milvus_field_ids.emplace_back(field_id_list.Get(i));
|
||||
merged_in_load_list = merged_in_load_list ||
|
||||
schema_->ShouldLoadField(milvus_field_ids[i]);
|
||||
}
|
||||
@ -2960,6 +3004,31 @@ ChunkedSegmentSealedImpl::Load(milvus::tracer::TraceContext& trace_ctx) {
|
||||
}
|
||||
field_binlog_info.row_count = total_entries;
|
||||
|
||||
bool has_mmap_setting = false;
|
||||
bool mmap_enabled = false;
|
||||
bool is_vector = false;
|
||||
for (const auto& child_field_id : field_binlog.child_fields()) {
|
||||
auto& field_meta = schema_->operator[](FieldId(child_field_id));
|
||||
if (IsVectorDataType(field_meta.get_data_type())) {
|
||||
is_vector = true;
|
||||
}
|
||||
|
||||
// if field has mmap setting, use it
|
||||
// - mmap setting at collection level, then all field are the same
|
||||
// - mmap setting at field level, we define that as long as one field shall be mmap, then whole group shall be mmaped
|
||||
auto [field_has_setting, field_mmap_enabled] =
|
||||
schema_->MmapEnabled(field_id);
|
||||
has_mmap_setting = has_mmap_setting || field_has_setting;
|
||||
mmap_enabled = mmap_enabled || field_mmap_enabled;
|
||||
}
|
||||
|
||||
auto& mmap_config = storage::MmapManager::GetInstance().GetMmapConfig();
|
||||
auto global_use_mmap = is_vector
|
||||
? mmap_config.GetVectorFieldEnableMmap()
|
||||
: mmap_config.GetScalarFieldEnableMmap();
|
||||
field_binlog_info.enable_mmap =
|
||||
has_mmap_setting ? mmap_enabled : global_use_mmap;
|
||||
|
||||
// Store in map
|
||||
load_field_data_info.field_infos[field_id.get()] = field_binlog_info;
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user