mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-02-02 01:06:41 +08:00
enhance: use growingMmapEnabled to control the behavior of interim index, not vectorField (#36500)
Issue: https://github.com/milvus-io/milvus/issues/36392
Related PR: https://github.com/milvus-io/milvus/pull/36391
Signed-off-by: cqy123456 <qianya.cheng@zilliz.com>
This commit is contained in:
parent
97ff012c67
commit
b474374ea5
@ -266,10 +266,14 @@ class IndexingRecord {
|
||||
void
|
||||
Initialize() {
|
||||
int offset_id = 0;
|
||||
auto enable_growing_mmap = storage::MmapManager::GetInstance()
|
||||
.GetMmapConfig()
|
||||
.GetEnableGrowingMmap();
|
||||
for (auto& [field_id, field_meta] : schema_.get_fields()) {
|
||||
++offset_id;
|
||||
if (field_meta.is_vector() &&
|
||||
segcore_config_.get_enable_interim_segment_index()) {
|
||||
segcore_config_.get_enable_interim_segment_index() &&
|
||||
!enable_growing_mmap) {
|
||||
// TODO: skip binary small index now, reenable after config.yaml is ready
|
||||
if (field_meta.get_data_type() == DataType::VECTOR_BINARY) {
|
||||
continue;
|
||||
|
||||
@ -577,6 +577,11 @@ SegmentSealedImpl::MapFieldData(const FieldId field_id, FieldDataInfo& data) {
|
||||
mmap_fields_.insert(field_id);
|
||||
}
|
||||
|
||||
{
|
||||
std::unique_lock lck(mutex_);
|
||||
update_row_count(num_rows);
|
||||
}
|
||||
|
||||
auto ok = unlink(filepath.c_str());
|
||||
AssertInfo(ok == 0,
|
||||
fmt::format("failed to unlink mmap data file {}, err: {}",
|
||||
@ -592,8 +597,19 @@ SegmentSealedImpl::MapFieldData(const FieldId field_id, FieldDataInfo& data) {
|
||||
insert_record_.seal_pks();
|
||||
}
|
||||
|
||||
std::unique_lock lck(mutex_);
|
||||
set_bit(field_data_ready_bitset_, field_id, true);
|
||||
bool use_interim_index = false;
|
||||
if (generate_interim_index(field_id)) {
|
||||
std::unique_lock lck(mutex_);
|
||||
// mmap_fields is useless, no change
|
||||
fields_.erase(field_id);
|
||||
set_bit(field_data_ready_bitset_, field_id, false);
|
||||
use_interim_index = true;
|
||||
}
|
||||
|
||||
if (!use_interim_index) {
|
||||
std::unique_lock lck(mutex_);
|
||||
set_bit(field_data_ready_bitset_, field_id, true);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@ -1932,9 +1948,14 @@ SegmentSealedImpl::generate_interim_index(const FieldId field_id) {
|
||||
bool is_sparse =
|
||||
field_meta.get_data_type() == DataType::VECTOR_SPARSE_FLOAT;
|
||||
|
||||
bool enable_growing_mmap = storage::MmapManager::GetInstance()
|
||||
.GetMmapConfig()
|
||||
.GetEnableGrowingMmap();
|
||||
|
||||
auto enable_binlog_index = [&]() {
|
||||
// checkout config
|
||||
if (!segcore_config_.get_enable_interim_segment_index()) {
|
||||
// check milvus config
|
||||
if (!segcore_config_.get_enable_interim_segment_index() ||
|
||||
enable_growing_mmap) {
|
||||
return false;
|
||||
}
|
||||
// check data type
|
||||
|
||||
@ -171,7 +171,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
knowhere::metric::IP,
|
||||
knowhere::IndexEnum::INDEX_SPARSE_WAND)));
|
||||
|
||||
TEST_P(BinlogIndexTest, Accuracy) {
|
||||
TEST_P(BinlogIndexTest, AccuracyWithLoadFieldData) {
|
||||
IndexMetaPtr collection_index_meta = GetCollectionIndexMeta(index_type);
|
||||
|
||||
segment = CreateSealedSegment(schema, collection_index_meta);
|
||||
@ -265,6 +265,105 @@ TEST_P(BinlogIndexTest, Accuracy) {
|
||||
}
|
||||
}
|
||||
|
||||
// Exercises the MapFieldData() loading path of a sealed segment with the
// interim segment index enabled in the default segcore config:
//   1. after MapFieldData() the segment must report an index for the vector
//      field and no raw field data for it;
//   2. a search against that index must return topk results per query;
//   3. after dropping the field data and loading a freshly built vector
//      index, the new search results must overlap with the earlier ones
//      (recall > 0.45).
TEST_P(BinlogIndexTest, AccuracyWithMapFieldData) {
    IndexMetaPtr collection_index_meta = GetCollectionIndexMeta(index_type);

    segment = CreateSealedSegment(schema, collection_index_meta);
    LoadOtherFields();

    auto& segcore_config = milvus::segcore::SegcoreConfig::default_config();
    segcore_config.set_enable_interim_segment_index(true);
    segcore_config.set_nprobe(32);

    // 1. map the vector field data (MapFieldData path); the binlog index is
    //    expected to be built from the mapped data
    FieldDataInfo field_data_info;
    field_data_info.field_id = vec_field_id.get();
    field_data_info.row_count = data_n;
    field_data_info.mmap_dir_path = "./data/mmap-test";
    field_data_info.channel->push(vec_field_data);
    // close the channel so the consumer sees the end of the data stream
    field_data_info.channel->close();
    segment->MapFieldData(vec_field_id, field_data_info);

    // assert that the segment has built the binlog index and no longer
    // reports raw data for the vector field
    EXPECT_TRUE(segment->HasIndex(vec_field_id));
    EXPECT_EQ(segment->get_row_count(), data_n);
    EXPECT_FALSE(segment->HasFieldData(vec_field_id));

    // 2. search binlog index
    auto num_queries = 10;

    milvus::proto::plan::PlanNode plan_node;
    auto vector_anns = plan_node.mutable_vector_anns();
    vector_anns->set_vector_type(milvus::proto::plan::VectorType::FloatVector);
    vector_anns->set_placeholder_tag("$0");
    vector_anns->set_field_id(vec_field_id.get());

    auto query_info = vector_anns->mutable_query_info();
    query_info->set_topk(topk);
    query_info->set_round_decimal(3);
    query_info->set_metric_type(metric_type);
    query_info->set_search_params(R"({"nprobe": 1024})");
    auto plan_str = plan_node.SerializeAsString();

    // dense float vectors get random query data; every other data type in
    // this suite goes through the sparse placeholder helper
    auto ph_group_raw =
        data_type == DataType::VECTOR_FLOAT
            ? CreatePlaceholderGroupFromBlob(
                  num_queries,
                  data_d,
                  GenRandomFloatVecData(num_queries, data_d).get())
            : CreateSparseFloatPlaceholderGroup(num_queries);

    auto plan = milvus::query::CreateSearchPlanByExpr(
        *schema, plan_str.data(), plan_str.size());
    auto ph_group =
        ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());

    auto binlog_index_sr =
        segment->Search(plan.get(), ph_group.get(), 1L << 63);
    ASSERT_EQ(binlog_index_sr->total_nq_, num_queries);
    EXPECT_EQ(binlog_index_sr->unity_topK_, topk);
    EXPECT_EQ(binlog_index_sr->distances_.size(), num_queries * topk);
    EXPECT_EQ(binlog_index_sr->seg_offsets_.size(), num_queries * topk);

    // 3. update vector index
    {
        milvus::index::CreateIndexInfo create_index_info;
        create_index_info.field_type = data_type;
        create_index_info.metric_type = metric_type;
        create_index_info.index_type = index_type;
        create_index_info.index_engine_version =
            knowhere::Version::GetCurrentVersion().VersionNumber();
        auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(
            create_index_info, milvus::storage::FileManagerContext());

        auto build_conf =
            knowhere::Json{{knowhere::meta::METRIC_TYPE, metric_type},
                           {knowhere::meta::DIM, std::to_string(data_d)},
                           {knowhere::indexparam::NLIST, "1024"}};
        indexing->BuildWithDataset(raw_dataset, build_conf);

        LoadIndexInfo load_info;
        load_info.field_id = vec_field_id.get();

        load_info.index = std::move(indexing);
        load_info.index_params["metric_type"] = metric_type;
        segment->DropFieldData(vec_field_id);
        ASSERT_NO_THROW(segment->LoadIndex(load_info));
        EXPECT_TRUE(segment->HasIndex(vec_field_id));
        EXPECT_EQ(segment->get_row_count(), data_n);
        EXPECT_FALSE(segment->HasFieldData(vec_field_id));

        // compare the hits of the newly loaded index with the hits obtained
        // from the binlog index for the same plan and placeholder group
        auto ivf_sr = segment->Search(plan.get(), ph_group.get(), 1L << 63);
        auto recall = GetKnnSearchRecall(num_queries,
                                         binlog_index_sr->seg_offsets_.data(),
                                         topk,
                                         ivf_sr->seg_offsets_.data(),
                                         topk);
        ASSERT_GT(recall, 0.45);
    }
}
|
||||
|
||||
TEST_P(BinlogIndexTest, DisableInterimIndex) {
|
||||
IndexMetaPtr collection_index_meta = GetCollectionIndexMeta(index_type);
|
||||
|
||||
|
||||
@ -1465,15 +1465,17 @@ func getResourceUsageEstimateOfSegment(schema *schemapb.CollectionSchema, loadIn
|
||||
|
||||
if !mmapEnabled || common.IsSystemField(fieldSchema.GetFieldID()) {
|
||||
segmentMemorySize += binlogSize
|
||||
if multiplyFactor.enableTempSegmentIndex && SupportInterimIndexDataType(fieldSchema.GetDataType()) {
|
||||
segmentMemorySize += uint64(float64(binlogSize) * multiplyFactor.tempSegmentIndexFactor)
|
||||
}
|
||||
if DoubleMemorySystemField(fieldSchema.GetFieldID()) || DoubleMemoryDataType(fieldSchema.GetDataType()) {
|
||||
segmentMemorySize += binlogSize
|
||||
}
|
||||
} else {
|
||||
segmentDiskSize += uint64(getBinlogDataDiskSize(fieldBinlog))
|
||||
}
|
||||
// querynode will generate a (memory type) interim index for vector type
|
||||
interimIndexEnable := multiplyFactor.enableTempSegmentIndex && !isGrowingMmapEnable() && SupportInterimIndexDataType(fieldSchema.GetDataType())
|
||||
if interimIndexEnable {
|
||||
segmentMemorySize += uint64(float64(binlogSize) * multiplyFactor.tempSegmentIndexFactor)
|
||||
}
|
||||
}
|
||||
|
||||
if mmapEnabled {
|
||||
|
||||
@ -284,3 +284,7 @@ func isDataMmapEnable(fieldSchema *schemapb.FieldSchema) bool {
|
||||
}
|
||||
return params.Params.QueryNodeCfg.MmapScalarField.GetAsBool()
|
||||
}
|
||||
|
||||
func isGrowingMmapEnable() bool {
|
||||
return params.Params.QueryNodeCfg.GrowingMmapEnabled.GetAsBool()
|
||||
}
|
||||
|
||||
@ -2709,10 +2709,6 @@ This defaults to true, indicating that Milvus creates temporary index for growin
|
||||
By activating this feature, the memory overhead associated with newly added or modified data will be significantly minimized.
|
||||
However, this optimization may come at the cost of a slight decrease in query latency for the affected data segments.`,
|
||||
Export: true,
|
||||
Formatter: func(v string) string {
|
||||
mmapEnabled := p.MmapEnabled.GetAsBool()
|
||||
return strconv.FormatBool(mmapEnabled && getAsBool(v))
|
||||
},
|
||||
}
|
||||
p.GrowingMmapEnabled.Init(base.mgr)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user