enhance: use growingMmapEnabled to control the behavior of interim index, not vectorField (#36500)

issue:https://github.com/milvus-io/milvus/issues/36392
related pr: https://github.com/milvus-io/milvus/pull/36391

Signed-off-by: cqy123456 <qianya.cheng@zilliz.com>
This commit is contained in:
cqy123456 2024-10-17 20:25:24 +08:00 committed by GitHub
parent 97ff012c67
commit b474374ea5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 139 additions and 13 deletions

View File

@ -266,10 +266,14 @@ class IndexingRecord {
void
Initialize() {
int offset_id = 0;
auto enable_growing_mmap = storage::MmapManager::GetInstance()
.GetMmapConfig()
.GetEnableGrowingMmap();
for (auto& [field_id, field_meta] : schema_.get_fields()) {
++offset_id;
if (field_meta.is_vector() &&
segcore_config_.get_enable_interim_segment_index()) {
segcore_config_.get_enable_interim_segment_index() &&
!enable_growing_mmap) {
// TODO: skip binary small index now, reenable after config.yaml is ready
if (field_meta.get_data_type() == DataType::VECTOR_BINARY) {
continue;

View File

@ -577,6 +577,11 @@ SegmentSealedImpl::MapFieldData(const FieldId field_id, FieldDataInfo& data) {
mmap_fields_.insert(field_id);
}
{
std::unique_lock lck(mutex_);
update_row_count(num_rows);
}
auto ok = unlink(filepath.c_str());
AssertInfo(ok == 0,
fmt::format("failed to unlink mmap data file {}, err: {}",
@ -592,8 +597,19 @@ SegmentSealedImpl::MapFieldData(const FieldId field_id, FieldDataInfo& data) {
insert_record_.seal_pks();
}
std::unique_lock lck(mutex_);
set_bit(field_data_ready_bitset_, field_id, true);
bool use_interim_index = false;
if (generate_interim_index(field_id)) {
std::unique_lock lck(mutex_);
// mmap_fields is useless, no change
fields_.erase(field_id);
set_bit(field_data_ready_bitset_, field_id, false);
use_interim_index = true;
}
if (!use_interim_index) {
std::unique_lock lck(mutex_);
set_bit(field_data_ready_bitset_, field_id, true);
}
}
void
@ -1932,9 +1948,14 @@ SegmentSealedImpl::generate_interim_index(const FieldId field_id) {
bool is_sparse =
field_meta.get_data_type() == DataType::VECTOR_SPARSE_FLOAT;
bool enable_growing_mmap = storage::MmapManager::GetInstance()
.GetMmapConfig()
.GetEnableGrowingMmap();
auto enable_binlog_index = [&]() {
// checkout config
if (!segcore_config_.get_enable_interim_segment_index()) {
// check milvus config
if (!segcore_config_.get_enable_interim_segment_index() ||
enable_growing_mmap) {
return false;
}
// check data type

View File

@ -171,7 +171,7 @@ INSTANTIATE_TEST_SUITE_P(
knowhere::metric::IP,
knowhere::IndexEnum::INDEX_SPARSE_WAND)));
TEST_P(BinlogIndexTest, Accuracy) {
TEST_P(BinlogIndexTest, AccuracyWithLoadFieldData) {
IndexMetaPtr collection_index_meta = GetCollectionIndexMeta(index_type);
segment = CreateSealedSegment(schema, collection_index_meta);
@ -265,6 +265,105 @@ TEST_P(BinlogIndexTest, Accuracy) {
}
}
// Exercises the interim (binlog) index when the vector field is handed to the
// sealed segment through MapFieldData instead of LoadFieldData:
//   1. map the vector field and check that an index exists while the raw
//      field data is not reported as present,
//   2. search through that interim index,
//   3. load a separately built index and check that its results overlap
//      with the interim-index results.
TEST_P(BinlogIndexTest, AccuracyWithMapFieldData) {
    IndexMetaPtr collection_index_meta = GetCollectionIndexMeta(index_type);

    segment = CreateSealedSegment(schema, collection_index_meta);
    LoadOtherFields();

    // NOTE(review): this mutates the process-wide default config and does not
    // restore it afterwards — confirm that other tests do not depend on it.
    auto& segcore_config = milvus::segcore::SegcoreConfig::default_config();
    segcore_config.set_enable_interim_segment_index(true);
    segcore_config.set_nprobe(32);

    // 1. map the vector field data; the segment is expected to build the
    //    interim index from it
    FieldDataInfo field_data_info;
    field_data_info.field_id = vec_field_id.get();
    field_data_info.row_count = data_n;
    field_data_info.mmap_dir_path = "./data/mmap-test";
    field_data_info.channel->push(vec_field_data);
    field_data_info.channel->close();
    segment->MapFieldData(vec_field_id, field_data_info);

    // the segment must expose an index for the field, keep the row count,
    // and no longer report the raw field data as loaded
    EXPECT_TRUE(segment->HasIndex(vec_field_id));
    EXPECT_EQ(segment->get_row_count(), data_n);
    EXPECT_FALSE(segment->HasFieldData(vec_field_id));

    // 2. search through the interim index
    auto num_queries = 10;

    milvus::proto::plan::PlanNode plan_node;
    auto vector_anns = plan_node.mutable_vector_anns();
    vector_anns->set_vector_type(milvus::proto::plan::VectorType::FloatVector);
    vector_anns->set_placeholder_tag("$0");
    vector_anns->set_field_id(vec_field_id.get());
    auto query_info = vector_anns->mutable_query_info();
    query_info->set_topk(topk);
    query_info->set_round_decimal(3);
    query_info->set_metric_type(metric_type);
    query_info->set_search_params(R"({"nprobe": 1024})");
    auto plan_str = plan_node.SerializeAsString();

    // dense fields get random float queries, every other data type is
    // treated as sparse float here
    auto ph_group_raw =
        data_type == DataType::VECTOR_FLOAT
            ? CreatePlaceholderGroupFromBlob(
                  num_queries,
                  data_d,
                  GenRandomFloatVecData(num_queries, data_d).get())
            : CreateSparseFloatPlaceholderGroup(num_queries);

    auto plan = milvus::query::CreateSearchPlanByExpr(
        *schema, plan_str.data(), plan_str.size());
    auto ph_group =
        ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());

    auto binlog_index_sr =
        segment->Search(plan.get(), ph_group.get(), 1L << 63);
    ASSERT_EQ(binlog_index_sr->total_nq_, num_queries);
    EXPECT_EQ(binlog_index_sr->unity_topK_, topk);
    EXPECT_EQ(binlog_index_sr->distances_.size(), num_queries * topk);
    EXPECT_EQ(binlog_index_sr->seg_offsets_.size(), num_queries * topk);

    // 3. replace the interim index with an explicitly built vector index
    {
        milvus::index::CreateIndexInfo create_index_info;
        create_index_info.field_type = data_type;
        create_index_info.metric_type = metric_type;
        create_index_info.index_type = index_type;
        create_index_info.index_engine_version =
            knowhere::Version::GetCurrentVersion().VersionNumber();
        auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(
            create_index_info, milvus::storage::FileManagerContext());

        auto build_conf =
            knowhere::Json{{knowhere::meta::METRIC_TYPE, metric_type},
                           {knowhere::meta::DIM, std::to_string(data_d)},
                           {knowhere::indexparam::NLIST, "1024"}};
        indexing->BuildWithDataset(raw_dataset, build_conf);

        LoadIndexInfo load_info;
        load_info.field_id = vec_field_id.get();
        load_info.index = std::move(indexing);
        load_info.index_params["metric_type"] = metric_type;

        // NOTE(review): the field is dropped before the new index is loaded —
        // presumably to clear the interim state; confirm against
        // SegmentSealedImpl::DropFieldData.
        segment->DropFieldData(vec_field_id);
        ASSERT_NO_THROW(segment->LoadIndex(load_info));
        EXPECT_TRUE(segment->HasIndex(vec_field_id));
        EXPECT_EQ(segment->get_row_count(), data_n);
        EXPECT_FALSE(segment->HasFieldData(vec_field_id));

        // the offsets found via the interim index and via the loaded index
        // must overlap with a recall above 0.45
        auto ivf_sr = segment->Search(plan.get(), ph_group.get(), 1L << 63);
        auto recall = GetKnnSearchRecall(num_queries,
                                         binlog_index_sr->seg_offsets_.data(),
                                         topk,
                                         ivf_sr->seg_offsets_.data(),
                                         topk);
        ASSERT_GT(recall, 0.45);
    }
}
TEST_P(BinlogIndexTest, DisableInterimIndex) {
IndexMetaPtr collection_index_meta = GetCollectionIndexMeta(index_type);

View File

@ -1465,15 +1465,17 @@ func getResourceUsageEstimateOfSegment(schema *schemapb.CollectionSchema, loadIn
if !mmapEnabled || common.IsSystemField(fieldSchema.GetFieldID()) {
segmentMemorySize += binlogSize
if multiplyFactor.enableTempSegmentIndex && SupportInterimIndexDataType(fieldSchema.GetDataType()) {
segmentMemorySize += uint64(float64(binlogSize) * multiplyFactor.tempSegmentIndexFactor)
}
if DoubleMemorySystemField(fieldSchema.GetFieldID()) || DoubleMemoryDataType(fieldSchema.GetDataType()) {
segmentMemorySize += binlogSize
}
} else {
segmentDiskSize += uint64(getBinlogDataDiskSize(fieldBinlog))
}
// querynode will generate a (memory type) interim index for vector type
interimIndexEnable := multiplyFactor.enableTempSegmentIndex && !isGrowingMmapEnable() && SupportInterimIndexDataType(fieldSchema.GetDataType())
if interimIndexEnable {
segmentMemorySize += uint64(float64(binlogSize) * multiplyFactor.tempSegmentIndexFactor)
}
}
if mmapEnabled {

View File

@ -284,3 +284,7 @@ func isDataMmapEnable(fieldSchema *schemapb.FieldSchema) bool {
}
return params.Params.QueryNodeCfg.MmapScalarField.GetAsBool()
}
// isGrowingMmapEnable returns the boolean value of the
// QueryNodeCfg.GrowingMmapEnabled parameter as currently held by params.Params.
func isGrowingMmapEnable() bool {
	enabled := params.Params.QueryNodeCfg.GrowingMmapEnabled.GetAsBool()
	return enabled
}

View File

@ -2709,10 +2709,6 @@ This defaults to true, indicating that Milvus creates temporary index for growin
By activating this feature, the memory overhead associated with newly added or modified data will be significantly minimized.
However, this optimization may come at the cost of a slight decrease in query latency for the affected data segments.`,
Export: true,
Formatter: func(v string) string {
mmapEnabled := p.MmapEnabled.GetAsBool()
return strconv.FormatBool(mmapEnabled && getAsBool(v))
},
}
p.GrowingMmapEnabled.Init(base.mgr)