enhance: tiered index updates (#44433)

issue: #42032 #44212 

- special case for warmup param and cell storage size for tiered index
- add a config to enable/disable storage usage tracking

---------

Signed-off-by: chasingegg <chao.gao@zilliz.com>
This commit is contained in:
Gao 2025-09-22 21:34:11 +08:00 committed by GitHub
parent 75557f3eb8
commit 539f17f1ad
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 37 additions and 4 deletions

View File

@ -509,6 +509,7 @@ queryNode:
# If a cached data hasn't been accessed again after this time since its last access, it will be evicted.
# If set to 0, time based eviction is disabled.
cacheTtl: 0
storageUsageTrackingEnabled: false # Enable storage usage tracking for Tiered Storage. Defaults to false.
knowhereScoreConsistency: false # Enable knowhere strong consistency score computation logic
deleteDumpBatchSize: 10000 # Batch size for delete snapshot dump in segcore.
loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments

View File

@ -195,6 +195,7 @@ ConfigureTieredStorage(const CacheWarmupPolicy scalarFieldCacheWarmupPolicy,
const int64_t disk_low_watermark_bytes,
const int64_t disk_high_watermark_bytes,
const int64_t disk_max_bytes,
const bool storage_usage_tracking_enabled,
const bool eviction_enabled,
const int64_t cache_touch_window_ms,
const bool background_eviction_enabled,
@ -216,6 +217,7 @@ ConfigureTieredStorage(const CacheWarmupPolicy scalarFieldCacheWarmupPolicy,
disk_low_watermark_bytes,
disk_high_watermark_bytes,
disk_max_bytes},
storage_usage_tracking_enabled,
eviction_enabled,
{cache_touch_window_ms,
background_eviction_enabled,

View File

@ -106,6 +106,8 @@ ConfigureTieredStorage(
const int64_t disk_low_watermark_bytes,
const int64_t disk_high_watermark_bytes,
const int64_t disk_max_bytes,
// storage usage tracking enabled
const bool storage_usage_tracking_enabled,
// eviction enabled
const bool eviction_enabled,
// eviction configs

View File

@ -39,8 +39,16 @@ SealedIndexTranslator::SealedIndexTranslator(
milvus::segcore::getCellDataType(
/* is_vector */ IsVectorDataType(load_index_info->field_type),
/* is_index */ true),
milvus::segcore::getCacheWarmupPolicy(
/* is_vector */ IsVectorDataType(load_index_info->field_type),
// If the index data supports lazy load internally, the index metadata is always
// warmed up synchronously; the configured warmup policy is then applied inside the index.
// Currently only vector indexes can support lazy load.
(IsVectorDataType(load_index_info->field_type) &&
knowhere::IndexFactory::Instance().FeatureCheck(
index_info_.index_type, knowhere::feature::LAZY_LOAD))
? CacheWarmupPolicy::CacheWarmupPolicy_Sync
: milvus::segcore::getCacheWarmupPolicy(
/* is_vector */ IsVectorDataType(
load_index_info->field_type),
/* is_index */ true),
/* support_eviction */
// if index data supports lazy load internally, we don't need to support eviction for index metadata

View File

@ -44,6 +44,13 @@ class SealedIndexTranslator
// Reports the storage size, in bytes, attributed to the cells of this index.
//
// The `cids` argument is not consulted: the result depends only on the
// index load info and the index type held by this translator.
//
// Returns 0 when the field is a vector type and the index type reports the
// LAZY_LOAD feature (the index data supports lazy load internally);
// otherwise returns the recorded index size, floored at 1 MiB.
int64_t
cells_storage_bytes(
    const std::vector<milvus::cachinglayer::cid_t>& cids) const override {
    // Floor applied to the reported size of an index that is not lazily loaded.
    constexpr int64_t kMinStorageBytes = 1 * 1024 * 1024;

    // Currently only a vector index can support lazy load, so the feature
    // check is evaluated only for vector fields (short-circuit).
    const bool lazy_load_supported =
        IsVectorDataType(index_load_info_.field_type) &&
        knowhere::IndexFactory::Instance().FeatureCheck(
            index_info_.index_type, knowhere::feature::LAZY_LOAD);
    if (lazy_load_supported) {
        return 0;
    }

    return std::max(index_load_info_.index_size, kMinStorageBytes);
}

View File

@ -13,7 +13,7 @@
milvus_add_pkg_config("milvus-common")
set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES "")
set( MILVUS-COMMON-VERSION 4767d68 )
set( MILVUS-COMMON-VERSION 3ab63a9 )
set( GIT_REPOSITORY "https://github.com/zilliztech/milvus-common.git")
message(STATUS "milvus-common repo: ${GIT_REPOSITORY}")

View File

@ -38,6 +38,7 @@ main(int argc, char** argv) {
CacheWarmupPolicy::CacheWarmupPolicy_Disable},
{1024 * mb, 1024 * mb, 1024 * mb, 1024 * mb, 1024 * mb, 1024 * mb},
true,
true,
{10, true, 30});
return RUN_ALL_TESTS();

View File

@ -351,6 +351,7 @@ func InitTieredStorage(params *paramtable.ComponentParam) error {
diskHighWatermarkBytes := C.int64_t(diskHighWatermarkRatio * float64(osDiskBytes))
diskMaxBytes := C.int64_t(diskMaxRatio * float64(osDiskBytes))
storageUsageTrackingEnabled := C.bool(params.QueryNodeCfg.StorageUsageTrackingEnabled.GetAsBool())
evictionEnabled := C.bool(params.QueryNodeCfg.TieredEvictionEnabled.GetAsBool())
cacheTouchWindowMs := C.int64_t(params.QueryNodeCfg.TieredCacheTouchWindowMs.GetAsInt64())
backgroundEvictionEnabled := C.bool(params.QueryNodeCfg.TieredBackgroundEvictionEnabled.GetAsBool())
@ -368,6 +369,7 @@ func InitTieredStorage(params *paramtable.ComponentParam) error {
vectorIndexCacheWarmupPolicy,
memoryLowWatermarkBytes, memoryHighWatermarkBytes, memoryMaxBytes,
diskLowWatermarkBytes, diskHighWatermarkBytes, diskMaxBytes,
storageUsageTrackingEnabled,
evictionEnabled, cacheTouchWindowMs,
backgroundEvictionEnabled, evictionIntervalMs, cacheCellUnaccessedSurvivalTime,
overloadedMemoryThresholdPercentage, loadingResourceFactor, maxDiskUsagePercentage, diskPath)

View File

@ -3057,6 +3057,7 @@ type queryNodeConfig struct {
TieredEvictionIntervalMs ParamItem `refreshable:"false"`
CacheCellUnaccessedSurvivalTime ParamItem `refreshable:"false"`
TieredLoadingResourceFactor ParamItem `refreshable:"false"`
StorageUsageTrackingEnabled ParamItem `refreshable:"false"`
KnowhereScoreConsistency ParamItem `refreshable:"false"`
@ -3441,6 +3442,15 @@ If set to 0, time based eviction is disabled.`,
}
p.CacheCellUnaccessedSurvivalTime.Init(base.mgr)
p.StorageUsageTrackingEnabled = ParamItem{
Key: "queryNode.segcore.tieredStorage.storageUsageTrackingEnabled",
Version: "2.6.3",
DefaultValue: "false",
Doc: "Enable storage usage tracking for Tiered Storage. Defaults to false.",
Export: true,
}
p.StorageUsageTrackingEnabled.Init(base.mgr)
p.TieredLoadingResourceFactor = ParamItem{
Key: "queryNode.segcore.tieredStorage.loadingResourceFactor",
Version: "2.6.0",