From 7ced9fc5d94ba948ca6b5804936056160fa866df Mon Sep 17 00:00:00 2001 From: Buqian Zheng Date: Wed, 23 Jul 2025 10:36:53 +0800 Subject: [PATCH] fix: fix loading resource estimation (#43509) Previously, the requested size was multiplied by loading_memory_factor when it was added to loading_memory_, but not when estimating projected usage in checkPhysicalMemoryLimit. Apply the factor in the estimate as well. issue: #43088 Signed-off-by: Buqian Zheng --- internal/core/src/cachinglayer/Utils.cpp | 28 +++++++++---------- .../core/src/cachinglayer/lrucache/DList.cpp | 24 ++++++++-------- .../core/src/cachinglayer/lrucache/DList.h | 3 -- 3 files changed, 25 insertions(+), 30 deletions(-) diff --git a/internal/core/src/cachinglayer/Utils.cpp b/internal/core/src/cachinglayer/Utils.cpp index cf09194f9c..6dc81d4ac5 100644 --- a/internal/core/src/cachinglayer/Utils.cpp +++ b/internal/core/src/cachinglayer/Utils.cpp @@ -70,8 +70,8 @@ getContainerMemLimit() { try { int64_t env_limit = std::stoll(mem_limit_env); limits.push_back(env_limit); - LOG_DEBUG("[MCL] Found MEM_LIMIT environment variable: {} bytes", - env_limit); + LOG_DEBUG("[MCL] Found MEM_LIMIT environment variable: {}", + FormatBytes(env_limit)); } catch (...) { LOG_WARN("[MCL] Invalid MEM_LIMIT environment variable: {}", mem_limit_env); @@ -103,8 +103,8 @@ getContainerMemLimit() { limits.push_back(proc_limit); LOG_DEBUG( "[MCL] Found process-specific cgroups v2 " - "limit: {} bytes", - proc_limit); + "limit: {}", + FormatBytes(proc_limit)); } catch (...) { // Ignore parse errors } @@ -127,8 +127,8 @@ getContainerMemLimit() { limits.push_back(proc_limit); LOG_DEBUG( "[MCL] Found process-specific cgroups " - "v1 limit: {} bytes", - proc_limit); + "v1 limit: {}", + FormatBytes(proc_limit)); } } catch (...) 
{ // Ignore parse errors @@ -144,8 +144,8 @@ getContainerMemLimit() { // Return the minimum of all found limits if (!limits.empty()) { int64_t min_limit = *std::min_element(limits.begin(), limits.end()); - LOG_DEBUG("[MCL] Using minimum memory limit: {} bytes from {} sources", - min_limit, + LOG_DEBUG("[MCL] Using minimum memory limit: {} from {} sources", + FormatBytes(min_limit), limits.size()); return min_limit; } @@ -168,16 +168,16 @@ getSystemMemoryInfo() { if (container_limit > 0 && container_limit < host_memory) { info.total_memory_bytes = container_limit; - LOG_DEBUG("[MCL] Using container memory limit: {} bytes", - container_limit); + LOG_DEBUG("[MCL] Using container memory limit: {}", + FormatBytes(container_limit)); } else { info.total_memory_bytes = host_memory; if (container_limit > host_memory) { LOG_WARN( - "[MCL] Container limit ({} bytes) exceeds host memory ({} " - "bytes), using host memory", - container_limit, - host_memory); + "[MCL] Container limit ({}) exceeds host memory ({}), using " + "host memory", + FormatBytes(container_limit), + FormatBytes(host_memory)); } } diff --git a/internal/core/src/cachinglayer/lrucache/DList.cpp b/internal/core/src/cachinglayer/lrucache/DList.cpp index b1677d38f1..1bd2ce8c2b 100644 --- a/internal/core/src/cachinglayer/lrucache/DList.cpp +++ b/internal/core/src/cachinglayer/lrucache/DList.cpp @@ -187,14 +187,15 @@ DList::usageInfo(const ResourceUsage& actively_pinned) const { auto used = used_memory_.load(); static double precision = 100.0; return fmt::format( - "low_watermark_: {}, " - "high_watermark_: {} , " - "max_memory_: {} , " + "low_watermark_: {}; " + "high_watermark_: {}; " + "max_memory_: {}; " "used_memory_: {} {:.2}% of max, {:.2}% of " "high_watermark memory, {:.2}% of max, {:.2}% of " - "high_watermark disk, " - "evictable_size_: {}, " - "actively_pinned: {} {:.2}% of used memory, {:.2}% of used disk", + "high_watermark disk; " + "evictable_size_: {}; " + "actively_pinned: {} {:.2}% of used 
memory, {:.2}% of used disk; " + "loading: {}; ", low_watermark_.ToString(), high_watermark_.ToString(), max_memory_.ToString(), @@ -212,7 +213,8 @@ DList::usageInfo(const ResourceUsage& actively_pinned) const { static_cast(actively_pinned.memory_bytes) / used.memory_bytes * precision, static_cast(actively_pinned.file_bytes) / used.file_bytes * - precision); + precision, + loading_memory_.load().ToString()); } // this method is not thread safe, it does not attempt to lock each node, use for debug only. @@ -496,11 +498,6 @@ DList::IsEmpty() const { return head_ == nullptr; } -void -DList::addLoadingResource(const ResourceUsage& size) { - loading_memory_ += size * eviction_config_.loading_memory_factor; -} - void DList::removeLoadingResource(const ResourceUsage& size) { loading_memory_ -= size * eviction_config_.loading_memory_factor; @@ -587,7 +584,8 @@ DList::clearWaitingQueue() { } int64_t -DList::checkPhysicalMemoryLimit(const ResourceUsage& size) const { +DList::checkPhysicalMemoryLimit(const ResourceUsage& original) const { + auto size = original * eviction_config_.loading_memory_factor; auto sys_mem = getSystemMemoryInfo(); auto current_loading = loading_memory_.load(); int64_t projected_usage = sys_mem.used_memory_bytes + diff --git a/internal/core/src/cachinglayer/lrucache/DList.h b/internal/core/src/cachinglayer/lrucache/DList.h index 42bb4fcc79..cb84c03d91 100644 --- a/internal/core/src/cachinglayer/lrucache/DList.h +++ b/internal/core/src/cachinglayer/lrucache/DList.h @@ -124,9 +124,6 @@ class DList { void removeItem(ListNode* list_node, ResourceUsage size); - void - addLoadingResource(const ResourceUsage& size); - void removeLoadingResource(const ResourceUsage& size);