From 347acf62de20668c4411842a7d64a009b8ed6226 Mon Sep 17 00:00:00 2001
From: wei liu
Date: Thu, 5 Jun 2025 16:54:33 +0800
Subject: [PATCH] fix: Fix intermittent deadlock in grouped allocator (#42524)

issue: #42523

Resolve deadlock issue in GroupedAllocator where resource release
notifications fail to propagate across hierarchy levels, causing child
allocators to wait indefinitely.

Changes include:
- Add recursive notify() method to GroupedAllocator
- Ensure all child allocators receive resource release notifications
- Fix TOCTOU race condition in hierarchical resource management

This resolves the intermittent test timeout in TestGroupedAllocator test
that exhibited deadlock due to missed condition notifications.

Signed-off-by: Wei Liu
---
 pkg/util/vralloc/sharedalloc.go | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pkg/util/vralloc/sharedalloc.go b/pkg/util/vralloc/sharedalloc.go
index 7944e98b50..9428e90699 100644
--- a/pkg/util/vralloc/sharedalloc.go
+++ b/pkg/util/vralloc/sharedalloc.go
@@ -110,6 +110,14 @@ func (ga *GroupedAllocator) GetAllocator(name string) Allocator[string] {
 	return ga.children[name]
 }
 
+// notify calls notify on the GroupedAllocator's own SharedAllocator and then on each of its children.
+func (ga *GroupedAllocator) notify() {
+	ga.SharedAllocator.notify()
+	for _, child := range ga.children {
+		child.notify()
+	}
+}
+
 type GroupedAllocatorBuilder struct {
 	ga GroupedAllocator
 }