From ef6d9c25c2e94a4de17d2b1fe959c7d26d93e3f5 Mon Sep 17 00:00:00 2001
From: congqixia <congqi.xia@zilliz.com>
Date: Fri, 26 Dec 2025 10:17:19 +0800
Subject: [PATCH] fix: check final result only in LeaderCacheObserver flaky
 test (#46601)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Related to #46600

The test previously checked if all 3 collection IDs were batched
together in a single InvalidateShardLeaderCache call. This caused
flakiness because the observer may split events across multiple calls.

Fix by accumulating all collection IDs across multiple calls and
verifying that eventually all expected IDs (1, 2, 3) are processed.

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
- Core invariant: the test asserts that all registered collection IDs
{1,2,3} are eventually processed by InvalidateShardLeaderCache across
any number of calls — i.e., the observer must invalidate every
registered collection ID, not necessarily in a single batched RPC (fixes
flaky assumption from issue #46600).
- Logic removed/simplified: the strict expectation that all three IDs
arrive in one InvalidateShardLeaderCache call was replaced by
accumulating IDs into a ConcurrentSet (collectionIDs.Upsert in the mock)
and asserting eventual containment of 1, 2, and 3. This removes the
brittle per-call batching assertion and uses set-based accumulation
(see the mock's Upsert call and the final Eventually check on
collectionIDs.Contain(...)).
- Why this is safe (no data loss or behavior regression): only test
assertions changed — production code (LeaderCacheObserver calling
InvalidateShardLeaderCache) is unchanged. The mock intercepts
InvalidateShardLeaderCache and accumulates req.GetCollectionIDs(); the
test still verifies single-ID handling via the existing len == 1 &&
lo.Contains(...) check (first mock block) and verifies that all IDs
were invalidated over time in the batch scenario (second mock block). No
production code paths were modified, so invalidation behavior and RPC
usage remain identical.
- Bug-fix note: this is a targeted test-only fix for issue #46600 — it
tolerates legitimate splitting of events across multiple
InvalidateShardLeaderCache invocations by aggregating IDs across calls
in the test mock, eliminating flakiness without altering runtime
behavior.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
---
 .../querycoordv2/observers/leader_cache_observer_test.go | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/internal/querycoordv2/observers/leader_cache_observer_test.go b/internal/querycoordv2/observers/leader_cache_observer_test.go
index 237f8f27ca..77165d2434 100644
--- a/internal/querycoordv2/observers/leader_cache_observer_test.go
+++ b/internal/querycoordv2/observers/leader_cache_observer_test.go
@@ -69,23 +69,18 @@ func (suite *LeaderCacheObserverTestSuite) TestInvalidateShardLeaderCache() {
 	}, 3*time.Second, 1*time.Second)
 
 	// test batch submit events
-	ret.Store(false)
+	collectionIDs = typeutil.NewConcurrentSet[int64]()
 	suite.mockProxyManager.ExpectedCalls = nil
 	suite.mockProxyManager.EXPECT().InvalidateShardLeaderCache(mock.Anything, mock.Anything).RunAndReturn(
 		func(ctx context.Context, req *proxypb.InvalidateShardLeaderCacheRequest) error {
 			collectionIDs.Upsert(req.GetCollectionIDs()...)
-			collectionIDs := req.GetCollectionIDs()
-
-			if len(collectionIDs) == 3 && lo.Contains(collectionIDs, 1) && lo.Contains(collectionIDs, 2) && lo.Contains(collectionIDs, 3) {
-				ret.Store(true)
-			}
 			return nil
 		})
 	suite.observer.RegisterEvent(1)
 	suite.observer.RegisterEvent(2)
 	suite.observer.RegisterEvent(3)
 	suite.Eventually(func() bool {
-		return ret.Load()
+		return collectionIDs.Contain(1) && collectionIDs.Contain(2) && collectionIDs.Contain(3)
 	}, 3*time.Second, 1*time.Second)
 }