From 79926b412cf1003129e19409dbd43b95c8a58c79 Mon Sep 17 00:00:00 2001
From: congqixia <congqi.xia@zilliz.com>
Date: Thu, 20 Nov 2025 11:57:06 +0800
Subject: [PATCH] fix: protect tbb concurrent_map emplace to avoid race
 condition deadlock (#45681)

Related to #44974

The emplace() operation on the tbb concurrent map was not protected,
allowing other threads to erase entries between the emplace attempt and
the subsequent lookup.

Solution:
1. Add shared_lock protection around the emplace() operation to prevent
concurrent erasure during insertion
2. Instead of returning nullptr when the double-check lookup no longer
finds the key, recursively call Get(key) to retry the entire operation
3. Fix typo: "earsed" -> "erased"

This ensures that concurrent Get() operations are properly synchronized
and will eventually succeed even under high contention.

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
---
 internal/core/src/storage/StorageV2FSCache.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/internal/core/src/storage/StorageV2FSCache.cpp b/internal/core/src/storage/StorageV2FSCache.cpp
index d18fb670e4..c624b053d2 100644
--- a/internal/core/src/storage/StorageV2FSCache.cpp
+++ b/internal/core/src/storage/StorageV2FSCache.cpp
@@ -37,16 +37,21 @@ StorageV2FSCache::Get(const Key& key) {
     std::promise<milvus_storage::ArrowFileSystemPtr> p;
     std::shared_future<milvus_storage::ArrowFileSystemPtr> f = p.get_future();
 
+    std::shared_lock lck(mutex_);
     auto [iter, inserted] =
         concurrent_map_.emplace(key, Value(std::move(p), f));
+    lck.unlock();
+
     if (!inserted) {
         std::shared_lock lck(mutex_);
-        // double check: avoid iter has been earsed by other thread
+        // double check: avoid iter has been erased by other thread
         auto it = concurrent_map_.find(key);
         if (it != concurrent_map_.end()) {
             return it->second.second.get();
         }
-        return nullptr;
+        lck.unlock();
+        // retry if the entry has already been deleted
+        return Get(key);
     }
 
     try {