Skip to content

Commit b076846

Browse files
committed
fix(execute_class): address PR review feedback
- Replace misleading carried-over comment with accurate description - Add inline comments explaining double-checked locking pattern - Add failure-path test: deploy exception releases lock, allows retry
1 parent a92c92f commit b076846

2 files changed

Lines changed: 36 additions & 5 deletions

File tree

src/runpod_flash/execute_class.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -225,23 +225,22 @@ def __init__(self, *args, **kwargs):
225225
)
226226

227227
async def _ensure_initialized(self):
228-
"""Ensure the remote instance is created."""
228+
"""Ensure the remote instance is created exactly once, even under concurrent calls."""
229+
# Fast path: already initialized, no lock needed.
229230
if self._initialized:
230231
return
231232

233+
# Slow path: acquire lock and re-check to prevent double deployment
234+
# when multiple coroutines race past the fast-path check.
232235
async with self._init_lock:
233236
if self._initialized:
234237
return
235238

236-
# Get remote resource
237239
resource_manager = ResourceManager()
238240
remote_resource = await resource_manager.get_or_deploy_resource(
239241
self._resource_config
240242
)
241243
self._stub = stub_resource(remote_resource, **self._extra)
242-
243-
# Create the remote instance by calling a method (which will trigger instance creation)
244-
# We'll do this on first method call
245244
self._initialized = True
246245

247246
def __getattr__(self, name):

tests/bug_probes/test_class_execution.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,3 +111,35 @@ async def test_second_call_skips_deploy(self, wrapper_instance):
111111

112112
await wrapper_instance._ensure_initialized()
113113
mock_rm.get_or_deploy_resource.assert_awaited_once()
114+
115+
@pytest.mark.asyncio
116+
async def test_deploy_failure_releases_lock_and_allows_retry(
117+
self, wrapper_instance
118+
):
119+
"""If deploy fails, the lock must be released and a subsequent call must retry."""
120+
call_count = 0
121+
122+
async def failing_then_succeeding_deploy(config):
123+
nonlocal call_count
124+
call_count += 1
125+
if call_count == 1:
126+
raise ConnectionError("transient failure")
127+
return MagicMock()
128+
129+
with (
130+
patch("runpod_flash.execute_class.ResourceManager") as mock_rm_cls,
131+
patch("runpod_flash.execute_class.stub_resource", return_value=MagicMock()),
132+
):
133+
mock_rm = MagicMock()
134+
mock_rm.get_or_deploy_resource = failing_then_succeeding_deploy
135+
mock_rm_cls.return_value = mock_rm
136+
137+
with pytest.raises(ConnectionError, match="transient failure"):
138+
await wrapper_instance._ensure_initialized()
139+
140+
assert not wrapper_instance._initialized
141+
142+
# Retry should succeed
143+
await wrapper_instance._ensure_initialized()
144+
assert wrapper_instance._initialized
145+
assert call_count == 2

0 commit comments

Comments
 (0)