Skip to content
This repository was archived by the owner on Mar 18, 2026. It is now read-only.

Commit 8a5d014

Browse files
Merge pull request #434 from aibtcdev/chainhook-prevent-duplicates
make sure chainhook checker doesn't create duplicates when hiro platfo…
2 parents d3bc675 + abf70e8 commit 8a5d014

1 file changed

Lines changed: 41 additions & 25 deletions

File tree

app/services/infrastructure/job_management/tasks/chainhook_monitor.py

Lines changed: 41 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -140,14 +140,16 @@ async def _post_execution_cleanup(
140140
"""Cleanup after task execution."""
141141
logger.debug("Chainhook monitor task cleanup completed")
142142

143-
def _is_chainhook_healthy(self, chainhook_uuid: str) -> bool:
143+
def _is_chainhook_healthy(self, chainhook_uuid: str) -> tuple[bool, bool]:
144144
"""Check if a chainhook is in a healthy state by checking its status directly.
145145
146146
Args:
147147
chainhook_uuid: UUID of the chainhook to check
148148
149149
Returns:
150-
bool: True if chainhook is healthy, False otherwise
150+
tuple[bool, bool]: (is_healthy, should_recreate)
151+
- is_healthy: True if chainhook is healthy
152+
- should_recreate: True if chainhook should be recreated (permanent failure)
151153
"""
152154
try:
153155
# Get the specific chainhook status
@@ -156,7 +158,7 @@ def _is_chainhook_healthy(self, chainhook_uuid: str) -> bool:
156158
# Check if chainhook is enabled
157159
if not status_response.get("enabled", False):
158160
logger.warning(f"Chainhook {chainhook_uuid} is not enabled")
159-
return False
161+
return False, True # Not healthy, should recreate
160162

161163
# Check status type for any failure indicators
162164
status_info = status_response.get("status", {})
@@ -166,7 +168,7 @@ def _is_chainhook_healthy(self, chainhook_uuid: str) -> bool:
166168
logger.warning(
167169
f"Chainhook {chainhook_uuid} has status type: {status_type}"
168170
)
169-
return False
171+
return False, True # Not healthy, should recreate
170172

171173
# Additional checks on status info if available
172174
info = status_info.get("info", {})
@@ -179,12 +181,16 @@ def _is_chainhook_healthy(self, chainhook_uuid: str) -> bool:
179181
logger.warning(
180182
f"Chainhook {chainhook_uuid} has expired (expired_at: {expired_at}, last_evaluated: {last_evaluated})"
181183
)
182-
return False
184+
return False, True # Not healthy, should recreate
183185

184-
return True
186+
return True, False # Healthy, no need to recreate
185187
except Exception as e:
186-
logger.error(f"Error checking chainhook {chainhook_uuid} health: {str(e)}")
187-
return False
188+
# This is likely a temporary failure (network, API timeout, etc.)
189+
# Don't recreate the chainhook, just log the error and try again later
190+
logger.warning(
191+
f"Temporary error checking chainhook {chainhook_uuid} health: {str(e)}"
192+
)
193+
return False, False # Not healthy (unknown), but don't recreate
188194

189195
def _recreate_chainhook_for_chain_state(self, chain_state) -> Optional[str]:
190196
"""Recreate a chainhook for a given chain state.
@@ -329,33 +335,43 @@ async def _execute_impl(self, context: JobContext) -> List[ChainhookMonitorResul
329335
)
330336

331337
# Check if chainhook is healthy using direct status check
332-
if not self._is_chainhook_healthy(chainhook_uuid):
338+
is_healthy, should_recreate = self._is_chainhook_healthy(chainhook_uuid)
339+
340+
if not is_healthy:
333341
logger.warning(
334-
f"Chainhook {chainhook_uuid} is unhealthy or not found"
342+
f"Chainhook {chainhook_uuid} is unhealthy (should_recreate={should_recreate})"
335343
)
336344
chainhooks_failed += 1
337345
failed_chainhook_ids.append(chainhook_uuid)
338346

339-
# Try to recreate the chainhook
340-
new_uuid = self._recreate_chainhook_for_chain_state(chain_state)
341-
if new_uuid:
342-
chainhooks_recreated += 1
343-
recreated_chainhook_ids.append(new_uuid)
347+
# Only recreate if it's a permanent failure, not a temporary one
348+
if should_recreate:
344349
logger.info(
345-
f"Successfully recreated chainhook {new_uuid} to replace unhealthy {chainhook_uuid}"
350+
f"Recreating chainhook {chainhook_uuid} due to permanent failure"
346351
)
352+
new_uuid = self._recreate_chainhook_for_chain_state(chain_state)
353+
if new_uuid:
354+
chainhooks_recreated += 1
355+
recreated_chainhook_ids.append(new_uuid)
356+
logger.info(
357+
f"Successfully recreated chainhook {new_uuid} to replace failed {chainhook_uuid}"
358+
)
347359

348-
# Delete the old chainhook if it exists
349-
try:
350-
self.platform_api.delete_chainhook(chainhook_uuid)
351-
logger.info(f"Deleted old chainhook {chainhook_uuid}")
352-
except Exception as e:
353-
logger.warning(
354-
f"Failed to delete old chainhook {chainhook_uuid}: {str(e)}"
360+
# Delete the old chainhook if it exists
361+
try:
362+
self.platform_api.delete_chainhook(chainhook_uuid)
363+
logger.info(f"Deleted old chainhook {chainhook_uuid}")
364+
except Exception as e:
365+
logger.warning(
366+
f"Failed to delete old chainhook {chainhook_uuid}: {str(e)}"
367+
)
368+
else:
369+
logger.error(
370+
f"Failed to recreate chainhook for chain state {chain_state.id}"
355371
)
356372
else:
357-
logger.error(
358-
f"Failed to recreate chainhook for chain state {chain_state.id}"
373+
logger.info(
374+
f"Skipping recreation of chainhook {chainhook_uuid} - likely temporary failure"
359375
)
360376
else:
361377
logger.debug(f"Chainhook {chainhook_uuid} is healthy")

0 commit comments

Comments
 (0)