Skip to content

Commit 493f493

Browse files
committed
Add reconnect loop with exponential backoff to async_main
1 parent d9a1a2a commit 493f493

2 files changed

Lines changed: 225 additions & 37 deletions

File tree

irc_bot.py

Lines changed: 58 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import logging
55
import signal
66
import socket
7+
import time
78
import traceback
89

910
from command_router import CommandRouter
@@ -185,46 +186,66 @@ async def async_main():
185186
config_obj.check_options()
186187

187188
bot = IrcBot(config_obj)
188-
log = False
189-
190-
try:
191-
await bot.start()
192-
except Exception as error:
193-
if getattr(bot, "_logger", None) is not None:
194-
bot._logger.exception("Fatal error during startup or runtime.")
195-
log = True
196-
else:
197-
print("Logger was not initialized, cannot write to log file.")
198-
199-
print(f"Fatal error: {type(error).__name__}: {error}")
200-
traceb = traceback.format_exc()
201-
print(traceb)
202-
203-
with open("exception.txt", "w") as excFile:
204-
excFile.write(f"Fatal error at {datetime.datetime.today()}\n")
205-
excFile.write(traceb + "\n")
206-
excFile.write("-----------------------------------------------------\n")
207-
208-
if getattr(bot, "command_router", None) is not None:
209-
while not bot.command_router.recent_messages.empty():
210-
msg = bot.command_router.recent_messages.get_nowait()
211-
excFile.write(msg)
212-
excFile.write("\n")
213-
bot.command_router.task_pool.cancel_all()
214-
if log:
215-
bot._logger.debug("All tasks were signaled to shut down.")
216-
217-
excFile.write("-----------------------------------------------------\n")
218-
excFile.write("Connection Exception: \n")
219-
220-
if getattr(bot, "conn", None) is not None:
221-
excFile.write(str(bot.conn.error) + " \n")
189+
190+
initial_backoff = 5
191+
max_backoff = 300
192+
stable_threshold = 60
193+
backoff = initial_backoff
194+
195+
while True:
196+
start_time = time.monotonic()
197+
try:
198+
await bot.start()
199+
break
200+
201+
except (ConnectionDown, OSError, TimeoutError) as error:
202+
elapsed = time.monotonic() - start_time
203+
if elapsed > stable_threshold:
204+
backoff = initial_backoff
205+
bot._logger.warning("Connection lost: %s. Reconnecting in %ds...", error, backoff)
206+
await asyncio.sleep(backoff)
207+
backoff = min(backoff * 2, max_backoff)
208+
209+
except asyncio.CancelledError:
210+
break
211+
212+
except Exception as error:
213+
if getattr(bot, "_logger", None) is not None:
214+
bot._logger.exception("Fatal error during startup or runtime.")
222215
else:
223-
excFile.write("Connection not initialized\n")
216+
print("Logger was not initialized, cannot write to log file.")
217+
218+
print(f"Fatal error: {type(error).__name__}: {error}")
219+
traceb = traceback.format_exc()
220+
print(traceb)
221+
222+
with open("exception.txt", "w") as excFile:
223+
excFile.write(f"Fatal error at {datetime.datetime.today()}\n")
224+
excFile.write(traceb + "\n")
225+
excFile.write("-----------------------------------------------------\n")
226+
227+
if getattr(bot, "command_router", None) is not None:
228+
while not bot.command_router.recent_messages.empty():
229+
msg = bot.command_router.recent_messages.get_nowait()
230+
excFile.write(msg)
231+
excFile.write("\n")
232+
bot.command_router.task_pool.cancel_all()
233+
if getattr(bot, "_logger", None) is not None:
234+
bot._logger.debug("All tasks were signaled to shut down.")
235+
236+
excFile.write("-----------------------------------------------------\n")
237+
excFile.write("Connection Exception: \n")
238+
239+
if getattr(bot, "conn", None) is not None:
240+
excFile.write(str(bot.conn.error) + " \n")
241+
else:
242+
excFile.write("Connection not initialized\n")
243+
244+
excFile.write("-----------------------------------------------------\n")
224245

225-
excFile.write("-----------------------------------------------------\n")
246+
break
226247

227-
if log:
248+
if getattr(bot, "_logger", None) is not None:
228249
bot._logger.info("End of Session\n\n\n\n")
229250
logging.shutdown()
230251

tests/test_irc_bot.py

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,3 +201,170 @@ async def test_nickserv_auth_reset_on_start(self):
201201
await bot.start()
202202

203203
assert bot.nickserv_auth is False
204+
205+
206+
class TestAsyncMainReconnect:
    """Exercise the reconnect/backoff loop in ``irc_bot.async_main``.

    Each test swaps ``IrcBot.start`` for a stub that raises a fixed number of
    times and then returns, and replaces ``asyncio.sleep`` and ``irc_bot.time``
    with mocks so the recorded sleep arguments can be inspected without any
    real waiting.
    """

    def _mock_config(self):
        """Mock Configuration so async_main doesn't need config.yml."""
        config_obj = MagicMock()
        config_obj.config = {
            "connection": {
                "server": "irc.example.com",
                "port": 6667,
                "nickname": "TestBot",
                "password": "",
                "ident": "testbot",
                "realname": "Test",
            },
            "administration": {
                "operators": [],
                "channels": [],
                "command_prefix": "!",
                "logging_level": "INFO",
            },
            "networking": {"force_ipv6": False, "bind_address": ""},
        }
        return config_obj

    @staticmethod
    def _make_fake_start(failures, make_error=None):
        """Build a stand-in for ``IrcBot.start`` that fails ``failures`` times.

        Args:
            failures: Number of leading calls that raise.
            make_error: Zero-argument callable returning the exception to
                raise. Defaults to ``ConnectionDown("irc.example.com", "now")``.

        Returns:
            ``(fake_start, calls)`` where ``calls["count"]`` is the number of
            times the stub has been invoked so far.
        """
        if make_error is None:

            def make_error():
                return ConnectionDown("irc.example.com", "now")

        calls = {"count": 0}

        async def fake_start(bot):
            calls["count"] += 1
            if calls["count"] <= failures:
                raise make_error()
            # Past the failure budget: simulate graceful shutdown.
            bot.shutdown = True

        return fake_start, calls

    async def _run_async_main(self, fake_start, monotonic_values=None):
        """Run ``async_main`` with config, start, sleep and clock patched.

        Args:
            fake_start: Replacement installed as ``IrcBot.start``.
            monotonic_values: Optional sequence of successive
                ``time.monotonic()`` results. When omitted the clock always
                reads 0, so elapsed time never exceeds the stable threshold.

        Returns:
            The ``AsyncMock`` that replaced ``asyncio.sleep``.
        """
        from irc_bot import async_main

        with (
            patch("irc_bot.Configuration") as mock_config_cls,
            patch.object(IrcBot, "start", fake_start),
            patch("irc_bot.write_starting_date"),
            patch("irc_bot.asyncio.sleep", new_callable=AsyncMock) as mock_sleep,
            patch("irc_bot.time") as mock_time,
        ):
            mock_config_cls.return_value = self._mock_config()
            if monotonic_values is None:
                mock_time.monotonic.return_value = 0
            else:
                mock_time.monotonic.side_effect = iter(monotonic_values)
            await async_main()

        return mock_sleep

    @staticmethod
    def _sleep_values(mock_sleep):
        """Return the first positional argument of every recorded sleep call."""
        return [call.args[0] for call in mock_sleep.call_args_list]

    async def test_retries_on_connection_down(self):
        fake_start, calls = self._make_fake_start(failures=2)

        mock_sleep = await self._run_async_main(fake_start)

        assert calls["count"] == 3
        assert mock_sleep.await_count == 2  # Two retries before success

    async def test_backoff_doubles(self):
        fake_start, _ = self._make_fake_start(failures=3)

        mock_sleep = await self._run_async_main(fake_start)

        assert self._sleep_values(mock_sleep) == [5, 10, 20]

    async def test_backoff_caps_at_300(self):
        fake_start, _ = self._make_fake_start(failures=9)

        mock_sleep = await self._run_async_main(fake_start)

        # Doubling from 5 reaches 320 on the seventh failure and is clamped
        # to 300 from then on. Pin the whole sequence so a regression in the
        # doubling step is caught, not only a regression in the cap.
        assert self._sleep_values(mock_sleep) == [
            5, 10, 20, 40, 80, 160, 300, 300, 300,
        ]

    async def test_backoff_resets_after_stable_connection(self):
        fake_start, _ = self._make_fake_start(failures=4)
        # 9 monotonic() calls: 5 for start_time + 4 for elapsed.
        # Iteration 3's elapsed = 100 - 0 = 100 > stable_threshold, triggers reset.
        clock = [0, 0, 0, 0, 0, 100, 0, 0, 0]

        mock_sleep = await self._run_async_main(fake_start, monotonic_values=clock)

        # First two failures: 5, 10. Third was stable (100s elapsed), resets to 5. Fourth: 10.
        assert self._sleep_values(mock_sleep) == [5, 10, 5, 10]

    async def test_retries_on_oserror(self):
        fake_start, calls = self._make_fake_start(
            failures=1,
            make_error=lambda: ConnectionRefusedError("Connection refused"),
        )

        mock_sleep = await self._run_async_main(fake_start)

        assert calls["count"] == 2
        assert mock_sleep.await_count == 1
        # An OSError subclass takes the same path, starting at the initial delay.
        assert self._sleep_values(mock_sleep) == [5]

0 commit comments

Comments
 (0)