Skip to content

Commit a18f2f1

Browse files
Nginx down, VM down, network issue, docker-compose stopped, UnAuthorized handled
1 parent 547e2a0 commit a18f2f1

2 files changed

Lines changed: 66 additions & 71 deletions

File tree

Framework/deploy_handler/long_poll_handler.py

Lines changed: 65 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from colorama import Fore
1010
from pathlib import Path
1111
from urllib.parse import urlparse
12+
import requests
1213

1314
from Framework.Utilities import RequestFormatter, ConfigModule, CommonUtil
1415
from Framework.Utilities.RequestFormatter import REQUEST_TIMEOUT
@@ -253,80 +254,74 @@ def respond_to_key_request(self, request_id: str, private_key_pem: str) -> None:
253254

254255
async def run(self, host: str) -> None:
255256
reconnect = False
256-
server_online = False
257-
async with httpx.AsyncClient(timeout=httpx.Timeout(70.0), verify=False) as client:
258-
while True:
259-
if STATE.reconnect_with_credentials is not None:
257+
print_online = False
258+
while True:
259+
if STATE.reconnect_with_credentials is not None:
260+
break
261+
262+
if reconnect:
263+
await asyncio.sleep(random.randint(1, 3))
264+
265+
await self.on_connect_callback(reconnect)
266+
267+
try:
268+
reconnect = True
269+
resp = RequestFormatter.request("get", host, verify=False, timeout=70)
270+
if resp is None:
260271
break
261272

262-
if reconnect:
263-
if server_online:
264-
await asyncio.sleep(0.1)
265-
else:
266-
await asyncio.sleep(random.randint(1, 3))
273+
if resp.content.startswith(self.ERROR_PREFIX):
274+
self.on_error(resp.content)
275+
continue
267276

268-
await self.on_connect_callback(reconnect)
277+
if resp.ok and print_online:
278+
print_online = False
279+
node_id = CommonUtil.MachineInfo().getLocalUser().lower()
280+
print(f"🟢 {node_id} back to online")
269281

270-
try:
271-
reconnect = True
272-
resp = await self.fetch(host, client)
273-
if resp is None:
274-
break
275-
276-
if resp.content.startswith(self.ERROR_PREFIX):
277-
server_online = False
278-
self.on_error(resp.content)
279-
continue
280-
281-
if resp.status_code == httpx.codes.NO_CONTENT:
282-
server_online = False
283-
continue
284-
285-
if not resp.is_success:
286-
server_online = False
287-
print(
288-
"[deploy] facing difficulty communicating with the server, status code:",
289-
resp.status_code,
290-
" | reconnecting",
291-
)
292-
try:
293-
print(Fore.YELLOW + str(resp.content))
294-
except Exception:
295-
pass
296-
297-
# Encountered a server error, retry.
298-
await asyncio.sleep(random.randint(1, 3))
299-
return
300-
301-
should_quit = await self.on_message(resp.content)
302-
if should_quit:
303-
break
304-
305-
reconnect = False
306-
server_online = True
307-
except httpx.ReadTimeout:
308-
pass
309-
except Exception:
310-
traceback.print_exc()
311-
print("[deploy] RETRYING...")
312-
313-
async def fetch(self, host: str, client: httpx.AsyncClient) -> httpx.Response | None:
314-
try:
315-
api_key = ConfigModule.get_config_value("Authentication", "api-key")
316-
headers = {"X-API-KEY": api_key}
317-
318-
while True:
282+
if resp.status_code == httpx.codes.NO_CONTENT:
283+
continue
284+
285+
if resp.status_code == httpx.codes.BAD_GATEWAY:
286+
print_online = True
287+
print(Fore.YELLOW + "Server offline. Retrying after 20s")
288+
await asyncio.sleep(20)
289+
continue
290+
291+
if not resp.ok:
292+
print(
293+
"[deploy] facing difficulty communicating with the server, status code:",
294+
resp.status_code,
295+
" | reconnecting",
296+
)
297+
298+
# Encountered a server error, retry.
299+
await asyncio.sleep(random.randint(1, 3))
300+
continue
301+
302+
should_quit = await self.on_message(resp.content)
303+
if should_quit:
304+
break
305+
306+
reconnect = False
307+
except (
308+
requests.exceptions.ConnectTimeout,
309+
requests.exceptions.ReadTimeout,
310+
requests.exceptions.ConnectionError,
311+
) as e:
312+
# Nginx down, VM down, network issue, docker-compose stopped
319313
if STATE.reconnect_with_credentials is not None:
320314
return None
321-
322-
try:
323-
resp = await client.get(host, headers=headers)
324-
return resp
325-
except asyncio.CancelledError:
315+
print_online = True
316+
print(e)
317+
print(Fore.YELLOW + "Retrying after 30s")
318+
await asyncio.sleep(30)
319+
320+
except Exception as e:
321+
if STATE.reconnect_with_credentials is not None:
326322
return None
327-
except Exception:
328-
if STATE.reconnect_with_credentials is not None:
329-
return None
330-
await asyncio.sleep(0.1)
331-
except Exception:
332-
return None
323+
print_online = True
324+
print(e)
325+
print(Fore.YELLOW + "Retrying after 30s")
326+
await asyncio.sleep(30)
327+

node_cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,7 @@ def update_machine(dependency, should_print=True):
565565
resp = RequestFormatter.request("post", url, json=update_object)
566566

567567
if resp.status_code != 200:
568-
CommonUtil.ExecLog("", "Machine is not registered as online", 4)
568+
# CommonUtil.ExecLog("", "Machine is not registered as online", 4)
569569
return
570570

571571
data = resp.json()

0 commit comments

Comments
 (0)