This repository was archived by the owner on Jan 23, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 18
Expand file tree
/
Copy pathrun.py
More file actions
164 lines (133 loc) · 6.01 KB
/
run.py
File metadata and controls
164 lines (133 loc) · 6.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import logging
import os
import signal
import sys
import anyio
import click
from anyio import create_task_group, open_signal_receiver
from jumpstarter_cli_common.config import opt_config
from jumpstarter_cli_common.exceptions import handle_exceptions
# Module-level logger named after this module; used by both the parent
# (supervisor) and child (exporter) code paths below.
logger = logging.getLogger(__name__)
def _handle_exporter_exceptions(excgroup):
    """Report each non-cancellation leaf of an exporter exception group on stderr.

    Args:
        excgroup: The exception (group) caught while serving; it is flattened
            with ``leaf_exceptions`` before reporting.
    """
    # Imported lazily, exactly as the original did.
    from jumpstarter_cli_common.exceptions import leaf_exceptions

    for leaf in leaf_exceptions(excgroup):
        # Cancellation raised by the async backend is not an error worth reporting.
        if isinstance(leaf, anyio.get_cancelled_exc_class()):
            continue
        message = f"Exception while serving on the exporter: {type(leaf).__name__}: {leaf}"
        click.echo(message, err=True)
def _reap_zombie_processes(capture_child=None):
    """Reap every already-exited child process without blocking.

    Repeatedly calls ``os.waitpid(-1, os.WNOHANG)`` until it reports that no
    further child has exited (pid ``0``) or that there are no children at all
    (``ChildProcessError``).

    Args:
        capture_child: Optional dict with a ``'pid'`` key. If one of the reaped
            PIDs equals ``capture_child['pid']``, its raw wait status is stored
            in ``capture_child['status']`` so the caller can still retrieve it
            after the child has been reaped here.

    Returns:
        None. Any unexpected error is logged as a warning instead of raised.
    """
    try:
        while True:
            # Only the waitpid call itself can signal "no children" this way.
            try:
                pid, status = os.waitpid(-1, os.WNOHANG)
            except ChildProcessError:
                break  # No children at all
            if pid == 0:
                break  # Children exist, but none has exited yet
            if capture_child and pid == capture_child['pid']:
                capture_child['status'] = status
            # Lazy %-style args: the message is only formatted if DEBUG is enabled.
            logger.debug("PARENT: Reaped zombie process %s with status %s", pid, status)
    except Exception as e:
        # Best-effort reaping: never let a failure escape to the caller.
        logger.warning("PARENT: Error during zombie reaping: %s", e)
def _handle_child(config):
"""Handle child process with graceful shutdown."""
async def serve_with_graceful_shutdown():
received_signal = 0
signal_handled = False
exporter = None
async def signal_handler():
nonlocal received_signal, signal_handled
with open_signal_receiver(signal.SIGINT, signal.SIGTERM, signal.SIGHUP, signal.SIGQUIT) as signals:
async for sig in signals:
if signal_handled:
continue # Ignore duplicate signals
received_signal = sig
logger.info("CHILD: Received %d (%s)", received_signal, signal.Signals(received_signal).name)
if exporter:
# Terminate exporter. SIGHUP waits until current lease is let go. Later SIGTERM still overrides
if received_signal != signal.SIGHUP:
signal_handled = True
exporter.stop(wait_for_lease_exit=received_signal == signal.SIGHUP, should_unregister=True)
# Start signal handler first, then create exporter
async with create_task_group() as signal_tg:
# Start signal handler immediately
signal_tg.start_soon(signal_handler)
# Create exporter and run it
async with config.create_exporter() as exporter:
try:
await exporter.serve()
except* Exception as excgroup:
_handle_exporter_exceptions(excgroup)
# Check if exporter set an exit code (e.g., from hook failure with on_failure='exit')
exporter_exit_code = exporter.exit_code
# Cancel the signal handler after exporter completes
signal_tg.cancel_scope.cancel()
# Return exit code in priority order:
# 1. Signal number if received (for signal-based termination)
# 2. Exporter's exit code if set (for hook failure with on_failure='exit')
# 3. 0 for immediate restart (normal exit without signal or explicit exit code)
if received_signal:
return received_signal
elif exporter_exit_code is not None:
return exporter_exit_code
else:
return 0
sys.exit(anyio.run(serve_with_graceful_shutdown))
def _wait_for_child(pid, child_info):
    """Block until child ``pid`` exits and return its raw wait status.

    Args:
        pid: PID of the child to wait for.
        child_info: Dict whose ``'status'`` entry is used as a fallback when
            the child can no longer be waited on (``ChildProcessError``), e.g.
            because it was already reaped elsewhere.

    Returns:
        The status from ``os.waitpid``, or ``child_info['status']`` on fallback.
    """
    try:
        return os.waitpid(pid, 0)[1]
    except ChildProcessError:
        # Nothing left to wait for; use whatever status was recorded for us.
        return child_info['status']
def _handle_parent(pid):
    """Supervise child ``pid``: forward signals to it and translate its exit status.

    Args:
        pid: PID of the forked child; also used as the process-group id that
            signals are forwarded to.

    Returns:
        ``None`` when the child should be restarted (no status available, or
        the child exited with code 0); otherwise ``128 + n`` where ``n`` is the
        child's non-zero exit code or the signal that killed it.
    """
    child_info = {'pid': pid, 'status': None}

    def parent_signal_handler(signum, _):
        if signum == signal.SIGCHLD:
            # Only PID 1 reaps here; capture our own direct child if it gets reaped.
            if os.getpid() == 1:
                _reap_zombie_processes(capture_child=child_info)
            return

        logger.info("PARENT: Got %d (%s), forwarding to child PG %d", signum, signal.Signals(signum).name, pid)
        if pid <= 0:
            return
        try:
            os.killpg(pid, signum)
        except OSError:
            # Best effort (ProcessLookupError is an OSError): the group may be gone.
            pass

    # Install the handlers after fork so they apply to the parent side only.
    handled_signals = (signal.SIGINT, signal.SIGTERM, signal.SIGHUP, signal.SIGQUIT, signal.SIGCHLD)
    for signo in handled_signals:
        signal.signal(signo, parent_signal_handler)

    status = _wait_for_child(pid, child_info)
    if status is None:
        return None

    if not os.WIFEXITED(status):
        # Child killed by unhandled signal - terminate
        child_exit_signal = os.WTERMSIG(status) if os.WIFSIGNALED(status) else 0
        click.echo(f"Child killed by unhandled signal: {child_exit_signal}", err=True)
        return 128 + child_exit_signal

    child_exit_code = os.WEXITSTATUS(status)
    if child_exit_code == 0:
        return None  # restart child (unexpected exit/exception)
    # Non-zero code from the child means "terminate"; report it as 128 + code.
    return 128 + child_exit_code
def _serve_with_exc_handling(config):
    """Fork-and-supervise loop: keep re-forking the exporter child until told to stop.

    Args:
        config: Exporter configuration handed to the child process.

    Returns:
        The first non-``None`` exit code produced by ``_handle_parent``.
    """
    while True:
        child_pid = os.fork()

        if child_pid == 0:
            # Child side: become group leader so all spawned subprocesses are
            # reached by the parent's forwarded signals.
            os.setsid()
            _handle_child(config)
            sys.exit(1)  # should never happen

        # Parent side: None means "restart", anything else is the final exit code.
        exit_code = _handle_parent(child_pid)
        if exit_code is not None:
            return exit_code
@click.command("run")
@opt_config(client=False)
@handle_exceptions
def run(config):
    """Run an exporter locally."""
    # Delegates to the fork-and-supervise loop and passes its return value
    # (the exit code computed on the parent side) back to the caller.
    return _serve_with_exc_handling(config)