1+ import atexit
12import json
23import logging
34import os
5+ import signal
46import sys
57import time
68
3335# A storage place for ant node data
3436Workers = []
3537
38+ # Track whether we created the lock file
39+ _lock_file_created = False
40+
3641# Detect ANM
3742
3843
def cleanup_lock_file():
    """Remove the lock file if it was created by this process.

    The function is safe to call more than once: after the lock file is gone
    the ``_lock_file_created`` flag is cleared, so a later call (for example
    the atexit hook running after the signal handler already cleaned up)
    cannot delete a lock file that another process has created in the
    meantime.

    Removal errors are logged rather than raised because this runs during
    interpreter shutdown and from signal handlers.
    """
    global _lock_file_created
    if not _lock_file_created:
        return
    try:
        # Attempt the removal directly instead of checking for existence
        # first; the check-then-remove sequence is open to a race.
        os.remove(LOCK_FILE)
    except FileNotFoundError:
        # The file is already gone, so there is nothing left to release.
        pass
    except OSError as e:
        logging.error(f"Error removing lock file during cleanup: { e } ")
        # Leave the flag set so a later cleanup attempt can retry.
        return
    else:
        logging.debug("Lock file removed during cleanup")
    # The lock is released; this process no longer owns any lock file.
    _lock_file_created = False
53+
54+
def signal_handler(signum, frame):
    """Log the received signal, release the lock file, then exit with status 1.

    Args:
        signum: Number of the signal that was delivered.
        frame: Stack frame passed by the signal machinery; not used here.

    Raises:
        SystemExit: Always, with exit code 1, once cleanup has been attempted.
    """
    # Translate the numeric signal into its symbolic name for the log line.
    signal_name = signal.Signals(signum).name
    logging.info(f"Received { signal_name } , cleaning up...")

    cleanup_lock_file()

    # Equivalent to sys.exit(1): unwinds the interpreter with status 1.
    raise SystemExit(1)
61+
62+
# Graceful shutdown: route termination signals through signal_handler so the
# lock file is released before the process exits.
for _shutdown_signal in (signal.SIGTERM, signal.SIGINT):
    signal.signal(_shutdown_signal, signal_handler)
del _shutdown_signal  # keep the module namespace free of the loop variable

# On a normal interpreter exit, run the same lock-file cleanup.
atexit.register(cleanup_lock_file)
69+
70+
3971# Make a decision about what to do (new implementation using DecisionEngine)
4072def choose_action (machine_config , metrics , dry_run ):
4173 """Plan and execute actions using DecisionEngine and ActionExecutor.
@@ -138,9 +170,12 @@ def main():
138170 sys .exit (1 )
139171
140172 # We're starting, so lets create a lock file
173+ global _lock_file_created
141174 try :
142175 with open (LOCK_FILE , "w" ) as file :
143176 file .write (str (int (time .time ())))
177+ _lock_file_created = True
178+ logging .debug (f"Lock file created: { LOCK_FILE } " )
144179 except (PermissionError , OSError ) as e :
145180 logging .error (f"Unable to create lock file: { e } " )
146181 sys .exit (1 )
@@ -150,7 +185,6 @@ def main():
150185 if not options .confirm :
151186 logging .error ("Database migration requires --confirm flag for safety" )
152187 logging .info ("Use: wnm --force_action wnm-db-migration --confirm" )
153- os .remove (LOCK_FILE )
154188 sys .exit (1 )
155189
156190 # Import migration utilities
@@ -162,7 +196,6 @@ def main():
162196 if not pending :
163197 logging .info ("Database is already up to date!" )
164198 logging .info (f"Current revision: { current } " )
165- os .remove (LOCK_FILE )
166199 sys .exit (0 )
167200
168201 logging .info ("=" * 70 )
@@ -176,13 +209,11 @@ def main():
176209 run_migrations (engine , options .dbpath )
177210 logging .info ("Database migration completed successfully!" )
178211 logging .info ("=" * 70 )
179- os .remove (LOCK_FILE )
180212 sys .exit (0 )
181213 except Exception as e :
182214 logging .error (f"Migration failed: { e } " )
183215 logging .error ("Please restore from backup and report this issue." )
184216 logging .info ("=" * 70 )
185- os .remove (LOCK_FILE )
186217 sys .exit (1 )
187218
188219 # Config should have loaded the machine_config
@@ -208,8 +239,7 @@ def main():
208239 logging .info ("Configuration updated successfully" )
209240 else :
210241 logging .info ("No configuration changes detected" )
211- # Clean up and exit immediately
212- os .remove (LOCK_FILE )
242+ # Exit immediately (atexit will clean up lock file)
213243 sys .exit (0 )
214244
215245 # Check for config updates
@@ -324,7 +354,6 @@ def main():
324354 # Handle --init flag: exit after initialization (and optional survey)
325355 if options .init :
326356 logging .info ("Initialization complete" )
327- os .remove (LOCK_FILE )
328357 sys .exit (0 )
329358
330359 # Check for reports
@@ -373,15 +402,13 @@ def main():
373402 report_output = f"Unknown report type: { options .report } "
374403
375404 print (report_output )
376- os .remove (LOCK_FILE )
377405 sys .exit (0 )
378406
379407 # Check for forced actions
380408 if options .force_action :
381409 # Teardown requires confirmation for safety
382410 if options .force_action == "teardown" and not options .confirm :
383411 logging .error ("Teardown requires --confirm flag for safety" )
384- os .remove (LOCK_FILE )
385412 sys .exit (1 )
386413
387414 logging .info (f"Executing forced action: { options .force_action } " )
@@ -399,7 +426,7 @@ def main():
399426
400427 logging .info ("Action: " + json .dumps (this_action , indent = 2 ))
401428
402- os . remove ( LOCK_FILE )
429+ # Exit normally (atexit will clean up lock file )
403430 sys .exit (0 )
404431
405432
0 commit comments