Skip to content

Commit 2d2bdc7

Browse files
authored
Merge pull request #1815 from codeflash-ai/fix/worktree-cleanup-on-signals
fix: ensure worktree cleanup on SIGTERM, SIGHUP, and atexit
2 parents e073592 + 9edb21d commit 2d2bdc7

2 files changed

Lines changed: 68 additions & 0 deletions

File tree

codeflash/code_utils/git_worktree_utils.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
import configparser
4+
import os
45
import shutil
56
import stat
67
import subprocess
@@ -62,6 +63,10 @@ def create_detached_worktree() -> Optional[Path]:
6263

6364
repository.git.worktree("add", "-d", str(worktree_dir))
6465

66+
# Write PID file so stale worktrees can be detected after SIGKILL
67+
pid_file = worktree_dir / ".codeflash.pid"
68+
pid_file.write_text(str(os.getpid()), encoding="utf-8")
69+
6570
# Get uncommitted diff from the original repo
6671
repository.git.add("-N", ".") # add the index for untracked files to be included in the diff
6772
exclude_binary_files = [":!*.pyc", ":!*.pyo", ":!*.pyd", ":!*.so", ":!*.dll", ":!*.whl", ":!*.egg", ":!*.egg-info", ":!*.pyz", ":!*.pkl", ":!*.pickle", ":!*.joblib", ":!*.npy", ":!*.npz", ":!*.h5", ":!*.hdf5", ":!*.pth", ":!*.pt", ":!*.pb", ":!*.onnx", ":!*.db", ":!*.sqlite", ":!*.sqlite3", ":!*.feather", ":!*.parquet", ":!*.jpg", ":!*.jpeg", ":!*.png", ":!*.gif", ":!*.bmp", ":!*.tiff", ":!*.webp", ":!*.wav", ":!*.mp3", ":!*.ogg", ":!*.flac", ":!*.mp4", ":!*.avi", ":!*.mov", ":!*.mkv", ":!*.pdf", ":!*.doc", ":!*.docx", ":!*.xls", ":!*.xlsx", ":!*.ppt", ":!*.pptx", ":!*.zip", ":!*.rar", ":!*.tar", ":!*.tar.gz", ":!*.tgz", ":!*.bz2", ":!*.xz"] # fmt: off
@@ -116,6 +121,36 @@ def remove_worktree(worktree_dir: Path) -> None:
116121
logger.exception(f"Failed to remove worktree: {worktree_dir}")
117122

118123

124+
def is_process_alive(pid: int) -> bool:
    """Return whether a process with the given PID currently exists.

    The check is a pure probe: it never delivers a signal to, or terminates,
    the target process.

    Args:
        pid: Process ID to probe, typically read back from a PID file.

    Returns:
        True if a process with that PID exists (even when we lack permission
        to signal it), False if no such process exists or ``pid`` cannot
        identify a single process (zero, negative, or out of range).
    """
    # os.kill treats 0 and negative values as "process group" / "every
    # process" selectors, so they would always look alive. They can never be
    # the owner of a worktree, so report them as dead.
    if pid <= 0:
        return False

    # On Windows os.kill(pid, 0) does not probe: any signal other than the
    # CTRL_* events terminates the target via TerminateProcess.
    if os.name == "nt":
        return _is_windows_process_alive(pid)

    try:
        os.kill(pid, 0)  # signal 0: existence/permission check only
    except ProcessLookupError:
        return False
    except PermissionError:
        return True  # process exists but we can't signal it
    except OverflowError:
        return False  # value does not fit in the platform's pid type
    return True


def _is_windows_process_alive(pid: int) -> bool:
    """Probe a Windows process through a query-only handle, without signalling it."""
    import ctypes
    from ctypes import wintypes

    process_query_limited_information = 0x1000
    error_access_denied = 5
    still_active = 259

    # Windows PIDs are DWORDs; ctypes would silently truncate anything larger.
    if pid > 0xFFFFFFFF:
        return False

    kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
    kernel32.OpenProcess.argtypes = (wintypes.DWORD, wintypes.BOOL, wintypes.DWORD)
    kernel32.OpenProcess.restype = wintypes.HANDLE
    kernel32.GetExitCodeProcess.argtypes = (wintypes.HANDLE, ctypes.POINTER(wintypes.DWORD))
    kernel32.GetExitCodeProcess.restype = wintypes.BOOL
    kernel32.CloseHandle.argtypes = (wintypes.HANDLE,)
    kernel32.CloseHandle.restype = wintypes.BOOL

    handle = kernel32.OpenProcess(process_query_limited_information, False, pid)
    if not handle:
        # Access denied means the process exists but is off limits to us.
        return ctypes.get_last_error() == error_access_denied
    try:
        exit_code = wintypes.DWORD()
        if not kernel32.GetExitCodeProcess(handle, ctypes.byref(exit_code)):
            return True  # handle opened, so the process object still exists
        return exit_code.value == still_active
    finally:
        kernel32.CloseHandle(handle)
132+
133+
134+
def cleanup_stale_worktrees() -> None:
    """Delete worktree directories whose owning process is no longer running.

    Every directory under ``worktree_dirs`` is inspected. A directory is kept
    only when its ``.codeflash.pid`` file holds a PID that
    ``is_process_alive`` reports as running. Directories with no PID file, an
    unreadable or non-numeric PID file, or a dead owner are passed to
    ``remove_worktree``. Non-directory entries are ignored.
    """
    if not worktree_dirs.exists():
        return

    for entry in worktree_dirs.iterdir():
        if not entry.is_dir():
            continue

        marker = entry / ".codeflash.pid"
        owner_pid = None
        if marker.exists():
            try:
                raw_pid = marker.read_text(encoding="utf-8")
                owner_pid = int(raw_pid.strip())
            except (ValueError, OSError):
                # Unreadable or malformed PID file: treat as having no owner.
                owner_pid = None

        in_use = owner_pid is not None and is_process_alive(owner_pid)
        if not in_use:
            # No PID file or owning process is dead, so the worktree is stale.
            logger.info(f"Removing stale worktree: {entry}")
            remove_worktree(entry)
152+
153+
119154
def create_diff_patch_from_worktree(
120155
worktree_dir: Path, files: list[Path], fto_name: Optional[str] = None
121156
) -> Optional[Path]:

codeflash/optimization/optimizer.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from codeflash.code_utils.env_utils import get_pr_number, is_pr_draft
1818
from codeflash.code_utils.git_utils import check_running_in_git_repo, git_root_dir, mirror_path
1919
from codeflash.code_utils.git_worktree_utils import (
20+
cleanup_stale_worktrees,
2021
create_detached_worktree,
2122
create_diff_patch_from_worktree,
2223
create_worktree_snapshot_commit,
@@ -861,7 +862,33 @@ def mirror_paths_for_worktree_mode(self, worktree_dir: Path) -> None:
861862

862863

863864
def run_with_args(args: Namespace) -> None:
865+
import atexit
866+
import signal
867+
868+
cleanup_stale_worktrees()
869+
864870
optimizer = None
871+
original_sigterm = signal.getsignal(signal.SIGTERM)
872+
original_sighup = signal.getsignal(signal.SIGHUP)
873+
original_sigquit = signal.getsignal(signal.SIGQUIT)
874+
original_sigpipe = signal.getsignal(signal.SIGPIPE)
875+
876+
def cleanup_worktree_on_exit() -> None:
877+
if optimizer and optimizer.current_worktree:
878+
remove_worktree(optimizer.current_worktree)
879+
880+
def signal_handler(signum: int, frame: object) -> None:
881+
logger.warning(f"Signal {signum} received. Cleaning up worktree and exiting…")
882+
if optimizer:
883+
optimizer.cleanup_temporary_paths()
884+
raise SystemExit(128 + signum)
885+
886+
atexit.register(cleanup_worktree_on_exit)
887+
signal.signal(signal.SIGTERM, signal_handler)
888+
signal.signal(signal.SIGHUP, signal_handler)
889+
signal.signal(signal.SIGQUIT, signal_handler)
890+
signal.signal(signal.SIGPIPE, signal_handler)
891+
865892
try:
866893
optimizer = Optimizer(args)
867894
optimizer.run()
@@ -871,3 +898,9 @@ def run_with_args(args: Namespace) -> None:
871898
optimizer.cleanup_temporary_paths()
872899

873900
raise SystemExit from None
901+
finally:
902+
atexit.unregister(cleanup_worktree_on_exit)
903+
signal.signal(signal.SIGTERM, original_sigterm)
904+
signal.signal(signal.SIGHUP, original_sighup)
905+
signal.signal(signal.SIGQUIT, original_sigquit)
906+
signal.signal(signal.SIGPIPE, original_sigpipe)

0 commit comments

Comments
 (0)