Skip to content

Commit 6c55487

Browse files
authored
feat: detect clean rebase on PR synchronize and preserve review labels (#1060)
## Summary - Detect clean rebases on PR `synchronize` events by comparing SHA-256 hashes of `git diff` output before/after the push - Preserve review labels (`approved-by-*`, `lgtm-by-*`, `verified`, `commented-by-*`, `changes-requested-by-*`) on clean rebase instead of stripping them - Post a PR comment listing which labels were preserved when a clean rebase is detected ## Implementation Details - `_is_clean_rebase()` method computes merge-base diffs for old and new heads, hashes them with SHA-256, and compares - Conservative fallback: returns `False` (strip labels as usual) on any git command failure - Uses `shlex.quote()` on all interpolated git command values for safety - Skips detection early if repository clone is not available - Skips `_process_verified_for_update_or_new_pull_request()` on clean rebase to truly preserve the verified label - Clean rebase path runs comment posting and CI setup in parallel via `asyncio.gather` ## Test plan - [x] 21 new tests in `test_clean_rebase_detection.py` covering: - Clean rebase detected (matching diffs) → labels preserved - Non-clean rebase (different diffs) → labels removed as usual - All 5 git command failure paths → conservative fallback - `shlex.quote()` usage verification - Repo-not-cloned early exit - Verified label preservation (not removed by downstream processing) - Comment content with/without review labels - Parallel task failure handling - [x] 133 existing PR handler tests pass with no regressions Closes #1059 <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **New Features** * Detect clean rebases on pull request synchronizations: skip full re-verification, refresh the verified check state without toggling the verified label, preserve selected review/verified labels, and avoid removing labels on clean-rebase events. * Post a best-effort notification on clean-rebase events summarizing preserved labels and the prior commit; posting failures are logged without blocking processing. * **Tests** * Added and updated tests covering clean-rebase detection, sync behavior, label preservation, verified-check handling, comment posting, and error/timeout scenarios. <!-- end of auto-generated comment: release notes by coderabbit.ai -->
1 parent 09d213d commit 6c55487

3 files changed

Lines changed: 1228 additions & 12 deletions

File tree

webhook_server/libs/handlers/pull_request_handler.py

Lines changed: 205 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from __future__ import annotations
22

33
import asyncio
4+
import hashlib
5+
import shlex
46
import traceback
57
from collections.abc import Coroutine
68
from typing import TYPE_CHECKING, Any
@@ -40,6 +42,7 @@
4042
VERIFIED_LABEL_STR,
4143
WIP_STR,
4244
)
45+
from webhook_server.utils.helpers import run_command
4346

4447
if TYPE_CHECKING:
4548
from webhook_server.libs.github_api import GithubWebhook
@@ -68,6 +71,161 @@ def __init__(self, github_webhook: GithubWebhook, owners_file_handler: OwnersFil
6871
github_webhook=self.github_webhook, owners_file_handler=self.owners_file_handler
6972
)
7073

74+
async def _is_clean_rebase(self, _pull_request: PullRequest) -> bool:
75+
"""Detect whether a synchronize event is a clean rebase (same code changes on a newer base).
76+
77+
Compares the sha256 hash of the diff between merge-base..HEAD for both
78+
the old head (before) and the new head (after). If the hashes match,
79+
the push was a pure rebase with no code changes.
80+
81+
Args:
82+
_pull_request: The GitHub pull request object (unused; kept for caller compatibility).
83+
84+
Returns:
85+
True if the push is a clean rebase, False otherwise.
86+
On any failure (git command, exception, timeout), returns False (conservative).
87+
"""
88+
try:
89+
if not self.github_webhook._repo_cloned:
90+
self.logger.debug(f"{self.log_prefix} Clean rebase detection: skipped, repository not cloned")
91+
return False
92+
93+
before_sha: str = self.hook_data["before"]
94+
after_sha: str = self.hook_data["after"]
95+
base_ref: str = self.hook_data["pull_request"]["base"]["ref"]
96+
clone_dir: str = self.github_webhook.clone_repo_dir
97+
98+
clone_dir_q = shlex.quote(clone_dir)
99+
before_sha_q = shlex.quote(before_sha)
100+
after_sha_q = shlex.quote(after_sha)
101+
base_ref_q = shlex.quote(f"origin/{base_ref}")
102+
103+
# Step 1: Fetch the old head SHA (may not be in the clone)
104+
rc, _, _ = await run_command(
105+
command=f"git -C {clone_dir_q} fetch origin {before_sha_q}",
106+
log_prefix=self.log_prefix,
107+
mask_sensitive=self.github_webhook.mask_sensitive,
108+
timeout=60,
109+
)
110+
if not rc:
111+
self.logger.warning(
112+
f"{self.log_prefix} Clean rebase detection: failed to fetch old head {before_sha[:7]}"
113+
)
114+
return False
115+
116+
# Step 2: Get merge-base for old head
117+
rc, old_merge_base_out, _ = await run_command(
118+
command=f"git -C {clone_dir_q} merge-base {base_ref_q} {before_sha_q}",
119+
log_prefix=self.log_prefix,
120+
mask_sensitive=self.github_webhook.mask_sensitive,
121+
timeout=60,
122+
)
123+
if not rc:
124+
self.logger.warning(
125+
f"{self.log_prefix} Clean rebase detection: failed to find merge-base for old head {before_sha[:7]}"
126+
)
127+
return False
128+
old_merge_base = old_merge_base_out.strip()
129+
if not old_merge_base:
130+
self.logger.warning(
131+
f"{self.log_prefix} Clean rebase detection: empty merge-base for old head {before_sha[:7]}"
132+
)
133+
return False
134+
old_merge_base_q = shlex.quote(old_merge_base)
135+
136+
# Step 3: Get merge-base for new head
137+
rc, new_merge_base_out, _ = await run_command(
138+
command=f"git -C {clone_dir_q} merge-base {base_ref_q} {after_sha_q}",
139+
log_prefix=self.log_prefix,
140+
mask_sensitive=self.github_webhook.mask_sensitive,
141+
timeout=60,
142+
)
143+
if not rc:
144+
self.logger.warning(
145+
f"{self.log_prefix} Clean rebase detection: failed to find merge-base for new head {after_sha[:7]}"
146+
)
147+
return False
148+
new_merge_base = new_merge_base_out.strip()
149+
if not new_merge_base:
150+
self.logger.warning(
151+
f"{self.log_prefix} Clean rebase detection: empty merge-base for new head {after_sha[:7]}"
152+
)
153+
return False
154+
new_merge_base_q = shlex.quote(new_merge_base)
155+
156+
# Step 4: Compute diff hash for old range
157+
rc, old_diff, _ = await run_command(
158+
command=f"git -C {clone_dir_q} diff --binary {old_merge_base_q}..{before_sha_q}",
159+
log_prefix=self.log_prefix,
160+
mask_sensitive=self.github_webhook.mask_sensitive,
161+
timeout=60,
162+
)
163+
if not rc:
164+
self.logger.warning(f"{self.log_prefix} Clean rebase detection: failed to compute diff for old range")
165+
return False
166+
167+
# Step 5: Compute diff hash for new range
168+
rc, new_diff, _ = await run_command(
169+
command=f"git -C {clone_dir_q} diff --binary {new_merge_base_q}..{after_sha_q}",
170+
log_prefix=self.log_prefix,
171+
mask_sensitive=self.github_webhook.mask_sensitive,
172+
timeout=60,
173+
)
174+
if not rc:
175+
self.logger.warning(f"{self.log_prefix} Clean rebase detection: failed to compute diff for new range")
176+
return False
177+
178+
# Step 6: Compare hashes
179+
old_hash = hashlib.sha256(old_diff.encode()).hexdigest()
180+
new_hash = hashlib.sha256(new_diff.encode()).hexdigest()
181+
182+
is_clean = old_hash == new_hash
183+
self.logger.info(
184+
f"{self.log_prefix} Clean rebase detection: "
185+
f"old_hash={old_hash[:12]}, new_hash={new_hash[:12]}, is_clean_rebase={is_clean}"
186+
)
187+
return is_clean
188+
except asyncio.CancelledError:
189+
raise
190+
except Exception:
191+
self.logger.exception(
192+
f"{self.log_prefix} Clean rebase detection failed unexpectedly; treating as non-clean"
193+
)
194+
return False
195+
196+
async def _post_clean_rebase_comment(
197+
self, pull_request: PullRequest, before_sha: str, label_names: list[str]
198+
) -> None:
199+
"""Post a comment about clean rebase detection. Best-effort -- failures are logged but don't block CI."""
200+
try:
201+
review_labels = [
202+
name
203+
for name in label_names
204+
if name.startswith(APPROVED_BY_LABEL_PREFIX)
205+
or name.startswith(LGTM_BY_LABEL_PREFIX)
206+
or name.startswith(COMMENTED_BY_LABEL_PREFIX)
207+
or name.startswith(CHANGED_REQUESTED_BY_LABEL_PREFIX)
208+
or name == VERIFIED_LABEL_STR
209+
]
210+
211+
if review_labels:
212+
labels_str = ", ".join(f"`{lbl}`" for lbl in review_labels)
213+
comment_body = (
214+
f"**Clean rebase detected** \u2014 no code changes compared to "
215+
f"previous head (`{before_sha[:7]}`).\n"
216+
f"The following labels were preserved: {labels_str}."
217+
)
218+
else:
219+
comment_body = (
220+
f"**Clean rebase detected** \u2014 no code changes compared to previous head (`{before_sha[:7]}`)."
221+
)
222+
223+
await asyncio.to_thread(pull_request.create_issue_comment, body=comment_body)
224+
except asyncio.CancelledError:
225+
raise
226+
except Exception:
227+
self.logger.exception(f"{self.log_prefix} Failed to post clean-rebase comment")
228+
71229
async def process_pull_request_webhook_data(self, pull_request: PullRequest) -> None:
72230
hook_action: str = self.hook_data["action"]
73231
if self.ctx:
@@ -122,10 +280,24 @@ async def process_pull_request_webhook_data(self, pull_request: PullRequest) ->
122280
return
123281

124282
if hook_action == "synchronize":
125-
sync_tasks: list[Coroutine[Any, Any, Any]] = []
126-
127-
sync_tasks.append(self.process_opened_or_synchronize_pull_request(pull_request=pull_request))
128-
sync_tasks.append(self.remove_labels_when_pull_request_sync(pull_request=pull_request))
283+
clean_rebase = await self._is_clean_rebase(_pull_request=pull_request)
284+
285+
if clean_rebase:
286+
before_sha: str = self.hook_data["before"]
287+
label_names = [label["name"] for label in pull_request_data.get("labels", [])]
288+
sync_tasks = [
289+
self._post_clean_rebase_comment(
290+
pull_request=pull_request, before_sha=before_sha, label_names=label_names
291+
),
292+
self.process_opened_or_synchronize_pull_request(
293+
pull_request=pull_request, is_clean_rebase=True, label_names=label_names
294+
),
295+
]
296+
else:
297+
sync_tasks = [
298+
self.process_opened_or_synchronize_pull_request(pull_request=pull_request, is_clean_rebase=False),
299+
self.remove_labels_when_pull_request_sync(pull_request=pull_request),
300+
]
129301

130302
results = await asyncio.gather(*sync_tasks, return_exceptions=True)
131303

@@ -478,7 +650,10 @@ def _prepare_tips_section(self) -> str:
478650
tips.append("* **WIP Status**: Use `/wip` when your PR is not ready for review")
479651

480652
if self.labels_handler.is_label_enabled(VERIFIED_LABEL_STR):
481-
tips.append("* **Verification**: The verified label is automatically removed on each new commit")
653+
tips.append(
654+
"* **Verification**: The verified label is removed on new commits"
655+
" unless the push is detected as a clean rebase"
656+
)
482657

483658
# Cherry-pick tip - check if cherry-pick labels are enabled
484659
if self.labels_handler.is_label_enabled(CHERRY_PICKED_LABEL):
@@ -835,7 +1010,9 @@ def _find_matching_issue() -> Any | None:
8351010
)
8361011
await asyncio.to_thread(matching_issue.edit, state="closed")
8371012

838-
async def process_opened_or_synchronize_pull_request(self, pull_request: PullRequest) -> None:
1013+
async def process_opened_or_synchronize_pull_request(
1014+
self, pull_request: PullRequest, is_clean_rebase: bool = False, label_names: list[str] | None = None
1015+
) -> None:
8391016
if self.ctx:
8401017
self.ctx.start_step("pr_workflow_setup")
8411018

@@ -865,7 +1042,13 @@ async def process_opened_or_synchronize_pull_request(self, pull_request: PullReq
8651042
if self.github_webhook.build_and_push_container:
8661043
setup_tasks.append(self.check_run_handler.set_check_queued(name=BUILD_CONTAINER_STR))
8671044

868-
setup_tasks.append(self._process_verified_for_update_or_new_pull_request(pull_request=pull_request))
1045+
if is_clean_rebase:
1046+
# label_names is guaranteed non-None when is_clean_rebase=True (caller always provides it)
1047+
setup_tasks.append(
1048+
self._sync_verified_check_for_clean_rebase(_pull_request=pull_request, label_names=label_names) # type: ignore[arg-type]
1049+
)
1050+
else:
1051+
setup_tasks.append(self._process_verified_for_update_or_new_pull_request(pull_request=pull_request))
8691052
setup_tasks.append(self.labels_handler.add_size_label(pull_request=pull_request))
8701053
setup_tasks.append(self.add_pull_request_owner_as_assingee(pull_request=pull_request))
8711054

@@ -1226,6 +1409,21 @@ async def _process_verified_for_update_or_new_pull_request(self, pull_request: P
12261409
await self.labels_handler._remove_label(pull_request=pull_request, label=VERIFIED_LABEL_STR)
12271410
await self.check_run_handler.set_check_queued(name=VERIFIED_LABEL_STR)
12281411

1412+
async def _sync_verified_check_for_clean_rebase(self, _pull_request: PullRequest, label_names: list[str]) -> None:
1413+
"""Sync the verified check run to the new commit SHA after a clean rebase.
1414+
1415+
Unlike _process_verified_for_update_or_new_pull_request, this does NOT
1416+
remove/re-add the verified label. It only refreshes the check run state
1417+
to match the existing label on the new head commit.
1418+
"""
1419+
if not self.github_webhook.verified_job:
1420+
return
1421+
1422+
if VERIFIED_LABEL_STR in label_names:
1423+
await self.check_run_handler.set_check_success(name=VERIFIED_LABEL_STR)
1424+
else:
1425+
await self.check_run_handler.set_check_queued(name=VERIFIED_LABEL_STR)
1426+
12291427
async def add_pull_request_owner_as_assingee(self, pull_request: PullRequest) -> None:
12301428
try:
12311429
self.logger.info(f"{self.log_prefix} Adding PR owner as assignee")

0 commit comments

Comments
 (0)