Skip to content

Commit 0cc752f

Browse files
authored
Harden scheduler switch guards
1 parent b668260 commit 0cc752f

4 files changed

Lines changed: 83 additions & 4 deletions

File tree

scripts/cloud_run_runtime_guard.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,19 @@ def _load_services() -> list[str]:
8585
return unique
8686

8787

88+
def _scheduler_job_pattern_for_services(services: list[str]) -> str:
89+
candidates: list[str] = []
90+
for service in services:
91+
service_name = str(service or "").strip()
92+
if not service_name:
93+
continue
94+
candidates.append(service_name)
95+
if service_name.endswith("-service"):
96+
candidates.append(service_name.removesuffix("-service"))
97+
unique = list(dict.fromkeys(candidates))
98+
return "|".join(re.escape(candidate) for candidate in unique)
99+
100+
88101
def _run_gcloud_logging(project: str, log_filter: str, limit: int) -> list[dict[str, Any]]:
89102
command = [
90103
"gcloud",
@@ -214,7 +227,6 @@ def main() -> int:
214227
require_success = _env_bool("RUNTIME_GUARD_REQUIRE_SUCCESS", False)
215228
fail_workflow = _env_bool("RUNTIME_GUARD_FAIL_WORKFLOW_ON_ALERT", True)
216229
check_scheduler = _env_bool("RUNTIME_GUARD_CHECK_SCHEDULER", True)
217-
scheduler_pattern = os.environ.get("RUNTIME_GUARD_SCHEDULER_JOB_PATTERN") or ""
218230

219231
since = (
220232
dt.datetime.now(dt.timezone.utc) - dt.timedelta(minutes=lookback_minutes)
@@ -230,6 +242,10 @@ def main() -> int:
230242
except RuntimeError as exc:
231243
services = []
232244
issues.append(f"service configuration error: {exc}")
245+
scheduler_pattern = (
246+
os.environ.get("RUNTIME_GUARD_SCHEDULER_JOB_PATTERN")
247+
or _scheduler_job_pattern_for_services(services)
248+
)
233249

234250
for service in services:
235251
log_filter = (

scripts/execution_report_heartbeat.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -313,12 +313,24 @@ def _describe_scheduler_jobs_for_services(
313313
) -> list[dict[str, Any]]:
314314
jobs = []
315315
for service in services:
316-
job = _describe_scheduler_job(f"{service}-scheduler", project=project)
317-
if job:
318-
jobs.append(job)
316+
for job_name in _scheduler_job_name_candidates(service):
317+
job = _describe_scheduler_job(job_name, project=project)
318+
if job:
319+
jobs.append(job)
320+
break
319321
return jobs
320322

321323

324+
def _scheduler_job_name_candidates(service: str) -> list[str]:
    """Return the scheduler job names to try for *service*, in order.

    The first candidate is ``<service>-scheduler``. When the service name
    ends with ``-service``, a second candidate built from the name without
    that suffix follows. A blank or falsy service yields an empty list.
    """
    name = str(service or "").strip()
    if name == "":
        return []

    service_suffix = "-service"
    job_suffix = "-scheduler"
    names = [name + job_suffix]
    if name.endswith(service_suffix):
        names.append(name.removesuffix(service_suffix) + job_suffix)
    return _unique_values(names)
332+
333+
322334
def _scheduler_job_targets_strategy_run(job: dict[str, Any], service: str) -> bool:
323335
if str(job.get("state") or "").strip().upper() not in {"", "ENABLED"}:
324336
return False
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from __future__ import annotations
2+
3+
import re
4+
5+
from scripts import cloud_run_runtime_guard as guard
6+
7+
8+
def test_scheduler_job_pattern_includes_service_alias():
    """The generated pattern matches the full and the suffix-stripped job name only."""
    job_pattern = guard._scheduler_job_pattern_for_services(
        ["longbridge-quant-hk-service"]
    )

    expectations = (
        ("longbridge-quant-hk-service-scheduler", True),
        ("longbridge-quant-hk-scheduler", True),
        ("longbridge-quant-sg-scheduler", False),
    )
    for job_name, should_match in expectations:
        assert bool(re.search(job_pattern, job_name)) is should_match

tests/test_execution_report_heartbeat.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,44 @@ def test_scheduler_aware_required_services_fall_back_to_named_scheduler_describe
172172
assert scheduler_checked is True
173173

174174

175+
def test_scheduler_aware_named_fallback_uses_service_alias(monkeypatch):
    """Named describe fallback tries the ``-service``-stripped job name.

    Listing scheduler jobs is made to fail, and only the alias job name
    resolves via describe. The test checks the order of describe attempts
    and that no service is reported as required for the given window.
    """
    _clear_runtime_env(monkeypatch)
    monkeypatch.setenv("CLOUD_RUN_SERVICE", "longbridge-quant-hk-service")

    def deny_scheduler_listing(**_kwargs):
        raise RuntimeError("cloudscheduler.jobs.list denied")

    monkeypatch.setattr(heartbeat, "_list_scheduler_jobs", deny_scheduler_listing)

    described_names = []

    def fake_describe_scheduler_job(job_name, **_kwargs):
        described_names.append(job_name)
        if job_name == "longbridge-quant-hk-scheduler":
            return {
                "state": "ENABLED",
                "schedule": "45 15 1-7 * *",
                "timeZone": "Asia/Hong_Kong",
                "httpTarget": {"uri": "https://longbridge-quant-hk-service.example.run.app/"},
            }
        return None

    monkeypatch.setattr(heartbeat, "_describe_scheduler_job", fake_describe_scheduler_job)

    window_start = dt.datetime(2026, 6, 10, 0, 0, tzinfo=dt.timezone.utc)
    window_end = dt.datetime(2026, 6, 10, 2, 0, tzinfo=dt.timezone.utc)
    required, skip_reason, scheduler_checked = heartbeat._resolve_required_services(
        project="project-1",
        since=window_start,
        now=window_end,
    )

    # The full service name is tried first, then the alias without "-service".
    assert described_names == [
        "longbridge-quant-hk-service-scheduler",
        "longbridge-quant-hk-scheduler",
    ]
    assert required == []
    assert skip_reason and "no configured Cloud Scheduler main job was due" in skip_reason
    assert scheduler_checked is True
211+
212+
175213
def test_main_skips_when_no_scheduler_main_job_is_due(monkeypatch, capsys):
176214
_clear_runtime_env(monkeypatch)
177215
monkeypatch.setenv("GCP_PROJECT_ID", "longbridgequant")

0 commit comments

Comments
 (0)