PolicyEngine
diff --git a/‎projects/policyengine-api-simulation/fixtures/gateway/shared.py‎
Lines changed: 11 additions & 2 deletions b/‎projects/policyengine-api-simulation/fixtures/gateway/shared.py‎
Lines changed: 11 additions & 2 deletions
diff --git a/‎projects/policyengine-api-simulation/pyproject.toml‎
Lines changed: 7 additions & 0 deletions b/‎projects/policyengine-api-simulation/pyproject.toml‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎projects/policyengine-api-simulation/src/modal/app.py‎
Lines changed: 14 additions & 10 deletions b/‎projects/policyengine-api-simulation/src/modal/app.py‎
Lines changed: 14 additions & 10 deletions
diff --git a/‎projects/policyengine-api-simulation/src/modal/budget_window_results.py‎
Lines changed: 51 additions & 14 deletions b/‎projects/policyengine-api-simulation/src/modal/budget_window_results.py‎
Lines changed: 51 additions & 14 deletions
diff --git a/‎projects/policyengine-api-simulation/src/modal/budget_window_scheduler.py‎
Lines changed: 52 additions & 6 deletions b/‎projects/policyengine-api-simulation/src/modal/budget_window_scheduler.py‎
Lines changed: 52 additions & 6 deletions
diff --git a/‎projects/policyengine-api-simulation/src/modal/budget_window_state.py‎
Lines changed: 35 additions & 2 deletions b/‎projects/policyengine-api-simulation/src/modal/budget_window_state.py‎
Lines changed: 35 additions & 2 deletions
@@ -4,17 +4,26 @@
 from fastapi import FastAPI
 from fastapi.testclient import TestClient
 
+from src.modal.gateway.auth import require_auth
 from src.modal.gateway.endpoints import router
 
 
-def create_gateway_app() -> FastAPI:
-    """Create a FastAPI app with the gateway router for testing."""
+def create_gateway_app(*, authenticate: bool = True) -> FastAPI:
+    """Create a FastAPI app with the gateway router for testing.
+
+    With ``authenticate=True`` (the default) every request is treated as
+    already authenticated: the ``require_auth`` dependency is overridden
+    with a no-op callable so individual tests don't need to stage JWT
+    material. Tests that exercise the auth failure path can pass
+    ``authenticate=False`` to keep the real dependency wired up.
+    """
     app = FastAPI(
         title="Test PolicyEngine Simulation API",
         description="Test instance for unit tests",
         version="0.0.1",
     )
     app.include_router(router)
+    if authenticate:
+        app.dependency_overrides[require_auth] = lambda: None
     return app
 
 
 
@@ -41,3 +41,10 @@ pythonpath = [
 ]
 
 testpaths = ["tests"]
+
+# Skip real-Modal integration smoke tests unless the operator explicitly
+# opts in with ``-m integration``. The default test run stays hermetic.
+addopts = "-m 'not integration'"
+markers = [
+  "integration: runs against a real (ephemeral) Modal deployment",
+]
@@ -11,6 +11,7 @@
 import os
 
 from src.modal._image_setup import snapshot_models
+from src.modal.logging_redaction import redact_params_for_logging
 
 # Get versions from environment or use defaults
 US_VERSION = os.environ.get("POLICYENGINE_US_VERSION", "1.562.3")
@@ -94,14 +95,18 @@ def run_simulation(params: dict) -> dict:
 
     configure_logfire()
 
+    # We deliberately avoid sending full ``params`` or ``result`` blobs to
+    # Logfire: they can embed signed URLs or reform parameter trees with
+    # sensitive policy details, and result payloads can be large enough to
+    # blow the span attribute size budget. The redacted summary keeps
+    # correlation traceability via run_id while leaving the heavy payload
+    # in memory.
+    redacted_params = redact_params_for_logging(params)
     try:
         with logfire.span(
             "run_simulation",
-            input_params=params,
-        ) as span:
-            result = run_simulation_impl(params)
-            span.set_attribute("output_result", result)
-            return result
+            **redacted_params,
+        ):
+            return run_simulation_impl(params)
     finally:
         logfire.force_flush()
 
@@ -123,13 +128,12 @@ def run_budget_window_batch(params: dict) -> dict:
 
     configure_logfire()
 
+    redacted_params = redact_params_for_logging(params)
     try:
         with logfire.span(
             "run_budget_window_batch",
-            input_params=params,
-        ) as span:
-            result = run_budget_window_batch_impl(params)
-            span.set_attribute("output_result", result)
-            return result
+            **redacted_params,
+        ):
+            return run_budget_window_batch_impl(params)
     finally:
         logfire.force_flush()
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+from decimal import Decimal
 from typing import Any
 
 from src.modal.gateway.models import (
@@ -10,12 +11,25 @@
     BudgetWindowTotals,
 )
 
+# The UK microsimulation has no state/province fiscal layer, so worker child
+# results for ``country="uk"`` never emit ``state_tax_revenue_impact``. The
+# parent aggregator treats it as optional with a zero default; US results are
+# expected to supply it as a real number. All other keys remain mandatory.
 REQUIRED_BUDGET_KEYS = (
     "tax_revenue_impact",
-    "state_tax_revenue_impact",
     "benefit_spending_impact",
     "budgetary_impact",
 )
+OPTIONAL_BUDGET_KEYS = ("state_tax_revenue_impact",)
+
+
+def _as_decimal(value: float | int) -> Decimal:
+    """Convert an annual impact float to Decimal without reintroducing
+    binary-float quantisation noise. ``Decimal(str(...))`` is the canonical
+    idiom because it serialises the float to its shortest round-trippable
+    decimal form before parsing."""
+
+    return Decimal(str(value))
 
 
 def extract_annual_impact(
@@ -38,8 +52,13 @@ def extract_annual_impact(
             f"Malformed budget-window child result: missing numeric {missing}"
         )
 
-    state_tax_revenue_impact = budget["state_tax_revenue_impact"]
     tax_revenue_impact = budget["tax_revenue_impact"]
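+    # UK worker results omit the state fiscal layer entirely; coerce to 0.0
+    # so the parent aggregator can still report federal/state splits with a
+    # uniform shape across countries. NOTE: the check below is not
+    # country-aware, so a missing or non-numeric value in a US result is
+    # also silently replaced with 0.0 rather than rejected.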
+    state_tax_revenue_impact = budget.get("state_tax_revenue_impact")
+    if not isinstance(state_tax_revenue_impact, int | float):
+        state_tax_revenue_impact = 0.0
 
     return BudgetWindowAnnualImpact(
         year=simulation_year,
@@ -54,22 +73,40 @@ def extract_annual_impact(
 def sum_annual_impacts(
     annual_impacts: list[BudgetWindowAnnualImpact],
 ) -> BudgetWindowTotals:
-    totals = {
-        "taxRevenueImpact": 0,
-        "federalTaxRevenueImpact": 0,
-        "stateTaxRevenueImpact": 0,
-        "benefitSpendingImpact": 0,
-        "budgetaryImpact": 0,
+    """Sum per-year impacts using Decimal accumulators.
+
+    Binary-float addition accumulates rounding error for long budget windows
+    (10-year sums over billion-dollar baselines quickly drift by ``1e-6`` or
+    more). Accumulating in :class:`decimal.Decimal` keeps the answer exact
+    to the input precision; we cast back to ``float`` at the serialisation
+    boundary so the JSON schema stays numeric and clients that parse the
+    response as ``number`` continue to work unchanged. Clients that need
+    bit-exact accounting should request the individual per-year impacts and
+    sum them in their preferred numeric type.
+    """
+
+    totals: dict[str, Decimal] = {
+        "taxRevenueImpact": Decimal(0),
+        "federalTaxRevenueImpact": Decimal(0),
+        "stateTaxRevenueImpact": Decimal(0),
+        "benefitSpendingImpact": Decimal(0),
+        "budgetaryImpact": Decimal(0),
     }
 
     for annual_impact in annual_impacts:
-        totals["taxRevenueImpact"] += annual_impact.taxRevenueImpact
-        totals["federalTaxRevenueImpact"] += annual_impact.federalTaxRevenueImpact
-        totals["stateTaxRevenueImpact"] += annual_impact.stateTaxRevenueImpact
-        totals["benefitSpendingImpact"] += annual_impact.benefitSpendingImpact
-        totals["budgetaryImpact"] += annual_impact.budgetaryImpact
+        totals["taxRevenueImpact"] += _as_decimal(annual_impact.taxRevenueImpact)
+        totals["federalTaxRevenueImpact"] += _as_decimal(
+            annual_impact.federalTaxRevenueImpact
+        )
+        totals["stateTaxRevenueImpact"] += _as_decimal(
+            annual_impact.stateTaxRevenueImpact
+        )
+        totals["benefitSpendingImpact"] += _as_decimal(
+            annual_impact.benefitSpendingImpact
+        )
+        totals["budgetaryImpact"] += _as_decimal(annual_impact.budgetaryImpact)
 
-    return BudgetWindowTotals(**totals)
+    return BudgetWindowTotals(**{key: float(value) for key, value in totals.items()})
 
 
 def build_budget_window_result(
 
@@ -29,8 +29,23 @@
     put_batch_job_seed,
     put_batch_job_state,
 )
-
-POLL_INTERVAL_SECONDS = 0.1
+from src.modal.gateway.errors import log_and_redact_exception
+
+# Polling tuning. The runner busy-loops across child FunctionCall.get(timeout=0)
+# probes; when no child resolved we sleep before the next probe to stop the
+# Modal control-plane from getting hammered. We start aggressive (0.5s) so
+# fast child runs don't inflate end-to-end latency, then double up to 30s so a
+# sluggish child doesn't keep the parent container hot polling. A blocking
+# FunctionCall.get(timeout=...) would be even better, but its interaction with
+# max_parallel means we'd have to juggle per-year deadlines and give up early
+# termination on child failure; the exponential walk keeps the control flow
+# simple while matching Modal's recommended polling cadence.
+POLL_INTERVAL_INITIAL_SECONDS = 0.5
+POLL_INTERVAL_MAX_SECONDS = 30.0
+POLL_INTERVAL_BACKOFF_FACTOR = 2.0
+# Retained for backward compatibility with callers that imported the original
+# constant; new code should use the initial/max pair above. Note that the
+# alias now resolves to 0.5s, not the previous 0.1s.
+POLL_INTERVAL_SECONDS = POLL_INTERVAL_INITIAL_SECONDS
 
 
 def serialize_batch_status(state) -> dict[str, Any]:
@@ -59,10 +74,16 @@ def __init__(
         context: BudgetWindowBatchContext,
         *,
         modal_module=None,
-        poll_interval_seconds: float = POLL_INTERVAL_SECONDS,
+        poll_interval_seconds: float = POLL_INTERVAL_INITIAL_SECONDS,
+        poll_interval_max_seconds: float = POLL_INTERVAL_MAX_SECONDS,
+        poll_interval_backoff_factor: float = POLL_INTERVAL_BACKOFF_FACTOR,
     ):
         self.context = context
         self.modal = modal if modal_module is None else modal_module
+        self.poll_interval_initial_seconds = poll_interval_seconds
+        self.poll_interval_max_seconds = poll_interval_max_seconds
+        self.poll_interval_backoff_factor = poll_interval_backoff_factor
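+        # Kept for tests that still read this attribute. ``run()`` sleeps on
+        # ``poll_interval_initial_seconds``, so assigning to this attribute
+        # after construction no longer changes the polling cadence.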
         self.poll_interval_seconds = poll_interval_seconds
         self.state = load_or_create_batch_state(context)
         self.child_func = self.modal.Function.from_name(
@@ -75,13 +96,22 @@ def run(self) -> dict[str, Any]:
         mark_batch_running(self.state)
         put_batch_job_state(self.state)
 
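+        # Exponential backoff: reset to the initial interval on any progress;
+        # after each empty poll, multiply by ``poll_interval_backoff_factor``
+        # (capped at ``poll_interval_max_seconds``).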
+        current_sleep = self.poll_interval_initial_seconds
+
         while self.has_pending_work():
             self.spawn_until_capacity()
             progress_made = self.poll_running_children_once()
             if self.state.status == "failed":
                 return serialize_batch_status(self.state)
             if self.state.running_years and not progress_made:
-                time.sleep(self.poll_interval_seconds)
+                time.sleep(current_sleep)
+                current_sleep = min(
+                    current_sleep * self.poll_interval_backoff_factor,
+                    self.poll_interval_max_seconds,
+                )
+            elif progress_made:
+                current_sleep = self.poll_interval_initial_seconds
 
         return self.complete_batch()
 
@@ -122,9 +152,17 @@ def poll_running_children_once(self) -> bool:
             except TimeoutError:
                 continue
             except Exception as exc:
+                redacted = log_and_redact_exception(
+                    exc,
+                    scope="budget_window_child_call",
+                    context={
+                        "batch_job_id": self.context.batch_job_id,
+                        "simulation_year": simulation_year,
+                    },
+                )
                 self.fail_batch_for_child_error(
                     simulation_year=simulation_year,
-                    error=str(exc),
+                    error=redacted,
                 )
                 return False
 
@@ -134,9 +172,17 @@ def poll_running_children_once(self) -> bool:
                     child_result=child_result,
                 )
             except Exception as exc:
+                redacted = log_and_redact_exception(
+                    exc,
+                    scope="budget_window_child_result_parsing",
+                    context={
+                        "batch_job_id": self.context.batch_job_id,
+                        "simulation_year": simulation_year,
+                    },
+                )
                 self.fail_batch_for_child_error(
                     simulation_year=simulation_year,
-                    error=str(exc),
+                    error=redacted,
                 )
                 return False
 
 
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import logging
 from datetime import UTC, datetime
 
 import modal
@@ -16,6 +17,10 @@
     PolicyEngineBundle,
 )
 
+logger = logging.getLogger(__name__)
+
+_UNKNOWN_CHILD_JOB_ID = "unknown"
+
 BUDGET_WINDOW_JOB_DICT_NAME = "simulation-api-budget-window-jobs"
 BUDGET_WINDOW_JOB_SEED_DICT_NAME = "simulation-api-budget-window-job-seeds"
 
@@ -129,6 +134,34 @@ def mark_child_started(
     return _touch(state)
 
 
+def _existing_child_or_sentinel(
+    state: BudgetWindowBatchState, *, year: str
+) -> BatchChildJobStatus:
+    """Return the tracked child for ``year`` or synthesise a sentinel.
+
+    Callers (``mark_child_completed`` / ``mark_child_failed``) used to index
+    ``state.child_jobs[year]`` directly, which would raise ``KeyError`` if
+    transition helpers were invoked out of order (e.g., after recovery from
+    a dropped ``mark_child_started`` due to a crash between spawn and seed
+    persistence). In that unusual case we'd rather surface a terminal state
+    with a synthetic job id than abort the whole batch: a ``"pending"``
+    placeholder is stored in ``state.child_jobs`` and returned. The anomaly
+    is logged at WARNING so operators can investigate separately.
+    """
+    child = state.child_jobs.get(year)
+    if child is not None:
+        return child
+
+    logger.warning(
+        "Transitioning child state for year %s with no prior child_jobs entry;"
+        " synthesising a sentinel job id",
+        year,
+        extra={"year": year, "batch_job_id": state.batch_job_id},
+    )
+    sentinel = BatchChildJobStatus(job_id=_UNKNOWN_CHILD_JOB_ID, status="pending")
+    state.child_jobs[year] = sentinel
+    return sentinel
+
+
 def mark_child_completed(
     state: BudgetWindowBatchState,
     *,
@@ -140,7 +173,7 @@ def mark_child_completed(
     if year not in state.completed_years:
         state.completed_years.append(year)
 
-    child = state.child_jobs[year]
+    child = _existing_child_or_sentinel(state, year=year)
     state.child_jobs[year] = BatchChildJobStatus(
         job_id=child.job_id,
         status="complete",
@@ -160,7 +193,7 @@ def mark_child_failed(
     if year not in state.failed_years:
         state.failed_years.append(year)
 
-    child = state.child_jobs[year]
+    child = _existing_child_or_sentinel(state, year=year)
     state.child_jobs[year] = BatchChildJobStatus(
         job_id=child.job_id,
         status="failed",