Skip to content

Commit 960775f

Browse files
authored
allow for configuring pr preview default start and min intervals separately (#5824)
Signed-off-by: Jesse Hodges <hodges.jesse@gmail.com>
1 parent cb8acc5 commit 960775f

9 files changed

Lines changed: 341 additions & 6 deletions

File tree

docs/integrations/github.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,8 +293,10 @@ Below is an example of how to define the default config for the bot in either YA
293293
| `enable_deploy_command` | Indicates if the `/deploy` command should be enabled in order to allow synchronized deploys to production. Default: `False` | bool | N |
294294
| `command_namespace` | The namespace to use for SQLMesh commands. For example if you provide `#SQLMesh` as a value then commands will be expected in the format of `#SQLMesh/<command>`. Default: `None` meaning no namespace is used. | string | N |
295295
| `auto_categorize_changes` | Auto categorization behavior to use for the bot. If not provided then the project-wide categorization behavior is used. See [Auto-categorize model changes](https://sqlmesh.readthedocs.io/en/stable/guides/configuration/#auto-categorize-model-changes) for details. | dict | N |
296-
| `default_pr_start` | Default start when creating PR environment plans. If running in a mode where the bot automatically backfills models (based on `auto_categorize_changes` behavior) then this can be used to limit the amount of data backfilled. Defaults to `None` meaning the start date is set to the earliest model's start or to 1 day ago if [data previews](../concepts/plans.md#data-preview) need to be computed. | str | N |
296+
| `default_pr_start` | Default start when creating PR environment plans. If running in a mode where the bot automatically backfills models (based on `auto_categorize_changes` behavior) then this can be used to limit the amount of data backfilled. Defaults to `None` meaning the start date is set to the earliest model's start. | str | N |
297297
| `pr_min_intervals` | Intended for use when `default_pr_start` is set to a relative time, e.g. `1 week ago`. This ensures that at least this many intervals across every model are included for backfill in the PR environment. Without this, models with an interval unit wider than `default_pr_start` (such as `@monthly` models if `default_pr_start` is set to `1 week ago`) will be excluded from backfill entirely. | int | N |
298+
| `default_pr_preview_start` | Default start when computing [data previews](../concepts/plans.md#data-preview) for forward-only changes in PR environments. Defaults to `yesterday`, independent of `default_pr_start`, so preview data can be limited without reducing the regular PR backfill window. | str | N |
299+
| `pr_preview_min_intervals` | Intended for use when `default_pr_preview_start` is set to a relative time. This ensures that at least this many intervals are included for forward-only previews in the PR environment. Default: `1` | int | N |
298300
| `skip_pr_backfill` | Indicates if the bot should skip backfilling models in the PR environment. Default: `True` | bool | N |
299301
| `pr_include_unmodified` | Indicates whether to include unmodified models in the PR environment. Defaults to the project's config value (which defaults to `False`) | bool | N |
300302
| `run_on_deploy_to_prod` | Indicates whether to run latest intervals when deploying to prod. If set to false, the deployment will backfill only the changed models up to the existing latest interval in production, ignoring any missing intervals beyond this point. Default: `False` | bool | N |
@@ -320,6 +322,7 @@ Example with all properties defined:
320322
sql: full
321323
seed: full
322324
default_pr_start: "1 week ago"
325+
default_pr_preview_start: "yesterday"
323326
skip_pr_backfill: false
324327
run_on_deploy_to_prod: false
325328
prod_branch_name: production
@@ -344,6 +347,7 @@ Example with all properties defined:
344347
seed=AutoCategorizationMode.FULL,
345348
),
346349
default_pr_start="1 week ago",
350+
default_pr_preview_start="yesterday",
347351
skip_pr_backfill=False,
348352
run_on_deploy_to_prod=False,
349353
prod_branch_name="production",

sqlmesh/core/context.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1482,6 +1482,8 @@ def plan_builder(
14821482
backfill_models: t.Optional[t.Collection[str]] = None,
14831483
categorizer_config: t.Optional[CategorizerConfig] = None,
14841484
enable_preview: t.Optional[bool] = None,
1485+
preview_start: t.Optional[TimeLike] = None,
1486+
preview_min_intervals: t.Optional[int] = None,
14851487
run: t.Optional[bool] = None,
14861488
diff_rendered: t.Optional[bool] = None,
14871489
skip_linter: t.Optional[bool] = None,
@@ -1523,6 +1525,8 @@ def plan_builder(
15231525
select_models: A list of model selection strings to filter the models that should be included into this plan.
15241526
backfill_models: A list of model selection strings to filter the models for which the data should be backfilled.
15251527
enable_preview: Indicates whether to enable preview for forward-only models in development environments.
1528+
preview_start: The start date for forward-only previews.
1529+
preview_min_intervals: The minimum number of intervals to preview for each forward-only preview snapshot.
15261530
run: Whether to run latest intervals as part of the plan application.
15271531
diff_rendered: Whether the diff should compare raw vs rendered models
15281532
min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered
@@ -1556,6 +1560,8 @@ def plan_builder(
15561560
"select_models": list(select_models) if select_models is not None else None,
15571561
"backfill_models": list(backfill_models) if backfill_models is not None else None,
15581562
"enable_preview": enable_preview,
1563+
"preview_start": preview_start,
1564+
"preview_min_intervals": preview_min_intervals,
15591565
"run": run,
15601566
"diff_rendered": diff_rendered,
15611567
"skip_linter": skip_linter,
@@ -1757,6 +1763,8 @@ def plan_builder(
17571763
enable_preview=(
17581764
enable_preview if enable_preview is not None else self._plan_preview_enabled
17591765
),
1766+
preview_start=preview_start,
1767+
preview_min_intervals=preview_min_intervals or 0,
17601768
end_bounded=not run,
17611769
ensure_finalized_snapshots=self.config.plan.use_finalized_state,
17621770
start_override_per_model=start_override_per_model,

sqlmesh/core/plan/builder.py

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ class PlanBuilder:
8787
default_start: The default plan start to use if not specified.
8888
default_end: The default plan end to use if not specified.
8989
enable_preview: Whether to enable preview for forward-only models in development environments.
90+
preview_start: The start time to use for forward-only previews. Defaults to the plan start.
91+
preview_min_intervals: The minimum number of intervals to preview for each forward-only preview snapshot.
9092
end_bounded: If set to true, the missing intervals will be bounded by the target end date, disregarding lookback,
9193
allow_partials, and other attributes that could cause the intervals to exceed the target end date.
9294
ensure_finalized_snapshots: Whether to compare against snapshots from the latest finalized
@@ -125,6 +127,8 @@ def __init__(
125127
default_start: t.Optional[TimeLike] = None,
126128
default_end: t.Optional[TimeLike] = None,
127129
enable_preview: bool = False,
130+
preview_start: t.Optional[TimeLike] = None,
131+
preview_min_intervals: int = 0,
128132
end_bounded: bool = False,
129133
ensure_finalized_snapshots: bool = False,
130134
explain: bool = False,
@@ -148,6 +152,9 @@ def __init__(
148152
allow_additive_models if allow_additive_models is not None else []
149153
)
150154
self._enable_preview = enable_preview
155+
self._preview_start_provided = preview_start is not None
156+
self._preview_start = preview_start
157+
self._preview_min_intervals = preview_min_intervals
151158
self._end_bounded = end_bounded
152159
self._ensure_finalized_snapshots = ensure_finalized_snapshots
153160
self._ignore_cron = ignore_cron
@@ -179,9 +186,17 @@ def __init__(
179186
self._explain = explain
180187

181188
self._start = start
182-
if not self._start and (
183-
self._forward_only_preview_needed or self._non_forward_only_preview_needed
184-
):
189+
if not self._start and self._forward_only_preview_needed:
190+
self._preview_start = self._preview_start or default_start or yesterday_ds()
191+
# If a separate preview start was provided, don't let it shorten the
192+
# plan start for regular backfills. Fallback preview starts preserve
193+
# the previous preview behavior of using default_start or yesterday.
194+
if self._preview_start_provided and not self._skip_backfill:
195+
self._start = default_start or yesterday_ds()
196+
else:
197+
self._start = self._preview_start
198+
199+
if not self._start and self._non_forward_only_preview_needed:
185200
self._start = default_start or yesterday_ds()
186201

187202
self._plan_id: str = random_id()
@@ -226,6 +241,8 @@ def execution_time(self) -> TimeLike:
226241

227242
def set_start(self, new_start: TimeLike) -> PlanBuilder:
228243
self._start = new_start
244+
if not self._preview_start_provided and self._forward_only_preview_needed:
245+
self._preview_start = new_start
229246
self.override_start = True
230247
self._latest_plan = None
231248
return self
@@ -247,6 +264,8 @@ def set_effective_from(self, effective_from: t.Optional[TimeLike]) -> PlanBuilde
247264
self._effective_from = effective_from
248265
if effective_from and self._is_dev and not self.override_start:
249266
self._start = effective_from
267+
if not self._preview_start_provided and self._forward_only_preview_needed:
268+
self._preview_start = effective_from
250269
self._latest_plan = None
251270
return self
252271

@@ -447,9 +466,12 @@ def _build_restatements(
447466
possible_intervals = {
448467
restatements[p.snapshot_id] for p in restating_parents if p.is_incremental
449468
}
469+
removal_start = (
470+
self._forward_only_preview_start(snapshot, start, end) if is_preview else start
471+
)
450472
possible_intervals.add(
451473
snapshot.get_removal_interval(
452-
start,
474+
removal_start,
453475
end,
454476
self._execution_time,
455477
strict=False,
@@ -474,6 +496,21 @@ def _build_restatements(
474496

475497
return restatements
476498

499+
def _forward_only_preview_start(
500+
self, snapshot: Snapshot, default_start: TimeLike, end: TimeLike
501+
) -> TimeLike:
502+
preview_start = self._preview_start or default_start
503+
if not self._preview_min_intervals:
504+
return preview_start
505+
506+
relative_base = to_datetime(self.execution_time)
507+
preview_end = to_datetime(end, relative_base=relative_base)
508+
min_start = snapshot.node.cron_floor(preview_end)
509+
for _ in range(self._preview_min_intervals):
510+
min_start = snapshot.node.cron_prev(min_start)
511+
512+
return min(to_datetime(preview_start, relative_base=relative_base), min_start)
513+
477514
def _build_directly_and_indirectly_modified(
478515
self, dag: DAG[SnapshotId]
479516
) -> t.Tuple[t.Set[SnapshotId], SnapshotMapping]:

sqlmesh/integrations/github/cicd/config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,13 @@ class GithubCICDBotConfig(BaseConfig):
2727
default=None, alias="auto_categorize_changes"
2828
)
2929
default_pr_start: t.Optional[TimeLike] = None
30+
default_pr_preview_start: TimeLike = "yesterday"
3031
skip_pr_backfill_: t.Optional[bool] = Field(default=None, alias="skip_pr_backfill")
3132
pr_include_unmodified_: t.Optional[bool] = Field(default=None, alias="pr_include_unmodified")
3233
run_on_deploy_to_prod: bool = False
3334
pr_environment_name: t.Optional[str] = None
3435
pr_min_intervals: t.Optional[int] = None
36+
pr_preview_min_intervals: int = Field(default=1, ge=0)
3537
prod_branch_names_: t.Optional[str] = Field(default=None, alias="prod_branch_name")
3638
forward_only_branch_suffix_: t.Optional[str] = Field(
3739
default=None, alias="forward_only_branch_suffix"
@@ -88,9 +90,11 @@ def forward_only_branch_suffix(self) -> str:
8890
"command_namespace",
8991
"auto_categorize_changes",
9092
"default_pr_start",
93+
"default_pr_preview_start",
9194
"skip_pr_backfill",
9295
"pr_include_unmodified",
9396
"run_on_deploy_to_prod",
9497
"pr_min_intervals",
98+
"pr_preview_min_intervals",
9599
"forward_only_branch_suffix",
96100
}

sqlmesh/integrations/github/cicd/controller.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,8 @@ def pr_plan(self) -> Plan:
404404
categorizer_config=self.bot_config.auto_categorize_changes,
405405
start=self.bot_config.default_pr_start,
406406
min_intervals=self.bot_config.pr_min_intervals,
407+
preview_start=self.bot_config.default_pr_preview_start,
408+
preview_min_intervals=self.bot_config.pr_preview_min_intervals,
407409
skip_backfill=self.bot_config.skip_pr_backfill,
408410
include_unmodified=self.bot_config.pr_include_unmodified,
409411
forward_only=self.forward_only_plan,

tests/core/analytics/test_collector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ def test_on_cicd_command(collector: AnalyticsCollector, mocker: MockerFixture):
145145
{
146146
"seq_num": 1,
147147
"event_type": "CICD_COMMAND",
148-
"event": '{"command_name": "test_cicd", "command_args": ["arg_1", "arg_2"], "parent_command_names": ["parent_a", "parent_b"], "cicd_bot_config": {"invalidate_environment_after_deploy": true, "enable_deploy_command": false, "run_on_deploy_to_prod": false}}',
148+
"event": '{"command_name": "test_cicd", "command_args": ["arg_1", "arg_2"], "parent_command_names": ["parent_a", "parent_b"], "cicd_bot_config": {"invalidate_environment_after_deploy": true, "enable_deploy_command": false, "default_pr_preview_start": "yesterday", "run_on_deploy_to_prod": false, "pr_preview_min_intervals": 1}}',
149149
**common_fields,
150150
}
151151
),

0 commit comments

Comments
 (0)