Skip to content

Commit d2aac63

Browse files
Finally working!
1 parent f2f02b5 commit d2aac63

29 files changed

Lines changed: 5691 additions & 946 deletions

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,3 +87,4 @@ visualizer.tgz
8787
package
8888
.mypy_cache/
8989
*.json
90+
python/egglog/exp/param_eq/trace/

AGENTS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
## Verification
4545

4646
- Prefer the minimal code change and the minimal diff that solves the task; only broaden the change if the smaller fix is not sufficient.
47+
- For long-running profiling or trace probes, run them with explicit timeouts, check for lingering worker processes before and after, and inspect memory usage after any timeout or manual kill before starting the next experiment.
4748
- Run `make mypy` for typing changes.
4849
- Run targeted pytest for touched modules.
4950
- Run `make docs` for docs or public API changes.

python/egglog/bindings.pyi

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ __all__ = [
7878
"RustSpan",
7979
"Saturate",
8080
"Scan",
81+
"SchedulerHandle",
8182
"Schema",
8283
"Sequence",
8384
"SerializedEGraph",
@@ -135,6 +136,22 @@ class EGraph:
135136
def run_program(
136137
self, *commands: _Command, traceparent: str | None = None, tracestate: str | None = None
137138
) -> list[_CommandOutput]: ...
139+
# Stub for EGraph.add_backoff_scheduler: `match_limit` and `ban_length` are ints
# that may be passed positionally; `egg_like` and `haskell_backoff` are
# keyword-only flags that default to False. Returns a SchedulerHandle.
def add_backoff_scheduler(
140+
self,
141+
match_limit: int,
142+
ban_length: int,
143+
*,
144+
egg_like: bool = False,
145+
haskell_backoff: bool = False,
146+
) -> SchedulerHandle: ...
147+
# Stub for EGraph.run_ruleset_with_scheduler: takes the ruleset as a string and
# a SchedulerHandle, plus optional keyword-only `traceparent` / `tracestate`
# strings (same optional tracing keywords as `run_program`). Returns a RunReport.
def run_ruleset_with_scheduler(
148+
self,
149+
ruleset: str,
150+
scheduler: SchedulerHandle,
151+
*,
152+
traceparent: str | None = None,
153+
tracestate: str | None = None,
154+
) -> RunReport: ...
138155
def serialize(
139156
self,
140157
root_eclasses: list[_Expr],
@@ -175,6 +192,9 @@ class Value:
175192
def __gt__(self, other: object) -> bool: ...
176193
def __ge__(self, other: object) -> bool: ...
177194

195+
# Handle type returned by EGraph.add_backoff_scheduler and accepted by
# EGraph.run_ruleset_with_scheduler. The stub declares no attributes or
# methods, so Python code can only pass it around.
@final
196+
class SchedulerHandle: ...
197+
178198
@final
179199
class EggSmolError(Exception):
180200
context: str

python/egglog/egraph.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -976,6 +976,31 @@ def _run_schedule(self, schedule: Schedule) -> bindings.RunReport:
976976
assert isinstance(command_output, bindings.RunScheduleOutput)
977977
return command_output.report
978978

979+
def _add_backoff_scheduler(
980+
self,
981+
*,
982+
match_limit: int,
983+
ban_length: int,
984+
egg_like: bool,
985+
haskell_backoff: bool = False,
986+
) -> bindings.SchedulerHandle:
987+
return self._egraph.add_backoff_scheduler(
988+
match_limit,
989+
ban_length,
990+
egg_like=egg_like,
991+
haskell_backoff=haskell_backoff,
992+
)
993+
994+
def _run_ruleset_with_scheduler(
995+
self,
996+
ruleset: Ruleset | UnstableCombinedRuleset,
997+
scheduler: bindings.SchedulerHandle,
998+
) -> bindings.RunReport:
999+
self._add_decls(ruleset)
1000+
ruleset_ident = ruleset.__egg_ident__
1001+
self._state.ruleset_to_egg(ruleset_ident)
1002+
return call_with_current_trace(self._egraph.run_ruleset_with_scheduler, str(ruleset_ident), scheduler)
1003+
9791004
def stats(self) -> bindings.RunReport:
9801005
"""
9811006
Returns the overall run report for the egraph.

python/egglog/exp/param_eq/Makefile

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
# Canonical local workflow for the retained param-eq replication package.
22
# The file dependencies document the expected order:
33
# 1. normalize archived Haskell outputs
4-
# 2. run the Egglog corpus against those normalized rows
5-
# 3. refresh the golden cases and notebook
6-
# 4. run the replication-local tests
4+
# 2. run the current local Haskell pipeline across the same rows
5+
# 3. run the Egglog corpus baseline and scheduler ablations against those rows
6+
# 4. refresh the golden cases and notebook
7+
# 5. run the replication-local tests
78

89
PARAM_EQ_DIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))
910
REPO_ROOT := $(abspath $(PARAM_EQ_DIR)/../../../..)
@@ -13,9 +14,12 @@ export EGGLOG_PARAM_EQ_DATA_DIR
1314
ARTIFACT_DIR := $(PARAM_EQ_DIR)/artifacts
1415
GOLDEN := $(PARAM_EQ_DIR)/haskell_golden.json
1516
NOTEBOOK := $(PARAM_EQ_DIR)/replication.ipynb
16-
HASKELL_ROWS := $(ARTIFACT_DIR)/haskell_paper_rows.csv
17+
ARCHIVED_HASKELL_ROWS := $(ARTIFACT_DIR)/haskell_paper_rows.csv
18+
LIVE_HASKELL_ROWS := $(ARTIFACT_DIR)/haskell_live_rows.csv
1719
RUNTIME_ROWS := $(ARTIFACT_DIR)/pagie_runtime_scatter.csv
1820
EGGLOG_ROWS := $(ARTIFACT_DIR)/egglog_paper_rows.csv
21+
EGGLOG_ABLATION_ROWS := $(ARTIFACT_DIR)/egglog_ablation_rows.csv
22+
ABLATION_SUMMARY := $(ARTIFACT_DIR)/ablation_summary.csv
1923

2024
HASKELL_FIXTREE := $(EGGLOG_PARAM_EQ_DATA_DIR)/src/FixTree.hs
2125
HASKELL_MAIN := $(EGGLOG_PARAM_EQ_DATA_DIR)/src/Main.hs
@@ -28,23 +32,41 @@ HASKELL_EXPRS := \
2832
$(wildcard $(HASKELL_RESULTS)/exprs/*_exprs_*) \
2933
$(wildcard $(HASKELL_RESULTS)/exprs_simpl/*_exprs_*)
3034

31-
.PHONY: golden artifacts notebook test all
35+
.PHONY: golden archived-artifacts live-haskell artifacts ablations notebook test all
3236

33-
$(HASKELL_ROWS) $(RUNTIME_ROWS): $(PARAM_EQ_DIR)/normalize_archives.py $(PARAM_EQ_DIR)/paths.py $(HASKELL_TABLES) $(HASKELL_EXPRS) $(HASKELL_RUNTIME)
37+
# Step 1: normalize the archived Haskell outputs.
# Both CSVs are listed as outputs of the same normalize_archives run, so this is
# a grouped target (`&:`, GNU Make >= 4.3): the recipe runs once for the pair,
# also under `make -j`. A plain `a b: deps` rule is two independent rules and
# may run the recipe twice. Older GNU Make is expected to read `&` as one more
# target name, which keeps the previous per-target behaviour.
$(ARCHIVED_HASKELL_ROWS) $(RUNTIME_ROWS) &: \
    $(PARAM_EQ_DIR)/normalize_archives.py \
    $(PARAM_EQ_DIR)/paths.py \
    $(HASKELL_TABLES) \
    $(HASKELL_EXPRS) \
    $(HASKELL_RUNTIME)
	cd $(REPO_ROOT) && uv run python -m egglog.exp.param_eq.normalize_archives
3539

36-
$(EGGLOG_ROWS): $(PARAM_EQ_DIR)/run_egglog_corpus.py $(PARAM_EQ_DIR)/pipeline.py $(HASKELL_ROWS) $(RUNTIME_ROWS)
40+
# Step 2: run the current local Haskell pipeline (run_haskell_corpus) across
# the archived rows. Rebuilds when the driver scripts, the archived rows, or
# the Haskell sources (FixTree.hs / Main.hs) change.
$(LIVE_HASKELL_ROWS): \
    $(PARAM_EQ_DIR)/run_haskell_corpus.py \
    $(PARAM_EQ_DIR)/generate_haskell_golden.py \
    $(PARAM_EQ_DIR)/pipeline.py \
    $(PARAM_EQ_DIR)/paths.py \
    $(ARCHIVED_HASKELL_ROWS) \
    $(HASKELL_FIXTREE) \
    $(HASKELL_MAIN)
	cd $(REPO_ROOT) && uv run python -m egglog.exp.param_eq.run_haskell_corpus
42+
43+
# Step 3a: Egglog corpus baseline (run_egglog_corpus with its default
# arguments), rebuilt when the driver, the pipeline, or the normalized
# archive CSVs change.
$(EGGLOG_ROWS): \
    $(PARAM_EQ_DIR)/run_egglog_corpus.py \
    $(PARAM_EQ_DIR)/pipeline.py \
    $(ARCHIVED_HASKELL_ROWS) \
    $(RUNTIME_ROWS)
	cd $(REPO_ROOT) && uv run python -m egglog.exp.param_eq.run_egglog_corpus
3845

39-
$(GOLDEN): $(PARAM_EQ_DIR)/generate_haskell_golden.py $(PARAM_EQ_DIR)/pipeline.py $(PARAM_EQ_DIR)/paths.py $(HASKELL_FIXTREE) $(HASKELL_MAIN) $(HASKELL_ROWS)
46+
# Step 3b: scheduler ablations. Same driver as the baseline, but the output is
# written to this rule's own target ($@) and one --mode flag is passed per variant.
$(EGGLOG_ABLATION_ROWS): \
    $(PARAM_EQ_DIR)/run_egglog_corpus.py \
    $(PARAM_EQ_DIR)/pipeline.py \
    $(ARCHIVED_HASKELL_ROWS) \
    $(LIVE_HASKELL_ROWS)
	cd $(REPO_ROOT) && uv run python -m egglog.exp.param_eq.run_egglog_corpus \
	    --output $@ \
	    --mode egglog-baseline \
	    --mode no-haskell-backoff \
	    --mode no-graph-size-stop \
	    --mode no-bound-scheduler \
	    --mode no-fresh-rematch
54+
55+
# Refresh the golden cases file (haskell_golden.json) via generate_haskell_golden.
# Rebuilds when the generator, its helper modules, the Haskell sources, or the
# archived rows change.
$(GOLDEN): \
    $(PARAM_EQ_DIR)/generate_haskell_golden.py \
    $(PARAM_EQ_DIR)/pipeline.py \
    $(PARAM_EQ_DIR)/paths.py \
    $(HASKELL_FIXTREE) \
    $(HASKELL_MAIN) \
    $(ARCHIVED_HASKELL_ROWS)
	cd $(REPO_ROOT) && uv run python -m egglog.exp.param_eq.generate_haskell_golden
4157

42-
$(NOTEBOOK): $(PARAM_EQ_DIR)/replication.py $(HASKELL_ROWS) $(EGGLOG_ROWS) $(RUNTIME_ROWS)
58+
# Refresh the replication notebook by running replication.py from the repo root.
# NOTE(review): $(ABLATION_SUMMARY) is a prerequisite here, but no rule that
# builds it is visible in this Makefile. Unless the file already exists on disk,
# `make notebook` stops with "No rule to make target". Confirm which script
# writes it and give it a rule.
$(NOTEBOOK): \
    $(PARAM_EQ_DIR)/replication.py \
    $(ARCHIVED_HASKELL_ROWS) \
    $(LIVE_HASKELL_ROWS) \
    $(EGGLOG_ROWS) \
    $(ABLATION_SUMMARY) \
    $(RUNTIME_ROWS)
	cd $(REPO_ROOT) && uv run python python/egglog/exp/param_eq/replication.py
4460

61+
# Convenience aliases (all declared .PHONY above); each only names the files to build.

## Normalized archive CSVs (Haskell paper rows + runtime scatter).
archived-artifacts: $(ARCHIVED_HASKELL_ROWS) $(RUNTIME_ROWS)

## Rows from the current local Haskell pipeline.
live-haskell: $(LIVE_HASKELL_ROWS)

## Golden cases file.
golden: $(GOLDEN)

## Archived CSVs, live Haskell rows, and the Egglog baseline rows.
artifacts: archived-artifacts live-haskell $(EGGLOG_ROWS)

## Egglog scheduler-ablation rows.
ablations: $(EGGLOG_ABLATION_ROWS)

## Replication notebook.
notebook: $(NOTEBOOK)
5072

0 commit comments

Comments
 (0)