From 4dc1a066aeaac6c61a3df9be0f38e335ec3be695 Mon Sep 17 00:00:00 2001
From: ryanorendorff <gentler_portage6g@icloud.com>
Date: Sun, 24 May 2026 20:45:50 -0600
Subject: [PATCH] Add monthly CI to refresh README

Bumps the lattice-estimator submodule and, when the submodule or
parameter_db.csv has changed, opens a PR that refreshes README.md and
the estimates CSV. Runs monthly and on workflow_dispatch; the scheduled
trigger only fires upstream once this workflow is on the default branch.
---
 .github/workflows/refresh-readme.yml | 271 +++++++++++++++++++++++++++
 .gitmodules                          |   1 +
 Makefile                             |  20 +-
 ci/list_parameter_ids.py             |  34 ++++
 ci/merge_estimates.py                |  59 ++++++
 ci/parameters_diverged.py            |  36 ++++
 src/estimate_security.py             |  29 ++-
 7 files changed, 434 insertions(+), 16 deletions(-)
 create mode 100644 .github/workflows/refresh-readme.yml
 create mode 100644 ci/list_parameter_ids.py
 create mode 100644 ci/merge_estimates.py
 create mode 100644 ci/parameters_diverged.py

diff --git a/.github/workflows/refresh-readme.yml b/.github/workflows/refresh-readme.yml
new file mode 100644
index 0000000..51e54f8
--- /dev/null
+++ b/.github/workflows/refresh-readme.yml
@@ -0,0 +1,271 @@
+name: Refresh README
+
+on:
+  schedule:
+    # 06:00 UTC on the 1st of each month.
+    - cron: "0 6 1 * *"
+  workflow_dispatch:
+
+# Read-only by default. Only the commit job upgrades to write.
+permissions:
+  contents: read
+
+concurrency:
+  group: refresh-readme-${{ github.ref }}
+  cancel-in-progress: false
+
+jobs:
+  prepare:
+    runs-on: ubuntu-24.04
+    outputs:
+      sha: ${{ steps.bump.outputs.sha }}
+      short_sha: ${{ steps.bump.outputs.short_sha }}
+      ids: ${{ steps.matrix.outputs.ids }}
+      changed: ${{ steps.changed.outputs.changed }}
+    steps:
+      - name: Checkout (with submodules, full history)
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          submodules: recursive
+          # fetch-depth: 0 so the change-detection step can ask git
+          # which commit last touched the estimates CSV.
+          fetch-depth: 0
+
+      - name: Bump lattice-estimator submodule to upstream HEAD
+        id: bump
+        run: |
+          set -euo pipefail
+          git submodule update --remote --recursive src/lattice_estimator
+          sha=$(git -C src/lattice_estimator rev-parse HEAD)
+          short=$(git -C src/lattice_estimator rev-parse --short HEAD)
+          echo "lattice-estimator HEAD: $sha"
+          {
+            echo "sha=$sha"
+            echo "short_sha=$short"
+          } >> "$GITHUB_OUTPUT"
+
+      - name: Detect whether a refresh is needed
+        id: changed
+        env:
+          BUMPED_SHA: ${{ steps.bump.outputs.sha }}
+        run: |
+          set -euo pipefail
+          # BUMPED_SHA arrives via env so it is never spliced into the script.
+          committed=$(git ls-tree HEAD src/lattice_estimator | awk '{print $3}')
+          result="false"
+          if [[ "$BUMPED_SHA" != "$committed" ]]; then
+            echo "estimator moved $committed -> $BUMPED_SHA"
+            result="true"
+          elif ! drift=$(python3 ci/parameters_diverged.py); then
+            echo "parameters_diverged.py crashed — assuming refresh needed"
+            result="true"
+          elif [[ "$drift" == "true" ]]; then
+            echo "parameter_db.csv differs from cached estimates"
+            result="true"
+          else
+            echo "estimator and parameter_db unchanged — downstream jobs will skip"
+          fi
+          echo "changed=$result" >> "$GITHUB_OUTPUT"
+
+      - name: Compute shard matrix from parameter_db.csv
+        id: matrix
+        run: |
+          set -euo pipefail
+          ids=$(python3 ci/list_parameter_ids.py)
+          echo "matrix ids: $ids"
+          echo "ids=$ids" >> "$GITHUB_OUTPUT"
+
+  estimate:
+    needs: prepare
+    if: needs.prepare.outputs.changed == 'true'
+    runs-on: ubuntu-24.04
+    permissions: {}  # no token; this job runs upstream estimator code
+    container:
+      image: sagemath/sagemath:latest
+      options: --user root
+    defaults:
+      run:
+        # The sagemath container's default shell is dash, which does not
+        # support `set -o pipefail`. Force bash for all run steps in this job.
+        shell: bash
+    strategy:
+      fail-fast: false
+      matrix:
+        id: ${{ fromJson(needs.prepare.outputs.ids) }}
+    timeout-minutes: 60
+    steps:
+      - name: Install git in container
+        run: |
+          apt-get update -qq
+          apt-get install -y --no-install-recommends git ca-certificates
+
+      - name: Checkout
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+
+      - name: Mark workspace as a safe git directory
+        run: |
+          git config --global --add safe.directory "$GITHUB_WORKSPACE"
+          git config --global --add safe.directory "$GITHUB_WORKSPACE/src/lattice_estimator"
+
+      - name: Clone lattice-estimator at the prepared SHA
+        env:
+          SUBMODULE_SHA: ${{ needs.prepare.outputs.sha }}
+        run: |
+          set -euo pipefail
+          rm -rf src/lattice_estimator
+          git clone https://github.com/malb/lattice-estimator.git src/lattice_estimator
+          git -C src/lattice_estimator checkout "$SUBMODULE_SHA"
+
+      - name: Install Python deps into Sage's interpreter
+        run: sage --pip install --no-warn-script-location -r src/requirements.txt
+
+      - name: Estimate parameter set
+        env:
+          MATRIX_ID: ${{ matrix.id }}
+        run: |
+          set -euo pipefail
+          mkdir -p out
+          sage --python src/estimate_security.py \
+            --ids "$MATRIX_ID" \
+            --output "out/partial-$MATRIX_ID.csv" \
+            --jobs 2
+
+      - name: Show estimates
+        if: always()
+        env:
+          MATRIX_ID: ${{ matrix.id }}
+        run: |
+          echo "::group::Estimates for parameter set $MATRIX_ID"
+          if [[ -f "out/partial-$MATRIX_ID.csv" ]]; then
+            (command -v column >/dev/null && column -s, -t "out/partial-$MATRIX_ID.csv") || cat "out/partial-$MATRIX_ID.csv"
+          else
+            echo "(no partial CSV produced — estimator failed before writing output)"
+          fi
+          echo "::endgroup::"
+
+      - name: Upload partial CSV
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
+        with:
+          name: estimates-${{ matrix.id }}
+          path: out/partial-${{ matrix.id }}.csv
+          if-no-files-found: error
+          retention-days: 7
+
+  # Tokenless: merges partial CSVs and regenerates README. The output
+  # README + canonical CSV are passed to the commit job as an artifact so
+  # the privileged job only handles git/PR mechanics.
+  build:
+    needs: [prepare, estimate]
+    if: needs.prepare.outputs.changed == 'true'
+    runs-on: ubuntu-24.04
+    permissions: {}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+
+      - name: Create venv and install Python deps
+        run: |
+          set -euo pipefail
+          python3 -m venv .venv
+          .venv/bin/pip install --quiet -r src/requirements.txt
+
+      - name: Download all partial CSVs (scoped to this run)
+        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c  # v8.0.1
+        with:
+          # run-id/github-token omitted: the action defaults to the current
+          # run, and this job's token (permissions: {}) grants nothing anyway.
+          path: partials
+          pattern: estimates-*
+
+      - name: Merge partial CSVs into the canonical estimates file
+        run: |
+          .venv/bin/python ci/merge_estimates.py 'partials/estimates-*/partial-*.csv' \
+            --parameter-db src/data/parameter_db.csv
+
+      - name: Regenerate README
+        run: PATH="$PWD/.venv/bin:$PATH" make readme
+
+      - name: Upload regenerated README + estimates CSV
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
+        with:
+          name: refreshed-readme
+          path: |
+            README.md
+            src/data/lattice_estimator_estimates.csv
+          if-no-files-found: error
+          retention-days: 7
+
+  # Minimal write-permission job: pins the submodule, copies the artifact
+  # from `build` into place, commits to a chore branch, and opens a PR.
+  # Does not run estimator or pandas code.
+  commit:
+    needs: [prepare, build]
+    if: needs.prepare.outputs.changed == 'true'
+    runs-on: ubuntu-24.04
+    permissions:
+      contents: write
+      pull-requests: write
+    steps:
+      - name: Checkout (full history, with submodules)
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          submodules: recursive
+          fetch-depth: 0
+
+      - name: Pin lattice-estimator submodule to prepare's SHA
+        env:
+          SUBMODULE_SHA: ${{ needs.prepare.outputs.sha }}
+        run: |
+          set -euo pipefail
+          cd src/lattice_estimator
+          git fetch --unshallow origin 2>/dev/null || git fetch origin
+          git checkout "$SUBMODULE_SHA"
+
+      - name: Download regenerated README + CSV
+        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c  # v8.0.1
+        with:
+          # run-id/github-token omitted: the action defaults to the current
+          # run, so this job needs no `actions: read` grant to fetch it.
+          name: refreshed-readme
+
+      - name: Open PR if anything changed
+        env:
+          GH_TOKEN: ${{ github.token }}
+          SHORT_SHA: ${{ needs.prepare.outputs.short_sha }}
+          BASE_BRANCH: ${{ github.ref_name }}
+        run: |
+          set -euo pipefail
+
+          # Check only the paths we actually commit (artifact download
+          # leaves untracked files that would otherwise mask the no-op path).
+          tracked_paths=(src/lattice_estimator src/data/lattice_estimator_estimates.csv README.md)
+          if [[ -z "$(git status --porcelain -- "${tracked_paths[@]}")" ]]; then
+            echo "No changes to estimator pin, estimates, or README — nothing to commit."
+            exit 0
+          fi
+
+          branch="chore/refresh-readme-$(date -u +%Y%m%d)-${GITHUB_RUN_ID}"
+
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+
+          git checkout -b "$branch"
+          git add -- "${tracked_paths[@]}"
+          git commit -m "chore: monthly estimator refresh (lattice-estimator ${SHORT_SHA})"
+          git push origin "$branch"
+
+          # Body is built via printf with positional args so no shell
+          # expansion of attacker-controllable values happens in a heredoc.
+          run_url="${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}"
+          body=$(printf 'Automated refresh from the `Refresh README` workflow.\n\n- lattice-estimator submodule resolved to upstream main HEAD: `%s`\n- Re-ran the estimator across all parameter sets in `src/data/parameter_db.csv`\n- Regenerated `README.md` from the refreshed CSV\n\nTriggered by `%s` against base `%s` (run [#%s](%s)).\n' \
+            "$SHORT_SHA" "$GITHUB_EVENT_NAME" "$BASE_BRANCH" "$GITHUB_RUN_ID" "$run_url")
+
+          gh pr create \
+            --base "$BASE_BRANCH" \
+            --head "$branch" \
+            --title "chore: monthly estimator refresh (lattice-estimator ${SHORT_SHA})" \
+            --body "$body"
diff --git a/.gitmodules b/.gitmodules
index 8b5bd1d..1260dfa 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,4 @@
 [submodule "src/lattice_estimator"]
 	path = src/lattice_estimator
 	url = https://github.com/malb/lattice-estimator.git
+	branch = main
diff --git a/Makefile b/Makefile
index eefdd14..ad9a89b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,20 +1,20 @@
 all:
 	python3 src/estimate_security.py
-	cat src/markdown/todo.md > README.md
-	cat src/markdown/part1.md >> README.md
+	awk 1 src/markdown/todo.md > README.md
+	awk 1 src/markdown/part1.md >> README.md
 	python3 src/gen_attack_table.py >> README.md
-	cat src/markdown/part2.md >> README.md
+	awk 1 src/markdown/part2.md >> README.md
 	python3 src/gen_parameter_table.py >> README.md
-	cat src/markdown/part3.md >> README.md
+	awk 1 src/markdown/part3.md >> README.md
 	python3 src/gen_security_estimation_table.py >> README.md
-	cat src/markdown/part4.md >> README.md
+	awk 1 src/markdown/part4.md >> README.md
 
 readme:
-	cat src/markdown/todo.md > README.md
-	cat src/markdown/part1.md >> README.md
+	awk 1 src/markdown/todo.md > README.md
+	awk 1 src/markdown/part1.md >> README.md
 	python3 src/gen_attack_table.py >> README.md
-	cat src/markdown/part2.md >> README.md
+	awk 1 src/markdown/part2.md >> README.md
 	python3 src/gen_parameter_table.py >> README.md
-	cat src/markdown/part3.md >> README.md
+	awk 1 src/markdown/part3.md >> README.md
 	python3 src/gen_security_estimation_table.py >> README.md
-	cat src/markdown/part4.md >> README.md
+	awk 1 src/markdown/part4.md >> README.md
diff --git a/ci/list_parameter_ids.py b/ci/list_parameter_ids.py
new file mode 100644
index 0000000..8bc9a6b
--- /dev/null
+++ b/ci/list_parameter_ids.py
@@ -0,0 +1,34 @@
+"""Emit parameter_db.csv's ID column as a JSON array for the CI matrix.
+
+Integer coercion rejects non-integer IDs so they cannot be interpolated
+as untrusted text into shell steps.
+"""
+import csv
+import json
+import sys
+
+
+def main():
+    """Print the ID column of the parameter DB (argv[1] or default) as JSON."""
+    db_path = "src/data/parameter_db.csv" if len(sys.argv) < 2 else sys.argv[1]
+    ids = []
+    with open(db_path, newline="") as handle:
+        rows = csv.DictReader(handle)
+        if "ID" not in (rows.fieldnames or []):
+            sys.exit(f"{db_path}: missing 'ID' column")
+        # int() yields canonical integers, so only numeric text reaches the matrix.
+        for cell in (record["ID"] for record in rows):
+            try:
+                ids.append(int(cell))
+            except (TypeError, ValueError):
+                sys.exit(f"ID {cell!r} in {db_path} is not an integer; "
+                         "refusing to expand matrix.")
+    # Fewer distinct values than entries means at least one ID repeats.
+    if len(ids) > len(set(ids)):
+        repeated = sorted({value for value in ids if ids.count(value) > 1})
+        sys.exit(f"{db_path}: duplicate IDs {repeated}; matrix shards would collide.")
+    print(json.dumps(ids))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ci/merge_estimates.py b/ci/merge_estimates.py
new file mode 100644
index 0000000..7097aed
--- /dev/null
+++ b/ci/merge_estimates.py
@@ -0,0 +1,59 @@
+"""Merge per-parameter partial estimate CSVs into the canonical
+estimates CSV. If --parameter-db is given, cross-check the merged ID
+set against it to catch a shard that uploaded a malformed CSV.
+"""
+import argparse
+import glob
+import sys
+
+import pandas as pd
+
+
+def _check_ids_match(merged: pd.DataFrame, parameter_db_path: str) -> None:
+    """Abort via sys.exit unless merged's IDs equal those in parameter_db_path."""
+    db_ids = sorted(map(int, pd.read_csv(parameter_db_path)["ID"].tolist()))
+    merged_ids = sorted(map(int, merged["ID"].tolist()))
+    if merged_ids == db_ids:
+        return
+    # Sorted lists, not sets: missing, extra and duplicated IDs all mismatch.
+    sys.exit(f"merged ID set {merged_ids} does not match parameter_db {db_ids}")
+
+
+def main():
+    """Concatenate the partial CSVs matched by a glob and write one sorted CSV."""
+    cli = argparse.ArgumentParser(description=__doc__)
+    cli.add_argument(
+        "pattern",
+        help="Glob matching all partial CSVs to merge "
+        "(e.g. 'partials/estimates-*/partial-*.csv').",
+    )
+    cli.add_argument(
+        "--output",
+        help="Output CSV path.",
+        default="src/data/lattice_estimator_estimates.csv",
+    )
+    cli.add_argument(
+        "--parameter-db",
+        help="If given, check the merged ID set against this parameter database.",
+        default=None,
+    )
+    opts = cli.parse_args()
+
+    partial_files = sorted(glob.glob(opts.pattern))
+    if len(partial_files) == 0:
+        sys.exit(f"no partial CSVs matched pattern: {opts.pattern}")
+
+    # Stack every shard, then order rows by ID with a fresh 0..n-1 index.
+    frames = [pd.read_csv(partial) for partial in partial_files]
+    stacked = pd.concat(frames, ignore_index=True)
+    merged = stacked.sort_values("ID").reset_index(drop=True)
+
+    # The cross-check is optional and runs before anything is written.
+    if opts.parameter_db is not None:
+        _check_ids_match(merged, opts.parameter_db)
+    merged.to_csv(opts.output, index=False)
+    print(merged.to_string(index=False))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ci/parameters_diverged.py b/ci/parameters_diverged.py
new file mode 100644
index 0000000..d396dc8
--- /dev/null
+++ b/ci/parameters_diverged.py
@@ -0,0 +1,36 @@
+"""Print 'true' if parameter_db.csv has changed since the commit that
+last updated the cached estimates CSV, else 'false'.
+
+Compares via git so any change to parameter_db.csv (parameter values,
+Origin URLs, whitespace) is caught. Requires fetch-depth: 0 on the
+caller's checkout. Fail-closed: any unexpected error prints 'true'
+so a corrupt repo state forces a refresh rather than silently skipping.
+"""
+import subprocess
+import sys
+
+DB = "src/data/parameter_db.csv"
+EST = "src/data/lattice_estimator_estimates.csv"
+
+
+def _run(cmd):
+    return subprocess.run(cmd, capture_output=True, text=True, check=True)
+
+
+def main():
+    """Print 'true' if DB differs from the commit that last touched EST, else 'false'."""
+    try:
+        log = _run(["git", "log", "-1", "--format=%H", "--", EST])
+        est_commit = log.stdout.strip()
+        # No commit for EST, or any non-zero `git diff --quiet` exit, means diverged.
+        diverged = not est_commit or subprocess.run(
+            ["git", "diff", "--quiet", est_commit, "--", DB], capture_output=True
+        ).returncode != 0
+        print("true" if diverged else "false")
+    except Exception as err:
+        print(f"parameters_diverged: {err}", file=sys.stderr)
+        print("true")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/estimate_security.py b/src/estimate_security.py
index 26fd957..a7719c5 100644
--- a/src/estimate_security.py
+++ b/src/estimate_security.py
@@ -1,4 +1,5 @@
 from multiprocessing import cpu_count
+import argparse
 import pandas as pd
 import time
 import platform
@@ -34,16 +35,31 @@ def estimate_security_lattice_estimator(logn, sigma, logq, h, m=oo, num_cores=1)
 
 
 if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Run lattice-estimator security estimates over the parameter database.")
+    parser.add_argument("--ids", default=None,
+                        help="Comma-separated list of parameter IDs to estimate. Default: all rows.")
+    parser.add_argument("--output", default=None,
+                        help="Output CSV path. Default: src/data/<toolname>_estimates.csv.")
+    parser.add_argument("--jobs", type=int, default=None,
+                        help="Number of parallel attack workers passed to LWE.estimate(jobs=...). Default: cpu_count()//2.")
+    args = parser.parse_args()
+
     toolnames = ['lattice_estimator']
     parameters_db = pd.read_csv('src/data/parameter_db.csv')
 
-    # default to using half of the available cores
-    num_cores = cpu_count() // 2
-    # a string which specifies the machine used to generate estimates
-    machine_info = f"{platform.system()} {platform.release()},{platform.machine()},{num_cores} cores"
+    if args.ids is not None:
+        wanted = [int(x) for x in args.ids.split(",") if x.strip()]
+        parameters_db = parameters_db[parameters_db["ID"].isin(wanted)].reset_index(drop=True)
+        if parameters_db.empty:
+            raise SystemExit(f"No parameter rows match --ids={args.ids}")
+
+    jobs = args.jobs if args.jobs is not None else cpu_count() // 2
+    # `jobs` is the LWE.estimate parallelism, not the host's core count,
+    # so it is labelled explicitly.
+    machine_info = f"{platform.system()} {platform.release()},{platform.machine()},{jobs} jobs on {cpu_count()} cores"
 
     for toolname in toolnames:
-        tool_estimates = parameters_db.apply(lambda row: pd.Series(estimate_security(toolname, row[r"$\log_2(n)$"], row['σ'], row[r"$\log_2(q)$"], row[r"$h$"], num_cores=num_cores)), axis=1)
+        tool_estimates = parameters_db.apply(lambda row: pd.Series(estimate_security(toolname, row[r"$\log_2(n)$"], row['σ'], row[r"$\log_2(q)$"], row[r"$h$"], num_cores=jobs)), axis=1)
 
         tool_estimates = parameters_db.join(tool_estimates).drop(columns=['Origin'])
 
@@ -54,7 +70,8 @@ def estimate_security_lattice_estimator(logn, sigma, logq, h, m=oo, num_cores=1)
         tool_estimates["machine_info"] = machine_info
         tool_estimates["tool_commit"] = tool_commit
 
-        tool_estimates.to_csv(f"src/data/{toolname}_estimates.csv", index=False)
+        output_path = args.output if args.output is not None else f"src/data/{toolname}_estimates.csv"
+        tool_estimates.to_csv(output_path, index=False)
         # print(tool_estimates.to_markdown(index=False))