From 4dc1a066aeaac6c61a3df9be0f38e335ec3be695 Mon Sep 17 00:00:00 2001 From: ryanorendorff Date: Sun, 24 May 2026 20:45:50 -0600 Subject: [PATCH] Add monthly CI to refresh README Bumps the lattice-estimator submodule and refreshes README.md and the estimates CSV via a PR when the submodule or parameter_db.csv has changed. Runs monthly and on workflow_dispatch, and will only fire upstream once this branch is on the default branch. --- .github/workflows/refresh-readme.yml | 271 +++++++++++++++++++++++++++ .gitmodules | 1 + Makefile | 20 +- ci/list_parameter_ids.py | 34 ++++ ci/merge_estimates.py | 59 ++++++ ci/parameters_diverged.py | 36 ++++ src/estimate_security.py | 29 ++- 7 files changed, 434 insertions(+), 16 deletions(-) create mode 100644 .github/workflows/refresh-readme.yml create mode 100644 ci/list_parameter_ids.py create mode 100644 ci/merge_estimates.py create mode 100644 ci/parameters_diverged.py diff --git a/.github/workflows/refresh-readme.yml b/.github/workflows/refresh-readme.yml new file mode 100644 index 0000000..51e54f8 --- /dev/null +++ b/.github/workflows/refresh-readme.yml @@ -0,0 +1,271 @@ +name: Refresh README + +on: + schedule: + # 06:00 UTC on the 1st of each month. + - cron: "0 6 1 * *" + workflow_dispatch: + +# Read-only by default. Only the commit job upgrades to write. +permissions: + contents: read + +concurrency: + group: refresh-readme-${{ github.ref }} + cancel-in-progress: false + +jobs: + prepare: + runs-on: ubuntu-24.04 + outputs: + sha: ${{ steps.bump.outputs.sha }} + short_sha: ${{ steps.bump.outputs.short_sha }} + ids: ${{ steps.matrix.outputs.ids }} + changed: ${{ steps.changed.outputs.changed }} + steps: + - name: Checkout (with submodules, full history) + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + submodules: recursive + # fetch-depth: 0 so the change-detection step can ask git + # which commit last touched the estimates CSV. + fetch-depth: 0 + + - name: Bump lattice-estimator submodule to upstream HEAD + id: bump + run: | + set -euo pipefail + git submodule update --remote --recursive src/lattice_estimator + sha=$(git -C src/lattice_estimator rev-parse HEAD) + short=$(git -C src/lattice_estimator rev-parse --short HEAD) + echo "lattice-estimator HEAD: $sha" + { + echo "sha=$sha" + echo "short_sha=$short" + } >> "$GITHUB_OUTPUT" + + - name: Detect whether a refresh is needed + id: changed + run: | + set -euo pipefail + committed=$(git ls-tree HEAD src/lattice_estimator | awk '{print $3}') + bumped="${{ steps.bump.outputs.sha }}" + result="false" + if [[ "$bumped" != "$committed" ]]; then + echo "estimator moved $committed -> $bumped" + result="true" + elif drift=$(python3 ci/parameters_diverged.py); then + if [[ "$drift" == "true" ]]; then + echo "parameter_db.csv differs from cached estimates" + result="true" + else + echo "estimator and parameter_db unchanged — downstream jobs will skip" + fi + else + echo "parameters_diverged.py crashed — assuming refresh needed" + result="true" + fi + echo "changed=$result" >> "$GITHUB_OUTPUT" + + - name: Compute shard matrix from parameter_db.csv + id: matrix + run: | + set -euo pipefail + ids=$(python3 ci/list_parameter_ids.py) + echo "matrix ids: $ids" + echo "ids=$ids" >> "$GITHUB_OUTPUT" + + estimate: + needs: prepare + if: needs.prepare.outputs.changed == 'true' + runs-on: ubuntu-24.04 + permissions: {} # no token; this job runs upstream estimator code + container: + image: sagemath/sagemath:latest + options: --user root + defaults: + run: + # The sagemath container's default shell is dash, which does not + # support `set -o pipefail`. Force bash for all run steps in this job. + shell: bash + strategy: + fail-fast: false + matrix: + id: ${{ fromJson(needs.prepare.outputs.ids) }} + timeout-minutes: 60 + steps: + - name: Install git in container + run: | + apt-get update -qq + apt-get install -y --no-install-recommends git ca-certificates + + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Mark workspace as a safe git directory + run: | + git config --global --add safe.directory "$GITHUB_WORKSPACE" + git config --global --add safe.directory "$GITHUB_WORKSPACE/src/lattice_estimator" + + - name: Clone lattice-estimator at the prepared SHA + env: + SUBMODULE_SHA: ${{ needs.prepare.outputs.sha }} + run: | + set -euo pipefail + rm -rf src/lattice_estimator + git clone https://github.com/malb/lattice-estimator.git src/lattice_estimator + git -C src/lattice_estimator checkout "$SUBMODULE_SHA" + + - name: Install Python deps into Sage's interpreter + run: sage --pip install --no-warn-script-location -r src/requirements.txt + + - name: Estimate parameter set + env: + MATRIX_ID: ${{ matrix.id }} + run: | + set -euo pipefail + mkdir -p out + sage --python src/estimate_security.py \ + --ids "$MATRIX_ID" \ + --output "out/partial-$MATRIX_ID.csv" \ + --jobs 2 + + - name: Show estimates + if: always() + env: + MATRIX_ID: ${{ matrix.id }} + run: | + echo "::group::Estimates for parameter set $MATRIX_ID" + if [[ -f "out/partial-$MATRIX_ID.csv" ]]; then + (command -v column >/dev/null && column -s, -t "out/partial-$MATRIX_ID.csv") || cat "out/partial-$MATRIX_ID.csv" + else + echo "(no partial CSV produced — estimator failed before writing output)" + fi + echo "::endgroup::" + + - name: Upload partial CSV + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: estimates-${{ matrix.id }} + path: out/partial-${{ matrix.id }}.csv + if-no-files-found: error + retention-days: 7 + + # Tokenless: merges partial CSVs and regenerates README. The output + # README + canonical CSV are passed to the commit job as an artifact so + # the privileged job only handles git/PR mechanics. + build: + needs: [prepare, estimate] + if: needs.prepare.outputs.changed == 'true' + runs-on: ubuntu-24.04 + permissions: {} + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Create venv and install Python deps + run: | + set -euo pipefail + python3 -m venv .venv + .venv/bin/pip install --quiet -r src/requirements.txt + + - name: Download all partial CSVs (scoped to this run) + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + path: partials + pattern: estimates-* + run-id: ${{ github.run_id }} + github-token: ${{ github.token }} + + - name: Merge partial CSVs into the canonical estimates file + run: | + .venv/bin/python ci/merge_estimates.py 'partials/estimates-*/partial-*.csv' \ + --parameter-db src/data/parameter_db.csv + + - name: Regenerate README + run: PATH="$PWD/.venv/bin:$PATH" make readme + + - name: Upload regenerated README + estimates CSV + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: refreshed-readme + path: | + README.md + src/data/lattice_estimator_estimates.csv + if-no-files-found: error + retention-days: 7 + + # Minimal write-permission job: pins the submodule, copies the artifact + # from `build` into place, commits to a chore branch, and opens a PR. + # Does not run estimator or pandas code. + commit: + needs: [prepare, build] + if: needs.prepare.outputs.changed == 'true' + runs-on: ubuntu-24.04 + permissions: + contents: write + pull-requests: write + steps: + - name: Checkout (full history, with submodules) + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + submodules: recursive + fetch-depth: 0 + + - name: Pin lattice-estimator submodule to prepare's SHA + env: + SUBMODULE_SHA: ${{ needs.prepare.outputs.sha }} + run: | + set -euo pipefail + cd src/lattice_estimator + git fetch --unshallow origin 2>/dev/null || git fetch origin + git checkout "$SUBMODULE_SHA" + + - name: Download regenerated README + CSV + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: refreshed-readme + run-id: ${{ github.run_id }} + github-token: ${{ github.token }} + + - name: Open PR if anything changed + env: + GH_TOKEN: ${{ github.token }} + SHORT_SHA: ${{ needs.prepare.outputs.short_sha }} + BASE_BRANCH: ${{ github.ref_name }} + run: | + set -euo pipefail + + # Check only the paths we actually commit (artifact download + # leaves untracked files that would otherwise mask the no-op path). + tracked_paths=(src/lattice_estimator src/data/lattice_estimator_estimates.csv README.md) + if [[ -z "$(git status --porcelain -- "${tracked_paths[@]}")" ]]; then + echo "No changes to estimator pin, estimates, or README — nothing to commit." + exit 0 + fi + + branch="chore/refresh-readme-$(date -u +%Y%m%d)-${GITHUB_RUN_ID}" + + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + + git checkout -b "$branch" + git add -- "${tracked_paths[@]}" + git commit -m "chore: monthly estimator refresh (lattice-estimator ${SHORT_SHA})" + git push origin "$branch" + + # Body is built via printf with positional args so no shell + # expansion of attacker-controllable values happens in a heredoc. + run_url="${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" + body=$(printf 'Automated refresh from the `Refresh README` workflow.\n\n- lattice-estimator submodule resolved to upstream main HEAD: `%s`\n- Re-ran the estimator across all parameter sets in `src/data/parameter_db.csv`\n- Regenerated `README.md` from the refreshed CSV\n\nTriggered by `%s` against base `%s` (run [#%s](%s)).\n' \ + "$SHORT_SHA" "$GITHUB_EVENT_NAME" "$BASE_BRANCH" "$GITHUB_RUN_ID" "$run_url") + + gh pr create \ + --base "$BASE_BRANCH" \ + --head "$branch" \ + --title "chore: monthly estimator refresh (lattice-estimator ${SHORT_SHA})" \ + --body "$body" diff --git a/.gitmodules b/.gitmodules index 8b5bd1d..1260dfa 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,4 @@ [submodule "src/lattice_estimator"] path = src/lattice_estimator url = https://github.com/malb/lattice-estimator.git + branch = main diff --git a/Makefile b/Makefile index eefdd14..ad9a89b 100644 --- a/Makefile +++ b/Makefile @@ -1,20 +1,20 @@ all: python3 src/estimate_security.py - cat src/markdown/todo.md > README.md - cat src/markdown/part1.md >> README.md + awk 1 src/markdown/todo.md > README.md + awk 1 src/markdown/part1.md >> README.md python3 src/gen_attack_table.py >> README.md - cat src/markdown/part2.md >> README.md + awk 1 src/markdown/part2.md >> README.md python3 src/gen_parameter_table.py >> README.md - cat src/markdown/part3.md >> README.md + awk 1 src/markdown/part3.md >> README.md python3 src/gen_security_estimation_table.py >> README.md - cat src/markdown/part4.md >> README.md + awk 1 src/markdown/part4.md >> README.md readme: - cat src/markdown/todo.md > README.md - cat src/markdown/part1.md >> README.md + awk 1 src/markdown/todo.md > README.md + awk 1 src/markdown/part1.md >> README.md python3 src/gen_attack_table.py >> README.md - cat src/markdown/part2.md >> README.md + awk 1 src/markdown/part2.md >> README.md python3 src/gen_parameter_table.py >> README.md - cat src/markdown/part3.md >> README.md + awk 1 src/markdown/part3.md >> README.md python3 src/gen_security_estimation_table.py >> README.md - cat src/markdown/part4.md >> README.md + awk 1 src/markdown/part4.md >> README.md diff --git a/ci/list_parameter_ids.py b/ci/list_parameter_ids.py new file mode 100644 index 0000000..8bc9a6b --- /dev/null +++ b/ci/list_parameter_ids.py @@ -0,0 +1,34 @@ +"""Emit parameter_db.csv's ID column as a JSON array for the CI matrix. + +Integer coercion rejects non-integer IDs so they cannot be interpolated +as untrusted text into shell steps. +""" +import csv +import json +import sys + + +def main(): + path = sys.argv[1] if len(sys.argv) > 1 else "src/data/parameter_db.csv" + ids = [] + with open(path, newline="") as f: + reader = csv.DictReader(f) + if "ID" not in (reader.fieldnames or []): + sys.exit(f"{path}: missing 'ID' column") + for row in reader: + raw = row["ID"] + try: + ids.append(int(raw)) + except (TypeError, ValueError): + sys.exit( + f"ID {raw!r} in {path} is not an integer; " + "refusing to expand matrix." + ) + if len(set(ids)) != len(ids): + dupes = sorted({x for x in ids if ids.count(x) > 1}) + sys.exit(f"{path}: duplicate IDs {dupes}; matrix shards would collide.") + print(json.dumps(ids)) + + +if __name__ == "__main__": + main() diff --git a/ci/merge_estimates.py b/ci/merge_estimates.py new file mode 100644 index 0000000..7097aed --- /dev/null +++ b/ci/merge_estimates.py @@ -0,0 +1,59 @@ +"""Merge per-parameter partial estimate CSVs into the canonical +estimates CSV. If --parameter-db is given, cross-check the merged ID +set against it to catch a shard that uploaded a malformed CSV. +""" +import argparse +import glob +import sys + +import pandas as pd + + +def _check_ids_match(merged: pd.DataFrame, parameter_db_path: str) -> None: + expected = pd.read_csv(parameter_db_path) + expected_ids = sorted(int(x) for x in expected["ID"].tolist()) + got_ids = sorted(int(x) for x in merged["ID"].tolist()) + if got_ids != expected_ids: + sys.exit( + f"merged ID set {got_ids} does not match parameter_db {expected_ids}" + ) + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "pattern", + help="Glob matching all partial CSVs to merge " + "(e.g. 'partials/estimates-*/partial-*.csv').", + ) + parser.add_argument( + "--output", + default="src/data/lattice_estimator_estimates.csv", + help="Output CSV path.", + ) + parser.add_argument( + "--parameter-db", + default=None, + help="If given, check the merged ID set against this parameter database.", + ) + args = parser.parse_args() + + paths = sorted(glob.glob(args.pattern)) + if not paths: + sys.exit(f"no partial CSVs matched pattern: {args.pattern}") + + merged = ( + pd.concat([pd.read_csv(p) for p in paths], ignore_index=True) + .sort_values("ID") + .reset_index(drop=True) + ) + + if args.parameter_db is not None: + _check_ids_match(merged, args.parameter_db) + + merged.to_csv(args.output, index=False) + print(merged.to_string(index=False)) + + +if __name__ == "__main__": + main() diff --git a/ci/parameters_diverged.py b/ci/parameters_diverged.py new file mode 100644 index 0000000..d396dc8 --- /dev/null +++ b/ci/parameters_diverged.py @@ -0,0 +1,36 @@ +"""Print 'true' if parameter_db.csv has changed since the commit that +last updated the cached estimates CSV, else 'false'. + +Compares via git so any change to parameter_db.csv (parameter values, +Origin URLs, whitespace) is caught. Requires fetch-depth: 0 on the +caller's checkout. Fail-closed: any unexpected error prints 'true' +so a corrupt repo state forces a refresh rather than silently skipping. +""" +import subprocess +import sys + +DB = "src/data/parameter_db.csv" +EST = "src/data/lattice_estimator_estimates.csv" + + +def _run(cmd): + return subprocess.run(cmd, capture_output=True, text=True, check=True) + + +def main(): + try: + last = _run(["git", "log", "-1", "--format=%H", "--", EST]).stdout.strip() + if not last: + print("true") + return + diff = subprocess.run( + ["git", "diff", "--quiet", last, "--", DB], capture_output=True + ) + print("true" if diff.returncode != 0 else "false") + except Exception as e: + print(f"parameters_diverged: {e}", file=sys.stderr) + print("true") + + +if __name__ == "__main__": + main() diff --git a/src/estimate_security.py b/src/estimate_security.py index 26fd957..a7719c5 100644 --- a/src/estimate_security.py +++ b/src/estimate_security.py @@ -1,4 +1,5 @@ from multiprocessing import cpu_count +import argparse import pandas as pd import time import platform @@ -34,16 +35,31 @@ def estimate_security_lattice_estimator(logn, sigma, logq, h, m=oo, num_cores=1) if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Run lattice-estimator security estimates over the parameter database.") + parser.add_argument("--ids", default=None, + help="Comma-separated list of parameter IDs to estimate. Default: all rows.") + parser.add_argument("--output", default=None, + help="Output CSV path. Default: src/data/_estimates.csv.") + parser.add_argument("--jobs", type=int, default=None, + help="Number of parallel attack workers passed to LWE.estimate(jobs=...). Default: cpu_count()//2.") + args = parser.parse_args() + toolnames = ['lattice_estimator'] parameters_db = pd.read_csv('src/data/parameter_db.csv') - # default to using half of the available cores - num_cores = cpu_count() // 2 - # a string which specifies the machine used to generate estimates - machine_info = f"{platform.system()} {platform.release()},{platform.machine()},{num_cores} cores" + if args.ids is not None: + wanted = [int(x) for x in args.ids.split(",") if x.strip()] + parameters_db = parameters_db[parameters_db["ID"].isin(wanted)].reset_index(drop=True) + if parameters_db.empty: + raise SystemExit(f"No parameter rows match --ids={args.ids}") + + jobs = args.jobs if args.jobs is not None else cpu_count() // 2 + # `jobs` is the LWE.estimate parallelism, not the host's core count, + # so it is labelled explicitly. + machine_info = f"{platform.system()} {platform.release()},{platform.machine()},{jobs} jobs on {cpu_count()} cores" for toolname in toolnames: - tool_estimates = parameters_db.apply(lambda row: pd.Series(estimate_security(toolname, row[r"$\log_2(n)$"], row['σ'], row[r"$\log_2(q)$"], row[r"$h$"], num_cores=num_cores)), axis=1) + tool_estimates = parameters_db.apply(lambda row: pd.Series(estimate_security(toolname, row[r"$\log_2(n)$"], row['σ'], row[r"$\log_2(q)$"], row[r"$h$"], num_cores=jobs)), axis=1) tool_estimates = parameters_db.join(tool_estimates).drop(columns=['Origin']) @@ -54,7 +70,8 @@ def estimate_security_lattice_estimator(logn, sigma, logq, h, m=oo, num_cores=1) tool_estimates["machine_info"] = machine_info tool_estimates["tool_commit"] = tool_commit - tool_estimates.to_csv(f"src/data/{toolname}_estimates.csv", index=False) + output_path = args.output if args.output is not None else f"src/data/{toolname}_estimates.csv" + tool_estimates.to_csv(output_path, index=False) # print(tool_estimates.to_markdown(index=False))