Add 8 popular GitHub users for prepopulation #43
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Mines GitHub user statistics and publishes the resulting dashboards to
# Cloudflare.
name: Mine GitHub user stats + deploy to Cloudflare

on:
  # Daily at 06:00 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: '0 6 * * *'

  # Manual run; `user` optionally narrows the run to one login.
  workflow_dispatch:
    inputs:
      user:
        description: 'Single GitHub login to mine (skips the full users.txt loop).'
        required: false

  # Pushes to main that touch the miner, its template, the Cloudflare
  # project, or this workflow file.
  push:
    branches:
      - main
    paths:
      - 'generate_stats.py'
      - 'stats_template.html'
      - 'cloudflare/**'
      - '.github/workflows/mine-and-deploy.yml'

# Every run joins the same group, and a run that is already in progress is
# never cancelled by a newer one.
concurrency:
  group: mine-and-deploy
  cancel-in-progress: false
jobs:
  mine-and-deploy:
    runs-on: ubuntu-latest
    permissions:
      # Write access is needed to append new users to users.txt.
      contents: write
    steps:
      # Check out with the mining token when that secret is configured,
      # otherwise with the default workflow token.
      - uses: actions/checkout@v4
        with:
          token: ${{ secrets.GH_MINING_TOKEN || github.token }}

      # Toolchains used by the later steps (python3 and npx).
      - uses: actions/setup-python@v5
        with:
          python-version: '3.13'

      - uses: actions/setup-node@v4
        with:
          node-version: '24'
# Carry the per-user mining caches from one run to the next so PR details,
# commit stats, etc. that were fetched before are not downloaded again.
# The key embeds inputs.user (or 'full' when no user was given, e.g. cron
# runs), so a single-user dispatch restores that user's cache first. The
# run_id suffix makes every key unique; restore-keys then fall back to the
# most recent entry with the same prefix, and finally to any
# stats-cache-v1 entry.
- name: Restore mining caches
  uses: actions/cache@v4
  with:
    key: stats-cache-v1-${{ inputs.user || 'full' }}-${{ github.run_id }}
    restore-keys: |
      stats-cache-v1-${{ inputs.user || 'full' }}-
      stats-cache-v1-
    path: |
      cache_*/api/
      cache_*/bare/
      cache/api/
      cache/bare/
# Keep every deployed user page between runs. wrangler replaces the whole
# assets directory on deploy, so a single-user run would otherwise wipe
# all other users from the Cloudflare bucket. One shared key is used:
# each run restores the newest snapshot of all deployed dashboards, adds
# or refreshes its own user(s), and writes the full set back.
#
# The committed pages (pirate, index, 404) are excluded so that an old
# cached copy cannot overwrite the freshly checked-out repo files on
# restore. The next step additionally re-applies them from git, which
# covers caches written by earlier runs that still contain them.
- name: Restore deployed dashboards
  uses: actions/cache@v4
  with:
    key: deployed-htmls-v1-${{ github.run_id }}
    restore-keys: |
      deployed-htmls-v1-
    path: |
      cloudflare/public/*.html
      !cloudflare/public/pirate.html
      !cloudflare/public/index.html
      !cloudflare/public/404.html
# Put the three committed pages back to their HEAD versions, replacing any
# copies that a cache restore may have written over them.
- name: Re-apply committed dashboards from git
  run: |
    committed_pages=(
      cloudflare/public/pirate.html
      cloudflare/public/index.html
      cloudflare/public/404.html
    )
    git checkout HEAD -- "${committed_pages[@]}"
# Install the GitHub CLI only when the runner image does not already
# provide it.
#
# gh authenticates through the GH_TOKEN environment variable, which is set
# on each step that calls it, so no explicit `gh auth login` step is
# needed.
- name: Install gh CLI
  run: |
    if ! type -p gh >/dev/null; then
      sudo apt-get update -qq && sudo apt-get install -y gh
    fi
# Decide which logins this run mines and write them, one per line, to
# /tmp/targets.txt for the mining step.
#   * Dispatch with inputs.user set: mine just that login, and append it
#     to cloudflare/users.txt (setting the `added` output) when it is not
#     listed there yet.
#   * Otherwise: every users.txt entry that has no
#     cloudflare/public/<login>.html yet.
- name: Determine target users
  id: targets
  working-directory: .
  env:
    # Handed over through the environment rather than interpolated into
    # the script text.
    INPUT_USER: ${{ inputs.user }}
  run: |
    set -e
    mkdir -p cloudflare/public
    # Build the list of users we'll mine THIS run.
    if [ -n "$INPUT_USER" ]; then
      # The login becomes part of file names (stats_<login>.html,
      # cloudflare/public/<login>.html) and of users.txt, so reject
      # anything that is not a plain login: letters, digits, '-' or '_',
      # starting with a letter or digit, at most 39 characters.
      login_re='^[A-Za-z0-9][A-Za-z0-9_-]{0,38}$'
      if ! [[ "$INPUT_USER" =~ $login_re ]]; then
        # The rejected value is deliberately not echoed back into the log.
        echo "::error::inputs.user is not a valid GitHub login"
        exit 1
      fi
      echo "Single-user mine (forced): $INPUT_USER"
      # Persist new users into users.txt so future scheduled runs
      # include them. Existing entries are normalised the same way the
      # full-mine loop reads them (comment and whitespace stripped) and
      # compared as whole-line fixed strings, so the login is never
      # interpreted as a regular expression.
      if ! sed -e 's/#.*//' -e 's/[[:space:]]//g' cloudflare/users.txt 2>/dev/null \
          | grep -qixF -- "$INPUT_USER"; then
        # Terminate an unterminated last line first so the new login
        # starts on its own line.
        if [ -n "$(tail -c1 cloudflare/users.txt 2>/dev/null)" ]; then
          echo >> cloudflare/users.txt
        fi
        echo "$INPUT_USER" >> cloudflare/users.txt
        echo "added=true" >> "$GITHUB_OUTPUT"
      fi
      echo "$INPUT_USER" > /tmp/targets.txt
    else
      # Full mine: only mine users that don't have a deployed
      # dashboard yet. Once a dashboard exists, it stays put
      # until someone clicks the manual "Refresh" button (which
      # dispatches with inputs.user set).
      echo "Full mine of users.txt (skip already-deployed)"
      : > /tmp/targets.txt
      # `|| [ -n "$u" ]` keeps a final line that has no trailing newline.
      while IFS= read -r u || [ -n "$u" ]; do
        u="${u%%#*}"            # drop an inline comment
        u="${u//[[:space:]]/}"  # drop all whitespace
        [ -z "$u" ] && continue
        if [ -f "cloudflare/public/${u}.html" ]; then
          echo " skip @$u — dashboard already deployed"
          continue
        fi
        echo "$u" >> /tmp/targets.txt
      done < cloudflare/users.txt
    fi
    # Pre-stage pirate's enhanced version
    if [ -f stats.html ]; then
      cp stats.html cloudflare/public/pirate.html
    fi
    echo "Targets:"
    cat /tmp/targets.txt
# Mine every login listed in /tmp/targets.txt, one after another. While a
# login is being mined, a background watcher re-deploys its partial
# stats_<login>.html so the live page fills in as mining progresses; once
# mining ends, the finished page is deployed.
- name: Mine each user (with live in-progress deploys)
  working-directory: .
  env:
    NO_COLOR: "1"
    # Same fallback as the checkout step, so the token is not empty when
    # the mining secret is not configured.
    GH_TOKEN: ${{ secrets.GH_MINING_TOKEN || github.token }}
    CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }}
    CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
    # Posted by generate_stats.py to /api/progress so the Worker's
    # loading page can render real-time phase info.
    STATS_PROGRESS_TOKEN: ${{ secrets.GH_MINING_TOKEN }}
  run: |
    set -e
    deploy_now() {
      # Deploy cloudflare/ and show only the last two lines of output.
      # pipefail makes wrangler's exit status (not tail's) decide whether
      # the warning fires; a failed deploy is reported but never aborts
      # the step.
      (
        set -o pipefail
        cd cloudflare &&
          npx --yes wrangler@latest deploy --minify 2>&1 | tail -2
      ) || echo "::warning::interim deploy failed"
    }
    watch_and_deploy() {
      # Watches stats_<user>.html every 30s while $1 (PID) is alive.
      # Copies any updated file into the deploy dir and re-deploys so
      # the live page shows partial data as mining progresses.
      local pid="$1" user="$2" src="stats_${user}.html" \
        dst="cloudflare/public/${user}.html" last_mtime=0
      while kill -0 "$pid" 2>/dev/null; do
        sleep 30
        if [ -f "$src" ]; then
          local mtime
          # GNU stat first, BSD stat second, 0 when neither works.
          mtime=$(stat -c %Y "$src" 2>/dev/null \
            || stat -f %m "$src" 2>/dev/null || echo 0)
          if [ "$mtime" -gt "$last_mtime" ]; then
            cp "$src" "$dst"
            echo "::group::Interim deploy of @$user (live)"
            deploy_now
            echo "::endgroup::"
            last_mtime="$mtime"
          fi
        fi
      done
    }
    # The target list is read on fd 3 so that commands started inside the
    # loop cannot consume the remaining logins from stdin.
    while IFS= read -r -u 3 user || [ -n "$user" ]; do
      user="${user%%#*}"
      user="${user//[[:space:]]/}"
      [ -z "$user" ] && continue
      [ "$user" = "pirate" ] && continue
      echo "::group::Mining @$user"
      # Run mining in the background; watch loop deploys partials.
      python3 generate_stats.py --user "$user" \
        --no-search-commits \
        --max-api-fetches 1500 &
      MINE_PID=$!
      watch_and_deploy "$MINE_PID" "$user" &
      WATCH_PID=$!
      # A failed mine is reported but does not stop the remaining users.
      wait "$MINE_PID" || echo "::warning::mining @$user exited non-zero"
      # Stop the watcher and do a final deploy with the final HTML.
      kill "$WATCH_PID" 2>/dev/null || true
      wait "$WATCH_PID" 2>/dev/null || true
      if [ -f "stats_$user.html" ]; then
        cp "stats_$user.html" "cloudflare/public/$user.html"
        echo "::group::Final deploy of @$user"
        deploy_now
        echo "::endgroup::"
      fi
      echo "::endgroup::"
    done 3< /tmp/targets.txt
# Commit and push users.txt when the targets step appended a new login.
- name: Commit added users.txt entries
  if: steps.targets.outputs.added == 'true'
  working-directory: cloudflare
  env:
    # The dispatch input reaches the script through the environment.
    # Expanding the expression directly inside `run:` would paste
    # caller-controlled text into the shell source (script injection).
    INPUT_USER: ${{ inputs.user }}
  run: |
    git config user.name "github-actions[bot]"
    git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
    git add users.txt
    # Commit only when something is actually staged.
    git diff --staged --quiet || git commit -m "Add ${INPUT_USER} to users.txt [skip ci]"
    # A rejected push is reported as a warning instead of failing the job.
    git push || echo "::warning::push failed (no commit permission?)"
# Rebuild the homepage user list from the set of dashboards we
# are actually about to deploy, so /<login> entries match reality
# (and users added via the self-serve flow show up automatically).
- name: Regenerate user index
  run: |
    python3 <<'PY'
    from html import escape
    from pathlib import Path
    import re

    pub = Path("cloudflare/public")
    index_path = pub / "index.html"

    # Every page except the index and the 404 page is a user dashboard.
    users = sorted(
        (p.stem for p in pub.glob("*.html")
         if p.stem not in {"index", "404"}),
        key=str.lower,
    )
    rows = []
    for u in users:
        # Names come from file names on disk; escape them before they are
        # placed into markup (a no-op for ordinary logins).
        safe = escape(u)
        if u == "pirate":
            rows.append(f' <li><a href="/{safe}">/{safe}</a> — Nick Sweeting (enhanced)</li>')
        else:
            rows.append(f' <li><a href="/{safe}">/{safe}</a></li>')
    user_list = "<ul>\n" + "\n".join(rows) + "\n </ul>"

    # The page contains non-ASCII text, so do not depend on the locale's
    # default encoding.
    page = index_path.read_text(encoding="utf-8")
    # Replace only the first <ul>…</ul>. A callable replacement inserts
    # the list verbatim, without re.sub's backslash-escape processing.
    page = re.sub(
        r"<ul>.*?</ul>",
        lambda _match: user_list,
        page,
        count=1,
        flags=re.S,
    )
    # Update the "open a PR" footer to mention the self-serve flow. The
    # angle brackets are written as entities; a raw <your-login> would be
    # parsed as an unknown element and not displayed.
    page = page.replace(
        'Want to add yourself? <a href="https://github.com/ArchiveBox/githubusers/edit/main/cloudflare/users.txt">Open a PR on users.txt</a>.',
        'Want your dashboard here? Just visit <code>/&lt;your-login&gt;</code> — mining kicks off automatically.',
    )
    index_path.write_text(page, encoding="utf-8")
    print(f"Wrote index.html with {len(users)} users: {users}")
    PY
# Last wrangler deploy of the run, executed from the cloudflare/ directory
# after the user index has been regenerated.
- name: Final deploy
  working-directory: cloudflare
  env:
    CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }}
    CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
  run: npx --yes wrangler@latest deploy