Mine GitHub user stats + deploy to Cloudflare #73
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: Mine GitHub user stats + deploy to Cloudflare | |
| on: | |
| schedule: | |
| - cron: "0 6 * * *" | |
| workflow_dispatch: | |
| inputs: | |
| user: | |
| description: "Single GitHub login to mine (skips the full users.txt loop)." | |
| required: false | |
| push: | |
| branches: [main] | |
| paths: | |
| - "generate_stats.py" | |
| - "stats_template.html" | |
| - "cloudflare/**" | |
| - ".github/workflows/mine-and-deploy.yml" | |
| concurrency: | |
| group: mine-and-deploy | |
| cancel-in-progress: false | |
| jobs: | |
| mine-and-deploy: | |
| runs-on: ubuntu-latest | |
| permissions: | |
| contents: write # for appending new users to users.txt | |
| steps: | |
| - uses: actions/checkout@v4 | |
| with: | |
| token: ${{ secrets.GH_MINING_TOKEN || github.token }} | |
| - uses: actions/setup-python@v5 | |
| with: | |
| python-version: "3.13" | |
| - uses: actions/setup-node@v4 | |
| with: | |
| node-version: "24" | |
| # Persist per-user mining caches across runs so we don't re-fetch | |
| # PR details, commit stats, etc. that we've already pulled before. | |
| # Key includes the inputs.user (or 'full' for cron runs) so single- | |
| # user dispatches restore that user's cache specifically. | |
| - name: Restore mining caches | |
| uses: actions/cache@v4 | |
| with: | |
| path: | | |
| cache_*/api/ | |
| cache_*/bare/ | |
| cache/api/ | |
| cache/bare/ | |
| key: stats-cache-v1-${{ inputs.user || 'full' }}-${{ github.run_id }} | |
| restore-keys: | | |
| stats-cache-v1-${{ inputs.user || 'full' }}- | |
| stats-cache-v1- | |
| # Persist deployed user HTMLs across runs so that single-user | |
| # mining doesn't wipe other users from the CF bucket when wrangler | |
| # replaces the assets dir on deploy. Single shared key — every run | |
| # restores the latest snapshot of all deployed dashboards, adds / | |
| # refreshes its own user(s), and writes back the full set. | |
| # | |
| # Exclude committed dashboards (pirate, index, 404) from the cache | |
| # so old versions can't overwrite the just-checked-out repo files | |
| # when this step restores. We also explicitly re-apply them from | |
| # git after the restore (next step) — defends against the case | |
| # where the existing cache still contains them from a previous run. | |
| - name: Restore deployed dashboards | |
| uses: actions/cache@v4 | |
| with: | |
| path: | | |
| cloudflare/public/*.html | |
| !cloudflare/public/pirate.html | |
| !cloudflare/public/index.html | |
| !cloudflare/public/404.html | |
| key: deployed-htmls-v1-${{ github.run_id }} | |
| restore-keys: | | |
| deployed-htmls-v1- | |
| - name: Re-apply committed dashboards from git | |
| run: | | |
| git checkout HEAD -- \ | |
| cloudflare/public/pirate.html \ | |
| cloudflare/public/index.html \ | |
| cloudflare/public/404.html | |
| # Bootstrap from the live CF bucket: any user that's already | |
| # deployed in production but missing locally (because the previous | |
| # run was cancelled at the 6h cap and its post-step cache save | |
| # didn't run) gets downloaded here. Without this, the next run | |
| # would re-mine ~all users every time. | |
| - name: Bootstrap dashboards from deployed bucket | |
| run: | | |
| set +e | |
| deployed_json=$(curl -sSL --max-time 15 \ | |
| "https://githubusers.archivebox.io/deployed.json" 2>/dev/null) | |
| if [ -z "$deployed_json" ] || ! echo "$deployed_json" | python3 -c "import json,sys;json.load(sys.stdin)" >/dev/null 2>&1; then | |
| echo "No deployed.json available — skipping bootstrap" | |
| exit 0 | |
| fi | |
| users=$(echo "$deployed_json" \ | |
| | python3 -c "import json,sys; [print(u) for u in json.load(sys.stdin)]") | |
| n=0 | |
| for u in $users; do | |
| f="cloudflare/public/${u}.html" | |
| # Don't clobber pirate.html (committed/authoritative). | |
| [ "$u" = "pirate" ] && continue | |
| if [ ! -s "$f" ]; then | |
| if curl -sSL --max-time 30 --fail \ | |
| "https://githubusers.archivebox.io/${u}.html" \ | |
| -o "$f" 2>/dev/null; then | |
| n=$((n + 1)) | |
| else | |
| rm -f "$f" | |
| fi | |
| fi | |
| done | |
| echo "Bootstrapped $n dashboards from deployed bucket" | |
| - name: Install gh CLI | |
| run: | | |
| type -p gh >/dev/null || ( | |
| sudo apt-get update -qq && sudo apt-get install -y gh | |
| ) | |
| # gh authenticates via the GH_TOKEN environment variable (which we | |
| # already set on each step that calls it); no explicit `gh auth login` | |
| # step needed. | |
| - name: Determine target users | |
| id: targets | |
| working-directory: . | |
| env: | |
| INPUT_USER: ${{ inputs.user }} | |
| run: | | |
| set -e | |
| mkdir -p cloudflare/public | |
| # Build the list of users we'll mine THIS run. | |
| if [ -n "$INPUT_USER" ]; then | |
| echo "Single-user mine (forced): $INPUT_USER" | |
| # Persist new users into users.txt so future scheduled runs | |
| # include them. | |
| if ! grep -qiE "^${INPUT_USER}$" cloudflare/users.txt; then | |
| echo "$INPUT_USER" >> cloudflare/users.txt | |
| echo "added=true" >> $GITHUB_OUTPUT | |
| fi | |
| echo "$INPUT_USER" > /tmp/targets.txt | |
| else | |
| # Full mine: only mine users that don't have a deployed | |
| # dashboard yet. Once a dashboard exists, it stays put | |
| # until someone clicks the manual "Refresh" button (which | |
| # dispatches with inputs.user set). | |
| echo "Full mine of users.txt (skip already-deployed)" | |
| : > /tmp/targets.txt | |
| while IFS= read -r u || [ -n "$u" ]; do | |
| u="${u%%#*}" | |
| u="${u//[[:space:]]/}" | |
| [ -z "$u" ] && continue | |
| if [ -f "cloudflare/public/${u}.html" ]; then | |
| echo " skip @$u — dashboard already deployed" | |
| continue | |
| fi | |
| echo "$u" >> /tmp/targets.txt | |
| done < cloudflare/users.txt | |
| fi | |
| # Pre-stage pirate's enhanced version | |
| if [ -f stats.html ]; then | |
| cp stats.html cloudflare/public/pirate.html | |
| fi | |
| # Stage users.txt as a public asset so the Worker's dynamic / | |
| # handler can read it for the queued/mining list. Doing it here | |
| # (before the mining loop) means every interim deploy also has | |
| # a fresh users.txt available to the homepage. | |
| cp cloudflare/users.txt cloudflare/public/users.txt | |
| # Generate deployed.json — a JSON array of user logins whose | |
| # dashboard HTML is currently in /public. Updated on every | |
| # deploy_now() call below so the homepage stays accurate. | |
| ls cloudflare/public/*.html 2>/dev/null \ | |
| | sed -E 's|cloudflare/public/||;s|\.html$||' \ | |
| | grep -vE '^(index|404)$' \ | |
| | python3 -c "import sys,json; print(json.dumps(sorted([l.strip() for l in sys.stdin if l.strip()])))" \ | |
| > cloudflare/public/deployed.json | |
| echo "Targets:" | |
| cat /tmp/targets.txt | |
| - name: Mine each user (with live in-progress deploys) | |
| working-directory: . | |
| env: | |
| NO_COLOR: "1" | |
| GH_TOKEN: ${{ secrets.GH_MINING_TOKEN }} | |
| CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} | |
| CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} | |
| # Posted by generate_stats.py to /api/progress so the Worker's | |
| # loading page can render real-time phase info. | |
| STATS_PROGRESS_TOKEN: ${{ secrets.GH_MINING_TOKEN }} | |
| run: | | |
| set -e | |
| regen_manifest() { | |
| ls cloudflare/public/*.html 2>/dev/null \ | |
| | sed -E 's|cloudflare/public/||;s|\.html$||' \ | |
| | grep -vE '^(index|404)$' \ | |
| | python3 -c "import sys,json; print(json.dumps(sorted([l.strip() for l in sys.stdin if l.strip()])))" \ | |
| > cloudflare/public/deployed.json | |
| } | |
| deploy_now() { | |
| regen_manifest | |
| (cd cloudflare && npx --yes wrangler@latest deploy --minify 2>&1 | | |
| tail -2) || echo "::warning::interim deploy failed" | |
| } | |
| watch_and_deploy() { | |
| # Watches stats_<user>.html every 30s while $1 (PID) is alive. | |
| # Copies any updated file into the deploy dir and re-deploys so | |
| # the live page shows partial data as mining progresses. | |
| local pid="$1" user="$2" src="stats_${user}.html" \ | |
| dst="cloudflare/public/${user}.html" last_mtime=0 | |
| while kill -0 "$pid" 2>/dev/null; do | |
| sleep 30 | |
| if [ -f "$src" ]; then | |
| local mtime | |
| mtime=$(stat -c %Y "$src" 2>/dev/null \ | |
| || stat -f %m "$src" 2>/dev/null || echo 0) | |
| if [ "$mtime" -gt "$last_mtime" ]; then | |
| cp "$src" "$dst" | |
| echo "::group::Interim deploy of @$user (live)" | |
| deploy_now | |
| echo "::endgroup::" | |
| last_mtime="$mtime" | |
| fi | |
| fi | |
| done | |
| } | |
| # Per-user wallclock cap. Some users have hundreds of repos and | |
| # cold-mining them takes forever; bounding to 25min/user keeps | |
| # the queue moving (partial data already deployed via the | |
| # watcher's interim deploys). | |
| USER_TIMEOUT=1500 | |
| while IFS= read -r user || [ -n "$user" ]; do | |
| user="${user%%#*}" | |
| user="${user//[[:space:]]/}" | |
| [ -z "$user" ] && continue | |
| [ "$user" = "pirate" ] && continue | |
| echo "::group::Mining @$user" | |
| # Run mining in the background; watch loop deploys partials. | |
| timeout --kill-after=30s "$USER_TIMEOUT" python3 \ | |
| generate_stats.py --user "$user" \ | |
| --no-search-commits \ | |
| --max-api-fetches 800 & | |
| MINE_PID=$! | |
| watch_and_deploy "$MINE_PID" "$user" & | |
| WATCH_PID=$! | |
| wait "$MINE_PID" \ | |
| || echo "::warning::mining @$user exited non-zero (timeout or error)" | |
| # Stop the watcher and do a final deploy with the final HTML. | |
| kill "$WATCH_PID" 2>/dev/null || true | |
| wait "$WATCH_PID" 2>/dev/null || true | |
| if [ -f "stats_$user.html" ]; then | |
| cp "stats_$user.html" "cloudflare/public/$user.html" | |
| echo "::group::Final deploy of @$user" | |
| deploy_now | |
| echo "::endgroup::" | |
| fi | |
| echo "::endgroup::" | |
| done < /tmp/targets.txt | |
| - name: Commit added users.txt entries | |
| if: steps.targets.outputs.added == 'true' | |
| working-directory: cloudflare | |
| run: | | |
| git config user.name "github-actions[bot]" | |
| git config user.email "41898282+github-actions[bot]@users.noreply.github.com" | |
| git add users.txt | |
| git diff --staged --quiet || git commit -m "Add ${{ inputs.user }} to users.txt [skip ci]" | |
| git push || echo "::warning::push failed (no commit permission?)" | |
| - name: Final deploy | |
| working-directory: . | |
| run: | | |
| # Regenerate deployed.json one last time before the final push. | |
| ls cloudflare/public/*.html 2>/dev/null \ | |
| | sed -E 's|cloudflare/public/||;s|\.html$||' \ | |
| | grep -vE '^(index|404)$' \ | |
| | python3 -c "import sys,json; print(json.dumps(sorted([l.strip() for l in sys.stdin if l.strip()])))" \ | |
| > cloudflare/public/deployed.json | |
| cd cloudflare && npx --yes wrangler@latest deploy | |
| env: | |
| CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} | |
| CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} |