Daily Deception+ Analysis #43
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ============================================================================
# Daily Predict+ Analysis Workflow
# ============================================================================
# Runs at 7:00 AM UTC daily to analyze the previous day's games (MLB by
# default; AAA can be selected when the workflow is triggered manually).
# Uses per-pitcher models: each pitcher is evaluated against their own patterns.
#
# Approach:
#   1. Load each pitcher's last 500 pitches as training data
#   2. Train an individual model per pitcher
#   3. Evaluate the analysis date's pitches against the pitcher's own model
#   4. Standardize using a fixed baseline (baseline_params.rds)
#
# Outputs are committed back to the repository in
# ({month} is the lowercase English month name, e.g. "april"):
#   output/{year}/{month}/{day}.csv
#   output/{year}/{month}/orioles_{day}.csv
#   output/{year}/{month}/visualizations/
# ============================================================================
name: Daily Predict+ Analysis

on:
  # Run at 7:00 AM UTC daily (gives Statcast time to populate after games)
  schedule:
    - cron: '0 7 * * *'
  # Allow manual trigger with optional date and level overrides
  workflow_dispatch:
    inputs:
      analysis_date:
        description: 'Date to analyze (YYYY-MM-DD). Leave empty for yesterday.'
        required: false
        default: ''
        type: string
      level:
        description: 'Level to analyze (MLB or AAA)'
        required: false
        default: 'MLB'
        # Only these two values are accepted by the date/level validation
        # step, so offer them as a fixed choice instead of free text.
        type: choice
        options:
          - MLB
          - AAA

# Prevent concurrent runs: a new run waits for the one in progress instead of
# cancelling it.
concurrency:
  group: daily-analysis
  cancel-in-progress: false
jobs:
# --------------------------------------------------------------------------
# Job 1: Lightweight check — are there regular season games on the
# analysis date?
# Calls the MLB Stats API schedule endpoint before any R setup.
# If no games are found the analyze job is skipped entirely.
# --------------------------------------------------------------------------
  check-games:
    runs-on: ubuntu-latest
    # This job only calls a public HTTP API; it needs no repository access.
    permissions: {}
    outputs:
      has_games: ${{ steps.schedule.outputs.has_games }}
      date: ${{ steps.date.outputs.date }}
      level: ${{ steps.date.outputs.level }}
    steps:
      # Resolves the date (manual input or yesterday in UTC) and the level
      # (manual input or MLB), rejecting anything that fails validation.
      - name: Determine analysis date
        id: date
        env:
          INPUT_DATE: ${{ github.event.inputs.analysis_date }}
          INPUT_LEVEL: ${{ github.event.inputs.level }}
        run: |
          if [ -n "$INPUT_DATE" ]; then
            # Validate date format (YYYY-MM-DD only)
            if ! echo "$INPUT_DATE" | grep -qE '^[0-9]{4}-[0-9]{2}-[0-9]{2}$'; then
              echo "::error::Invalid date format: $INPUT_DATE (expected YYYY-MM-DD)"
              exit 1
            fi
            # The regex alone accepts impossible dates such as 2024-13-45;
            # let `date` reject them here rather than in a later step.
            if ! date -u -d "$INPUT_DATE" '+%Y-%m-%d' > /dev/null 2>&1; then
              echo "::error::Invalid calendar date: $INPUT_DATE"
              exit 1
            fi
            echo "date=$INPUT_DATE" >> "$GITHUB_OUTPUT"
          else
            # Yesterday in UTC
            echo "date=$(date -u -d 'yesterday' '+%Y-%m-%d')" >> "$GITHUB_OUTPUT"
          fi

          if [ -n "$INPUT_LEVEL" ]; then
            # Validate level (MLB or AAA only)
            if [ "$INPUT_LEVEL" = "MLB" ] || [ "$INPUT_LEVEL" = "AAA" ]; then
              echo "level=$INPUT_LEVEL" >> "$GITHUB_OUTPUT"
            else
              echo "::error::Invalid level: $INPUT_LEVEL (expected MLB or AAA)"
              exit 1
            fi
          else
            echo "level=MLB" >> "$GITHUB_OUTPUT"
          fi

      # Sets has_games=true/false from the schedule endpoint's totalGames.
      - name: Check for regular season games
        id: schedule
        env:
          # Passed through the environment instead of being interpolated into
          # the script text.
          DATE: ${{ steps.date.outputs.date }}
          LEVEL: ${{ steps.date.outputs.level }}
        run: |
          # Query the schedule of the level being analyzed, not always MLB.
          # NOTE(review): sportId 1 = MLB and 11 = Triple-A in the MLB Stats
          # API — confirm against /api/v1/sports.
          case "$LEVEL" in
            AAA) SPORT_ID=11 ;;
            *)   SPORT_ID=1 ;;
          esac

          echo "Checking ${LEVEL} schedule for regular season games on ${DATE}..."
          # A failed request is still treated as "no games" (best effort), but
          # it is now reported so a skipped analysis is not mistaken for an
          # off day.
          if ! RESPONSE=$(curl -sf --retry 3 --max-time 30 \
            "https://statsapi.mlb.com/api/v1/schedule?sportId=${SPORT_ID}&date=${DATE}&gameType=R"); then
            echo "::warning::Schedule request failed for ${DATE}; treating it as a day without games."
            RESPONSE='{"totalGames":0}'
          fi

          TOTAL=$(echo "$RESPONSE" | jq '.totalGames // 0')
          if [ "$TOTAL" -gt 0 ]; then
            echo "has_games=true" >> "$GITHUB_OUTPUT"
            echo "Found ${TOTAL} regular season game(s) on ${DATE} — proceeding with analysis."
          else
            echo "has_games=false" >> "$GITHUB_OUTPUT"
            echo "No regular season games on ${DATE} — skipping analysis."
          fi
# --------------------------------------------------------------------------
# Job 2: Full analysis — only runs when regular season games exist
# --------------------------------------------------------------------------
  analyze:
    needs: check-games
    if: needs.check-games.outputs.has_games == 'true'
    runs-on: ubuntu-latest
    permissions:
      contents: write  # Required to push the generated outputs
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Full history for commits

      - name: Setup R
        uses: r-lib/actions/setup-r@v2
        with:
          r-version: '4.3.0'
          use-public-rspm: true

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y libcurl4-openssl-dev libssl-dev libxml2-dev libfontconfig1-dev

      - name: Cache R packages
        uses: actions/cache@v4
        with:
          path: ${{ env.R_LIBS_USER }}
          key: ${{ runner.os }}-r-${{ hashFiles('**/DESCRIPTION') }}
          restore-keys: |
            ${{ runner.os }}-r-

      - name: Install R packages
        env:
          GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
        run: |
          install.packages(c(
            "dplyr", "tidyr", "purrr", "stringr", "lubridate",
            "nnet", "readr", "tibble", "forcats",
            "httr", "jsonlite", "ggplot2", "scales",
            "showtext", "sysfonts"
          ), repos = "https://cloud.r-project.org")
          # Install sabRmetrics from GitHub
          install.packages("remotes")
          remotes::install_github("saberpowers/sabRmetrics", quiet = TRUE)
        shell: Rscript {0}

      - name: Cache Statcast data
        uses: actions/cache@v4
        with:
          path: cache/
          key: statcast-cache-${{ needs.check-games.outputs.date }}
          restore-keys: |
            statcast-cache-

      # Informational only: reports whether baseline_params.rds is present.
      - name: Check for baseline parameters
        id: baseline
        run: |
          if [ -f "baseline_params.rds" ]; then
            echo "exists=true" >> "$GITHUB_OUTPUT"
            echo "Baseline parameters file found"
          else
            echo "exists=false" >> "$GITHUB_OUTPUT"
            echo "Warning: baseline_params.rds not found. Run compute_baseline.R first."
            echo "Daily analysis will use fallback values."
          fi

      # continue-on-error keeps the later steps (output check, summary) running
      # even if the script fails; the summary step reports such a failure.
      - name: Run daily analysis
        id: analysis
        env:
          ANALYSIS_DATE: ${{ needs.check-games.outputs.date }}
          ANALYSIS_LEVEL: ${{ needs.check-games.outputs.level }}
        run: |
          Rscript run_daily.R "$ANALYSIS_DATE" --level "$ANALYSIS_LEVEL"
        continue-on-error: true

      # Derives the expected CSV path from the date and exposes it, together
      # with the year/month/day path components, to the following steps.
      - name: Check for outputs
        id: check_outputs
        env:
          DATE: ${{ needs.check-games.outputs.date }}
        run: |
          YEAR=$(date -d "$DATE" '+%Y')
          MONTH=$(date -d "$DATE" '+%B' | tr '[:upper:]' '[:lower:]')
          DAY=$(date -d "$DATE" '+%d')
          CSV_PATH="output/${YEAR}/${MONTH}/${DAY}.csv"
          if [ -f "$CSV_PATH" ]; then
            {
              echo "has_output=true"
              echo "csv_path=$CSV_PATH"
              echo "year=$YEAR"
              echo "month=$MONTH"
              echo "day=$DAY"
            } >> "$GITHUB_OUTPUT"
          else
            echo "has_output=false" >> "$GITHUB_OUTPUT"
            echo "No output CSV found for $DATE"
          fi

      - name: Commit and push results
        if: steps.check_outputs.outputs.has_output == 'true'
        env:
          DATE: ${{ needs.check-games.outputs.date }}
          LEVEL: ${{ needs.check-games.outputs.level }}
        run: |
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"

          # Add output files and updated caches.
          # Each path is staged separately: a single `git add a b c` stages
          # nothing at all when any one pathspec matches no files, which would
          # silently drop the day's results whenever an optional path is absent.
          for path in output/ cache/mlbam_name_cache.csv models/; do
            if [ -e "$path" ]; then
              git add -- "$path" || echo "::warning::Could not stage ${path}"
            else
              echo "Skipping ${path} (not present)"
            fi
          done

          # Check if there are changes to commit
          if git diff --staged --quiet; then
            echo "No changes to commit"
          else
            MESSAGE=$(printf '%s\n' \
              "Daily analysis: ${LEVEL} ${DATE}" \
              "" \
              "- Added ${LEVEL} Predict+ analysis for ${DATE}" \
              "- Generated social media graphics" \
              "- Updated pitcher name cache")
            git commit -m "$MESSAGE"
            git push
          fi

      # Always runs so the run page shows a result even when earlier steps
      # failed or produced nothing.
      - name: Create summary
        if: always()
        env:
          DATE: ${{ needs.check-games.outputs.date }}
          LEVEL: ${{ needs.check-games.outputs.level }}
          ANALYSIS_OUTCOME: ${{ steps.analysis.outcome }}
          HAS_OUTPUT: ${{ steps.check_outputs.outputs.has_output }}
          CSV: ${{ steps.check_outputs.outputs.csv_path }}
          YEAR: ${{ steps.check_outputs.outputs.year }}
          MONTH: ${{ steps.check_outputs.outputs.month }}
          DAY: ${{ steps.check_outputs.outputs.day }}
        run: |
          {
            echo "## Daily Predict+ Analysis Summary"
            echo ""
            echo "**Date:** ${DATE}"
            echo "**Level:** ${LEVEL}"
            echo ""
            # The analysis step uses continue-on-error, so a failure would
            # otherwise be invisible outside that step's own log.
            if [ "$ANALYSIS_OUTCOME" = "failure" ]; then
              echo "**Warning:** run_daily.R exited with an error (see the step log)."
              echo ""
            fi
            if [ "$HAS_OUTPUT" = "true" ]; then
              # Count the rows after the header. Unlike `wc -l` minus one,
              # this never goes negative on an empty file and still counts a
              # last row that lacks a trailing newline.
              PITCHERS=$(tail -n +2 "$CSV" | grep -c '' || true)
              echo "**Status:** Success"
              echo "**Pitchers analyzed:** ${PITCHERS}"
              echo ""
              echo "### Output Files"
              echo "- CSV: \`${CSV}\`"
              echo "- Visualizations: \`output/${YEAR}/${MONTH}/visualizations/\`"
              # Orioles pitcher summary (MLB only)
              if [ "$LEVEL" = "MLB" ]; then
                ORIOLES_CSV="output/${YEAR}/${MONTH}/orioles_${DAY}.csv"
                echo ""
                echo "### Baltimore Orioles Pitcher Predict+ Scores (≥ 10 pitches)"
                echo ""
                if [ -f "$ORIOLES_CSV" ]; then
                  python3 scripts/orioles_summary.py "$ORIOLES_CSV"
                else
                  echo "No Orioles pitchers appeared (or no team data available)."
                fi
              fi
            else
              echo "**Status:** No output generated"
            fi
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Upload MLB all-pitchers artifact
        if: steps.check_outputs.outputs.has_output == 'true'
        uses: actions/upload-artifact@v4
        with:
          name: mlb-all-pitchers-${{ needs.check-games.outputs.date }}
          path: |
            ${{ steps.check_outputs.outputs.csv_path }}
            output/${{ steps.check_outputs.outputs.year }}/${{ steps.check_outputs.outputs.month }}/visualizations/
          retention-days: 30

      - name: Upload Orioles artifact
        if: steps.check_outputs.outputs.has_output == 'true' && needs.check-games.outputs.level == 'MLB'
        uses: actions/upload-artifact@v4
        with:
          name: orioles-pitchers-${{ needs.check-games.outputs.date }}
          path: |
            output/${{ steps.check_outputs.outputs.year }}/${{ steps.check_outputs.outputs.month }}/orioles_${{ steps.check_outputs.outputs.day }}.csv
          retention-days: 30