Skip to content

Daily Deception+ Analysis #43

Daily Deception+ Analysis

Daily Deception+ Analysis #43

# ============================================================================
# Daily Predict+ Analysis Workflow
# ============================================================================
# Runs at 7:00 AM UTC daily to analyze the previous day's MLB games.
# Manual runs (workflow_dispatch) can override the date and the level (MLB or AAA).
# Uses per-pitcher models: each pitcher evaluated against their own patterns.
#
# Approach:
# 1. Load each pitcher's last 500 pitches as training data
# 2. Train individual model per pitcher
# 3. Evaluate today's pitches against their own model
# 4. Standardize using fixed baseline (baseline_params.rds)
#
# Outputs are committed back to the repository in:
# output/{year}/{month}/{day}.csv
# output/{year}/{month}/orioles_{day}.csv
# output/{year}/{month}/visualizations/
# ============================================================================
name: Daily Predict+ Analysis

on:
  # Run at 7:00 AM UTC daily (gives Statcast time to populate after games)
  schedule:
    - cron: '0 7 * * *'
  # Allow manual trigger with optional date and level overrides.
  # On scheduled runs both inputs are empty; the check-games job then falls
  # back to yesterday (UTC) and MLB.
  workflow_dispatch:
    inputs:
      analysis_date:
        description: 'Date to analyze (YYYY-MM-DD). Leave empty for yesterday.'
        required: false
        default: ''
      level:
        description: 'Level to analyze (MLB or AAA)'
        required: false
        # A choice input restricts the UI to the only two values the
        # check-games job accepts.
        type: choice
        options:
          - MLB
          - AAA
        default: 'MLB'

# Prevent concurrent runs: with cancel-in-progress disabled, a new run waits
# for the in-progress run of this group instead of cancelling it.
concurrency:
  group: daily-analysis
  cancel-in-progress: false

jobs:
# --------------------------------------------------------------------------
# Job 1: Lightweight check — are there regular season games on the analysis
# date (yesterday in UTC unless overridden) at the requested level?
# Calls the MLB Stats API schedule endpoint before any R setup.
# If no games are found the analyze job is skipped entirely.
#
# Outputs:
#   has_games - 'true' / 'false'
#   date      - validated analysis date (YYYY-MM-DD)
#   level     - validated level (MLB or AAA)
# --------------------------------------------------------------------------
  check-games:
    runs-on: ubuntu-latest
    outputs:
      has_games: ${{ steps.schedule.outputs.has_games }}
      date: ${{ steps.date.outputs.date }}
      level: ${{ steps.date.outputs.level }}
    steps:
      - name: Determine analysis date
        id: date
        env:
          # Inputs are passed through the environment (never interpolated
          # into the script) so their content cannot be executed as shell.
          INPUT_DATE: ${{ github.event.inputs.analysis_date }}
          INPUT_LEVEL: ${{ github.event.inputs.level }}
        run: |
          if [ -n "$INPUT_DATE" ]; then
            # Validate the whole string (YYYY-MM-DD only) and make sure it is
            # a real calendar date. The bash regex anchors to the full value,
            # so multi-line input cannot slip through.
            if [[ "$INPUT_DATE" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]] \
                && date -u -d "$INPUT_DATE" '+%Y-%m-%d' > /dev/null 2>&1; then
              echo "date=$INPUT_DATE" >> "$GITHUB_OUTPUT"
            else
              echo "::error::Invalid date format: $INPUT_DATE (expected YYYY-MM-DD)"
              exit 1
            fi
          else
            # Yesterday in UTC
            echo "date=$(date -u -d 'yesterday' '+%Y-%m-%d')" >> "$GITHUB_OUTPUT"
          fi

          if [ -n "$INPUT_LEVEL" ]; then
            # Validate level (MLB or AAA only)
            if [ "$INPUT_LEVEL" = "MLB" ] || [ "$INPUT_LEVEL" = "AAA" ]; then
              echo "level=$INPUT_LEVEL" >> "$GITHUB_OUTPUT"
            else
              echo "::error::Invalid level: $INPUT_LEVEL (expected MLB or AAA)"
              exit 1
            fi
          else
            # Scheduled runs carry no inputs; default to MLB
            echo "level=MLB" >> "$GITHUB_OUTPUT"
          fi

      - name: Check for regular season games
        id: schedule
        env:
          DATE: ${{ steps.date.outputs.date }}
          LEVEL: ${{ steps.date.outputs.level }}
        run: |
          # Query the schedule of the level being analyzed.
          # MLB Stats API sport IDs: 1 = MLB, 11 = Triple-A.
          if [ "$LEVEL" = "AAA" ]; then
            SPORT_ID=11
          else
            SPORT_ID=1
          fi

          echo "Checking ${LEVEL} schedule for regular season games on ${DATE}..."

          # Best effort: if the API cannot be reached after retries, fall back
          # to "no games" (the analysis is skipped) but surface a warning so
          # the skip is not silent.
          if ! RESPONSE=$(curl -sf --retry 3 --max-time 30 \
              "https://statsapi.mlb.com/api/v1/schedule?sportId=${SPORT_ID}&date=${DATE}&gameType=R"); then
            echo "::warning::Schedule lookup failed for ${DATE}; treating as no games."
            RESPONSE='{"totalGames":0}'
          fi

          # Unparseable or non-numeric answers count as zero games.
          TOTAL=$(echo "$RESPONSE" | jq -r '.totalGames // 0' 2> /dev/null || echo 0)
          case "$TOTAL" in
            '' | *[!0-9]*) TOTAL=0 ;;
          esac

          if [ "$TOTAL" -gt 0 ]; then
            echo "has_games=true" >> "$GITHUB_OUTPUT"
            echo "Found ${TOTAL} regular season game(s) on ${DATE} — proceeding with analysis."
          else
            echo "has_games=false" >> "$GITHUB_OUTPUT"
            echo "No regular season games on ${DATE} — skipping analysis."
          fi

# --------------------------------------------------------------------------
# Job 2: Full analysis — only runs when regular season games exist
#
# Installs R and the required packages, runs run_daily.R for the date and
# level chosen by check-games, commits any generated output back to the
# repository, writes a run summary, and uploads the results as artifacts.
# --------------------------------------------------------------------------
  analyze:
    needs: check-games
    if: needs.check-games.outputs.has_games == 'true'
    runs-on: ubuntu-latest
    permissions:
      contents: write  # required to push the result commit
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Full history for commits

      - name: Setup R
        uses: r-lib/actions/setup-r@v2
        with:
          r-version: '4.3.0'
          use-public-rspm: true

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y libcurl4-openssl-dev libssl-dev libxml2-dev libfontconfig1-dev

      - name: Cache R packages
        uses: actions/cache@v4
        with:
          path: ${{ env.R_LIBS_USER }}
          key: ${{ runner.os }}-r-${{ hashFiles('**/DESCRIPTION') }}
          restore-keys: |
            ${{ runner.os }}-r-

      - name: Install R packages
        shell: Rscript {0}
        env:
          # Token used by remotes for authenticated GitHub API access
          GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
        run: |
          install.packages(c(
            "dplyr", "tidyr", "purrr", "stringr", "lubridate",
            "nnet", "readr", "tibble", "forcats",
            "httr", "jsonlite", "ggplot2", "scales",
            "showtext", "sysfonts"
          ), repos = "https://cloud.r-project.org")
          # Install sabRmetrics from GitHub
          install.packages("remotes")
          remotes::install_github("saberpowers/sabRmetrics", quiet = TRUE)

      - name: Cache Statcast data
        uses: actions/cache@v4
        with:
          path: cache/
          key: statcast-cache-${{ needs.check-games.outputs.date }}
          restore-keys: |
            statcast-cache-

      - name: Check for baseline parameters
        id: baseline
        run: |
          # Informational only: a missing baseline is logged, not fatal.
          if [ -f "baseline_params.rds" ]; then
            echo "exists=true" >> "$GITHUB_OUTPUT"
            echo "Baseline parameters file found"
          else
            echo "exists=false" >> "$GITHUB_OUTPUT"
            echo "Warning: baseline_params.rds not found. Run compute_baseline.R first."
            echo "Daily analysis will use fallback values."
          fi

      - name: Run daily analysis
        id: analysis
        # A failing script must not stop the job: the output check, summary
        # and artifact steps still run, and the summary reports the outcome.
        continue-on-error: true
        env:
          ANALYSIS_DATE: ${{ needs.check-games.outputs.date }}
          ANALYSIS_LEVEL: ${{ needs.check-games.outputs.level }}
        run: |
          Rscript run_daily.R "$ANALYSIS_DATE" --level "$ANALYSIS_LEVEL"

      - name: Check for outputs
        id: check_outputs
        env:
          DATE: ${{ needs.check-games.outputs.date }}
        run: |
          # Output layout: output/{year}/{lowercase month name}/{day}.csv
          YEAR=$(date -d "$DATE" '+%Y')
          MONTH=$(date -d "$DATE" '+%B' | tr '[:upper:]' '[:lower:]')
          DAY=$(date -d "$DATE" '+%d')
          CSV_PATH="output/${YEAR}/${MONTH}/${DAY}.csv"

          if [ -f "$CSV_PATH" ]; then
            {
              echo "has_output=true"
              echo "csv_path=$CSV_PATH"
              echo "year=$YEAR"
              echo "month=$MONTH"
              echo "day=$DAY"
            } >> "$GITHUB_OUTPUT"
          else
            echo "has_output=false" >> "$GITHUB_OUTPUT"
            echo "No output CSV found for $DATE"
          fi

      - name: Commit and push results
        if: steps.check_outputs.outputs.has_output == 'true'
        env:
          DATE: ${{ needs.check-games.outputs.date }}
          LEVEL: ${{ needs.check-games.outputs.level }}
        run: |
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"

          # Stage output files and updated caches. Each path is staged on its
          # own and only when present: one missing pathspec makes a combined
          # `git add` abort without staging anything.
          for path in output cache/mlbam_name_cache.csv models; do
            if [ -e "$path" ]; then
              git add -- "$path" || echo "::warning::Could not stage ${path}"
            else
              echo "Skipping missing path: ${path}"
            fi
          done

          # Check if there are changes to commit
          if git diff --staged --quiet; then
            echo "No changes to commit"
          else
            # Subject and body are passed separately so git inserts the
            # blank line between them.
            BODY=$(printf '%s\n' \
              "- Added ${LEVEL} Predict+ analysis for ${DATE}" \
              "- Generated social media graphics" \
              "- Updated pitcher name cache")
            git commit -m "Daily analysis: ${LEVEL} ${DATE}" -m "$BODY"
            git push
          fi

      - name: Create summary
        if: always()
        env:
          DATE: ${{ needs.check-games.outputs.date }}
          LEVEL: ${{ needs.check-games.outputs.level }}
          ANALYSIS_OUTCOME: ${{ steps.analysis.outcome }}
          HAS_OUTPUT: ${{ steps.check_outputs.outputs.has_output }}
          CSV: ${{ steps.check_outputs.outputs.csv_path }}
          YEAR: ${{ steps.check_outputs.outputs.year }}
          MONTH: ${{ steps.check_outputs.outputs.month }}
          DAY: ${{ steps.check_outputs.outputs.day }}
        run: |
          {
            echo "## Daily Predict+ Analysis Summary"
            echo ""
            echo "**Date:** ${DATE}"
            echo "**Level:** ${LEVEL}"
            echo ""

            if [ "$HAS_OUTPUT" = "true" ]; then
              # Count data rows (everything after the header); tolerates a
              # missing trailing newline and an empty file.
              PITCHERS=$(tail -n +2 "$CSV" | grep -c '' || true)

              if [ "$ANALYSIS_OUTCOME" = "success" ]; then
                echo "**Status:** Success"
              else
                echo "**Status:** Output found, but the analysis step outcome was '${ANALYSIS_OUTCOME:-not run}'"
              fi
              echo "**Pitchers analyzed:** ${PITCHERS}"
              echo ""
              echo "### Output Files"
              echo "- CSV: \`${CSV}\`"
              echo "- Visualizations: \`output/${YEAR}/${MONTH}/visualizations/\`"

              # Orioles pitcher summary (MLB only)
              if [ "$LEVEL" = "MLB" ]; then
                ORIOLES_CSV="output/${YEAR}/${MONTH}/orioles_${DAY}.csv"
                echo ""
                echo "### Baltimore Orioles Pitcher Predict+ Scores (≥ 10 pitches)"
                echo ""
                if [ -f "$ORIOLES_CSV" ]; then
                  python3 scripts/orioles_summary.py "$ORIOLES_CSV"
                else
                  echo "No Orioles pitchers appeared (or no team data available)."
                fi
              fi
            else
              echo "**Status:** No output generated"
              if [ "$ANALYSIS_OUTCOME" != "success" ]; then
                echo "**Analysis step outcome:** ${ANALYSIS_OUTCOME:-not run}"
              fi
            fi
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Upload MLB all-pitchers artifact
        if: steps.check_outputs.outputs.has_output == 'true'
        uses: actions/upload-artifact@v4
        with:
          name: mlb-all-pitchers-${{ needs.check-games.outputs.date }}
          path: |
            ${{ steps.check_outputs.outputs.csv_path }}
            output/${{ steps.check_outputs.outputs.year }}/${{ steps.check_outputs.outputs.month }}/visualizations/
          retention-days: 30

      - name: Upload Orioles artifact
        if: steps.check_outputs.outputs.has_output == 'true' && needs.check-games.outputs.level == 'MLB'
        uses: actions/upload-artifact@v4
        with:
          name: orioles-pitchers-${{ needs.check-games.outputs.date }}
          path: |
            output/${{ steps.check_outputs.outputs.year }}/${{ steps.check_outputs.outputs.month }}/orioles_${{ steps.check_outputs.outputs.day }}.csv
          retention-days: 30