# Runner Heartbeat (captured from workflow run "Runner Heartbeat #1")

# Workflow-level settings: display name, triggers, concurrency, token
# permissions and environment shared by every job.
name: Runner Heartbeat

on:
  schedule:
    # Midnight UTC on days 1, 8, 15, 22 and 29 of each month. `*/7` in the
    # day-of-month field restarts at every month boundary, so consecutive runs
    # are at most 7 days apart but closer together across a month end.
    - cron: '0 0 */7 * *'
  # Also allow manual runs.
  workflow_dispatch:

# Only one heartbeat run at a time; a newer run cancels the one in progress.
concurrency:
  group: runner-heartbeat
  cancel-in-progress: true

permissions:
  contents: read

env:
  GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
# Three jobs run in sequence: start the builder VM, run a trivial job on its
# self-hosted runner, then stop the VM again.
jobs:
  # Starts (or resumes) the GCE instance when needed and waits until the
  # self-hosted runner with the same name reports as online.
  start-builder:
    name: Start areal-docker-builder instance
    runs-on: ubuntu-latest
    env:
      INSTANCE_NAME: areal-docker-builder
      INSTANCE_ZONE: us-central1-f
    steps:
      - name: Authenticate to Google Cloud
        uses: google-github-actions/auth@v3
        with:
          credentials_json: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }}

      - name: Set up Google Cloud SDK
        uses: google-github-actions/setup-gcloud@v3

      - name: Start builder instance if stopped
        run: |
          set -euo pipefail

          # Prints the instance's current lifecycle status on stdout.
          describe_status() {
            gcloud compute instances describe "$INSTANCE_NAME" \
              --project "$GCP_PROJECT_ID" \
              --zone "$INSTANCE_ZONE" \
              --format="get(status)"
          }

          # gcloud's own stderr is left visible so the real cause (missing
          # instance, permissions, API error) shows up in the job log.
          if ! status=$(describe_status); then
            echo "Error: Instance $INSTANCE_NAME not found or not accessible in zone $INSTANCE_ZONE" >&2
            exit 1
          fi

          # An earlier run may still be shutting the instance down. Give it up
          # to 5 minutes (30 x 10s) to reach a stable state before acting.
          for _ in $(seq 1 30); do
            case "$status" in
              STOPPING | SUSPENDING)
                echo "Instance $INSTANCE_NAME is $status. Waiting for it to settle..."
                sleep 10
                status=$(describe_status)
                ;;
              *)
                break
                ;;
            esac
          done

          case "$status" in
            RUNNING)
              echo "Instance $INSTANCE_NAME is already running."
              ;;
            PROVISIONING | STAGING)
              # Already on its way up; the next step waits for the runner.
              echo "Instance $INSTANCE_NAME is already starting ($status)."
              ;;
            TERMINATED)
              echo "Instance $INSTANCE_NAME is $status. Starting it..."
              gcloud compute instances start "$INSTANCE_NAME" \
                --project "$GCP_PROJECT_ID" \
                --zone "$INSTANCE_ZONE"
              echo "Instance started successfully."
              ;;
            SUSPENDED)
              # Suspended instances must be resumed; `start` only applies to
              # stopped (TERMINATED) instances.
              echo "Instance $INSTANCE_NAME is $status. Resuming it..."
              gcloud compute instances resume "$INSTANCE_NAME" \
                --project "$GCP_PROJECT_ID" \
                --zone "$INSTANCE_ZONE"
              echo "Instance resumed successfully."
              ;;
            *)
              echo "Instance $INSTANCE_NAME has unexpected status: $status" >&2
              exit 1
              ;;
          esac

      # Polls the repository's self-hosted runner list until a runner named
      # after the instance is online (up to 120 attempts, 10s apart).
      # INSTANCE_NAME comes from the job-level env.
      - name: Wait for builder runner to be online
        uses: actions/github-script@v8
        env:
          GH_PAT: ${{ secrets.GH_PAT }}
        with:
          # NOTE(review): a PAT is used instead of the default token,
          # presumably because listing runners needs elevated access — confirm.
          github-token: ${{ secrets.GH_PAT }}
          script: |
            const instanceName = process.env.INSTANCE_NAME;
            const maxAttempts = 120;
            const delayMs = 10000;
            const pat = process.env.GH_PAT;
            if (!pat) {
              core.setFailed('GH_PAT secret is not configured.');
              return;
            }
            const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
            for (let attempt = 1; attempt <= maxAttempts; attempt++) {
              const response = await github.rest.actions.listSelfHostedRunnersForRepo({
                owner: context.repo.owner,
                repo: context.repo.repo,
                per_page: 100,
                request: {
                  headers: {
                    authorization: `token ${pat}`,
                  },
                },
              });
              const found = response.data.runners.find((runner) => runner.name === instanceName);
              if (found && found.status === 'online') {
                core.info(`Builder runner ${instanceName} is online.`);
                return;
              }
              core.info(`Builder runner ${instanceName} not ready yet (attempt ${attempt}/${maxAttempts}).`);
              // No point sleeping after the final attempt; fail right away.
              if (attempt < maxAttempts) {
                await wait(delayMs);
              }
            }
            throw new Error(`Timed out waiting for builder runner ${instanceName} to come online.`);

  # Runs a trivial job on the self-hosted builder runner.
  heartbeat:
    name: Runner heartbeat
    needs: start-builder
    runs-on: [self-hosted, areal-docker-builder]
    # The job only prints a few lines; cap it so a hung runner cannot hold the
    # workflow (and the VM shutdown that depends on it) for the default limit.
    timeout-minutes: 5
    steps:
      - name: Heartbeat
        run: |
          echo "Runner heartbeat at $(date -u)"
          echo "Runner: $(hostname)"
          echo "Uptime: $(uptime)"

  # Stops the instance again. `if: always()` makes this run even when the
  # earlier jobs failed or were cancelled.
  stop-builder:
    name: Stop areal-docker-builder instance
    needs:
      - heartbeat
      - start-builder
    if: always()
    runs-on: ubuntu-latest
    env:
      INSTANCE_NAME: areal-docker-builder
      INSTANCE_ZONE: us-central1-f
    steps:
      - name: Authenticate to Google Cloud
        uses: google-github-actions/auth@v3
        with:
          credentials_json: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }}

      - name: Set up Google Cloud SDK
        uses: google-github-actions/setup-gcloud@v3

      - name: Stop builder instance
        run: |
          set -euo pipefail

          # Capture gcloud's stderr so a missing instance (tolerated) can be
          # told apart from any other failure (which must not be ignored,
          # otherwise the VM could be left running while the job passes).
          describe_err=$(mktemp)
          if ! status=$(gcloud compute instances describe "$INSTANCE_NAME" \
            --project "$GCP_PROJECT_ID" \
            --zone "$INSTANCE_ZONE" \
            --format="get(status)" 2>"$describe_err"); then
            if grep -qi "not found" "$describe_err"; then
              echo "Warning: Instance $INSTANCE_NAME not found in zone $INSTANCE_ZONE"
              exit 0
            fi
            echo "Error: could not determine status of instance $INSTANCE_NAME:" >&2
            cat "$describe_err" >&2
            exit 1
          fi

          if [ "$status" = "RUNNING" ]; then
            echo "Stopping instance $INSTANCE_NAME..."
            gcloud compute instances stop "$INSTANCE_NAME" \
              --project "$GCP_PROJECT_ID" \
              --zone "$INSTANCE_ZONE"
            echo "Instance stopped successfully."
          else
            echo "Instance $INSTANCE_NAME is already in status: $status"
          fi