-
Notifications
You must be signed in to change notification settings - Fork 0
200 lines (182 loc) · 7.77 KB
/
aca-schedule.yml
File metadata and controls
200 lines (182 loc) · 7.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
name: ACA Manual Scale (disabled schedule — ADR-018)

# ADR-018: INF-STG-EU_EHDS runs 24/7, so the scheduled triggers below are
# commented out. The start/stop jobs stay reachable through workflow_dispatch
# for manual maintenance windows (e.g., cost investigations, planned downtime).
on:
  # schedule:
  #   - cron: "0 5 * * 1-5"  # disabled — was start (07:00 Europe/Berlin)
  #   - cron: "0 18 * * *"   # disabled — was stop (20:00 Europe/Berlin)
  workflow_dispatch:
    inputs:
      action:
        description: "Action to run"
        required: true
        type: choice
        options:
          - start
          - stop

# id-token: write lets the jobs request an OIDC token (the jobs log in with
# the "Azure Login (OIDC)" step); contents is read-only.
permissions:
  contents: read
  id-token: write

# Every run shares one concurrency group; a run that is already in progress
# is not cancelled when another one is triggered.
concurrency:
  group: 'azure-schedule'
  cancel-in-progress: false

# Azure targets shared by every job's shell steps.
env:
  RESOURCE_GROUP: rg-mvhd-dev
  PG_SERVER: pg-mvhd-dev

jobs:
determine-action:
  # Resolves which downstream job (start or stop) should run and exposes the
  # decision as the `action` job output.
  #   - workflow_dispatch: the operator's `action` input is used.
  #   - schedule (triggers currently commented out in `on:`): the start cron
  #     expression maps to "start"; any other schedule maps to "stop".
  name: Determine Action
  runs-on: ubuntu-latest
  timeout-minutes: 2
  outputs:
    action: ${{ steps.decide.outputs.action }}
  steps:
    - id: decide
      # Context values are handed to the script through environment variables
      # instead of being interpolated into the script text, so their content
      # can never be parsed as shell code.
      env:
        EVENT_NAME: ${{ github.event_name }}
        REQUESTED_ACTION: ${{ inputs.action }}
        SCHEDULE_CRON: ${{ github.event.schedule }}
      run: |
        if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
          # Fail loudly on an unexpected value; otherwise neither the start
          # nor the stop job would match and the run would silently do nothing.
          case "$REQUESTED_ACTION" in
            start|stop)
              action="$REQUESTED_ACTION"
              ;;
            *)
              echo "::error::Unsupported action '$REQUESTED_ACTION' (expected 'start' or 'stop')"
              exit 1
              ;;
          esac
        elif [ "$SCHEDULE_CRON" = "0 5 * * 1-5" ]; then
          action="start"
        else
          action="stop"
        fi
        echo "action=$action" >> "$GITHUB_OUTPUT"
stop:
  # Scales every Container App to 0 replicas and stops the PostgreSQL
  # flexible server. Individual failures are deliberately best-effort (they
  # raise a warning instead of failing the job), but they are recorded as
  # step outputs so the summary reports what actually happened.
  name: Scale Down (Stop)
  needs: determine-action
  if: needs.determine-action.outputs.action == 'stop'
  runs-on: ubuntu-latest
  timeout-minutes: 20
  steps:
    - name: Azure Login (OIDC)
      uses: azure/login@v2
      with:
        client-id: ${{ secrets.AZURE_CLIENT_ID }}
        tenant-id: ${{ secrets.AZURE_TENANT_ID }}
        subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}

    - name: Scale all Container Apps to 0
      id: scale
      run: |
        APPS=(
          mvhd-neo4j mvhd-nats mvhd-keycloak mvhd-vault
          mvhd-neo4j-proxy mvhd-ui mvhd-controlplane
          mvhd-dp-fhir mvhd-dp-omop mvhd-identityhub mvhd-issuerservice
          mvhd-tenant-mgr mvhd-provision-mgr
        )
        echo "=== Scaling ${#APPS[@]} Container Apps to 0 replicas ==="
        failed=()
        for app in "${APPS[@]}"; do
          echo "  $app -> 0/0"
          if ! az containerapp update \
            --name "$app" --resource-group "$RESOURCE_GROUP" \
            --min-replicas 0 --max-replicas 0 \
            -o none; then
            echo "::warning::Failed to scale $app"
            failed+=("$app")
          fi
        done
        # Outputs consumed by the summary step.
        echo "total=${#APPS[@]}" >> "$GITHUB_OUTPUT"
        echo "failed=${failed[*]}" >> "$GITHUB_OUTPUT"

    - name: Stop PostgreSQL Flexible Server
      id: pg
      run: |
        echo "=== Stopping $PG_SERVER ==="
        if az postgres flexible-server stop \
          --name "$PG_SERVER" --resource-group "$RESOURCE_GROUP" \
          -o none; then
          echo "result=stopped" >> "$GITHUB_OUTPUT"
        else
          echo "::warning::PG stop failed (may already be stopped)"
          echo "result=stop failed (may already be stopped)" >> "$GITHUB_OUTPUT"
        fi

    - name: Stop summary
      # Runs even when an earlier step failed, so every value below may be
      # empty and is given an explicit "did not run" fallback.
      if: always()
      env:
        APPS_TOTAL: ${{ steps.scale.outputs.total }}
        APPS_FAILED: ${{ steps.scale.outputs.failed }}
        PG_RESULT: ${{ steps.pg.outputs.result }}
      run: |
        if [ -z "$APPS_TOTAL" ]; then
          apps_status="scale-down step did not complete"
        elif [ -n "$APPS_FAILED" ]; then
          apps_status="failed to scale: $APPS_FAILED"
        else
          apps_status="all $APPS_TOTAL scaled to 0"
        fi
        {
          echo "## Off-Hours Scale-Down"
          echo ""
          echo "- **Time:** $(date -u +%Y-%m-%dT%H:%M:%SZ)"
          echo "- **Container Apps:** $apps_status"
          echo "- **PostgreSQL:** ${PG_RESULT:-stop step did not run}"
          echo "- **Next start:** manual only — scheduled triggers are disabled (ADR-018); run this workflow with action=start"
        } >> "$GITHUB_STEP_SUMMARY"
start:
  # Brings the environment back up: starts PostgreSQL, restores each
  # Container App's replica range, re-runs the Vault bootstrap job and probes
  # the UI. Individual failures are deliberately best-effort (warning, not
  # job failure), but they are recorded as step outputs so the summary
  # reports what actually happened.
  name: Scale Up (Start)
  needs: determine-action
  if: needs.determine-action.outputs.action == 'start'
  runs-on: ubuntu-latest
  timeout-minutes: 30
  steps:
    - name: Azure Login (OIDC)
      uses: azure/login@v2
      with:
        client-id: ${{ secrets.AZURE_CLIENT_ID }}
        tenant-id: ${{ secrets.AZURE_TENANT_ID }}
        subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}

    - name: Start PostgreSQL Flexible Server
      run: |
        echo "=== Starting $PG_SERVER ==="
        az postgres flexible-server start \
          --name "$PG_SERVER" --resource-group "$RESOURCE_GROUP" \
          -o none || echo "::warning::PG start failed (may already be running)"

    - name: Wait for PostgreSQL to be Ready
      id: pg
      run: |
        # Poll the server state up to 30 times, 10s apart.
        for i in $(seq 1 30); do
          state=$(az postgres flexible-server show \
            --name "$PG_SERVER" --resource-group "$RESOURCE_GROUP" \
            --query "state" -o tsv 2>/dev/null || echo "Unknown")
          echo "  attempt $i: state=$state"
          if [ "$state" = "Ready" ]; then
            echo "PostgreSQL is Ready"
            echo "ready=true" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          sleep 10
        done
        # Still best-effort, but no longer silent when the server never
        # reaches Ready.
        echo "ready=false" >> "$GITHUB_OUTPUT"
        echo "::warning::PostgreSQL did not report Ready after 30 attempts — continuing anyway"

    - name: Restore Container App scale profiles
      id: restore
      run: |
        echo "=== Restoring Container App replica profiles ==="
        total=0
        failed=()
        # scale_app <app> <min> <max>: set the replica range for one app and
        # record the app name if the update fails.
        scale_app() {
          local app=$1 min=$2 max=$3
          total=$((total + 1))
          echo "  $app -> $min/$max"
          if ! az containerapp update \
            --name "$app" --resource-group "$RESOURCE_GROUP" \
            --min-replicas "$min" --max-replicas "$max" \
            -o none; then
            echo "::warning::Failed to scale $app"
            failed+=("$app")
          fi
        }
        # Apps with min=1 max=1 (singleton stateful / small services)
        for app in mvhd-neo4j mvhd-nats mvhd-keycloak mvhd-vault mvhd-identityhub mvhd-issuerservice mvhd-tenant-mgr mvhd-provision-mgr; do
          scale_app "$app" 1 1
        done
        # Apps with min=1 max=2
        for app in mvhd-neo4j-proxy mvhd-controlplane mvhd-dp-fhir mvhd-dp-omop; do
          scale_app "$app" 1 2
        done
        # UI: min=1 max=3
        scale_app mvhd-ui 1 3
        # Outputs consumed by the summary step.
        echo "total=$total" >> "$GITHUB_OUTPUT"
        echo "failed=${failed[*]}" >> "$GITHUB_OUTPUT"

    - name: Wait for stateful services to be reachable
      run: |
        echo "Waiting 75s for Neo4j + Vault pods to start..."
        sleep 75

    - name: Re-bootstrap Vault (in-memory gotcha #1)
      id: vault
      run: |
        echo "=== Re-running bootstrap job (Vault secrets + keys) ==="
        if az containerapp job start \
          --name mvhd-vault-bootstrap --resource-group "$RESOURCE_GROUP" \
          -o none; then
          echo "result=bootstrap job started" >> "$GITHUB_OUTPUT"
        else
          echo "::warning::Vault bootstrap job start failed"
          echo "result=bootstrap job start FAILED" >> "$GITHUB_OUTPUT"
        fi

    # Neo4j is now persistent on Azure Files (ADR-017). The seed job is no
    # longer required on every start — data survives the 0/0 → 1/1 cycle.
    # If the database is empty (fresh storage account) the one-time bootstrap
    # workflow `bootstrap-stateful.yml` should be run manually instead.

    - name: Verify UI is reachable
      id: verify
      run: |
        UI_FQDN=$(az containerapp show --name mvhd-ui \
          --resource-group "$RESOURCE_GROUP" \
          --query "properties.configuration.ingress.fqdn" -o tsv)
        echo "ui_url=https://${UI_FQDN}" >> "$GITHUB_OUTPUT"
        echo "Checking https://${UI_FQDN}..."
        for i in $(seq 1 30); do
          # No -f here: with -f curl still prints the status code and then
          # exits non-zero, so an in-substitution `|| echo "000"` fallback
          # would be appended to it (e.g. "503000"). The fallback is assigned
          # outside the substitution instead. --max-time bounds each probe.
          code=$(curl -s -o /dev/null --max-time 10 -w "%{http_code}" "https://${UI_FQDN}" 2>/dev/null) || code="000"
          if [ "$code" = "200" ]; then
            echo "UI is healthy (HTTP 200) on attempt $i"
            echo "healthy=true" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          echo "  attempt $i: HTTP $code"
          sleep 10
        done
        echo "healthy=false" >> "$GITHUB_OUTPUT"
        echo "::warning::UI did not return 200 after 30 attempts — may need manual check"

    - name: Start summary
      # Runs even when an earlier step failed, so every value below may be
      # empty and is given an explicit fallback.
      if: always()
      env:
        UI_URL: ${{ steps.verify.outputs.ui_url }}
        UI_HEALTHY: ${{ steps.verify.outputs.healthy }}
        PG_READY: ${{ steps.pg.outputs.ready }}
        APPS_TOTAL: ${{ steps.restore.outputs.total }}
        APPS_FAILED: ${{ steps.restore.outputs.failed }}
        VAULT_RESULT: ${{ steps.vault.outputs.result }}
      run: |
        case "$PG_READY" in
          true)  pg_status="Ready" ;;
          false) pg_status="not Ready after 30 attempts" ;;
          *)     pg_status="wait step did not run" ;;
        esac
        if [ -z "$APPS_TOTAL" ]; then
          apps_status="restore step did not complete"
        elif [ -n "$APPS_FAILED" ]; then
          apps_status="failed to scale: $APPS_FAILED"
        else
          apps_status="all $APPS_TOTAL scaled up"
        fi
        case "$UI_HEALTHY" in
          true)  ui_status="HTTP 200" ;;
          false) ui_status="no HTTP 200 after 30 attempts" ;;
          *)     ui_status="not checked" ;;
        esac
        {
          echo "## On-Hours Scale-Up"
          echo ""
          echo "- **Time:** $(date -u +%Y-%m-%dT%H:%M:%SZ)"
          echo "- **Target:** ${UI_URL:-unknown}"
          echo "- **PostgreSQL:** $pg_status"
          echo "- **Container Apps:** $apps_status"
          echo "- **Vault:** ${VAULT_RESULT:-bootstrap step did not run} (Neo4j data persists via Azure Files — see ADR-017)"
          echo "- **UI check:** $ui_status"
          echo "- **Next stop:** manual only — scheduled triggers are disabled (ADR-018); run this workflow with action=stop"
        } >> "$GITHUB_STEP_SUMMARY"