-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdevops-app-health-check.sh
More file actions
executable file
·333 lines (279 loc) · 10.9 KB
/
devops-app-health-check.sh
File metadata and controls
executable file
·333 lines (279 loc) · 10.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
#!/bin/bash
# DevOps App Health Check & Unit Verification
#
# Comprehensive health check for all units, spaces, and resources.
#
# Usage: devops-app-health-check.sh [space] [namespace] [api-endpoint]
#   space         ConfigHub space        (default: drift-test-demo)
#   namespace     Kubernetes namespace   (default: drift-test)
#   api-endpoint  drift API URL          (default: http://localhost:8082/api/live)
set -e

# Banner
printf '%s\n' \
  "==========================================" \
  "DevOps App Health Check System" \
  "==========================================" \
  "Date: $(date)" \
  ""

# ANSI color escapes, kept as literal backslash sequences for `echo -e`
NC='\033[0m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
GREEN='\033[0;32m'

# Positional arguments with their fallbacks
SPACE="${1:-drift-test-demo}"
NAMESPACE="${2:-drift-test}"
API_ENDPOINT="${3:-http://localhost:8082/api/live}"

printf '%s\n' \
  "Configuration:" \
  " ConfigHub Space: $SPACE" \
  " Kubernetes Namespace: $NAMESPACE" \
  " API Endpoint: $API_ENDPOINT" \
  ""

# Running state shared by every check: the score starts perfect and the
# issue list starts empty.
ISSUES=()
HEALTH_SCORE=100
#######################################
# Run one health probe and record the outcome.
# Globals:
#   HEALTH_SCORE (decremented by 10 on an unhealthy result)
#   ISSUES       (appended with "<component>: <check_name> failed")
#   GREEN, RED, NC (read, for colored output)
# Arguments:
#   $1 - component label used in the issue text
#   $2 - human-readable check name
#   $3 - shell command string to evaluate (its output is discarded)
#   $4 - expected outcome: "pass" (command should succeed) or
#        "fail" (command should fail); any other value is always unhealthy
# Outputs:
#   One status line on stdout.
# Returns:
#   0 when the outcome matches the expectation, 1 otherwise. Callers running
#   under `set -e` must guard the call if a failed probe should not end the
#   script.
#######################################
check_health() {
  local component=$1
  local check_name=$2
  local command=$3
  local expected=$4
  local outcome="fail"

  echo -n " $check_name ... "

  # NOTE(review): eval runs the string as shell code; only pass trusted
  # command strings.
  if eval "$command" > /dev/null 2>&1; then
    outcome="pass"
  fi

  if [ "$outcome" = "$expected" ]; then
    echo -e "${GREEN}✓ HEALTHY${NC}"
    return 0
  fi

  echo -e "${RED}✗ UNHEALTHY${NC}"
  # Plain assignment instead of ((HEALTH_SCORE-=10)): the (( )) form returns
  # status 1 when the result is 0, which under `set -e` would terminate the
  # script before the issue is recorded.
  HEALTH_SCORE=$((HEALTH_SCORE - 10))
  ISSUES+=("$component: $check_name failed")
  return 1
}
echo "=========================================="
echo "1. CONFIGHUB HEALTH CHECK"
echo "=========================================="
echo ""
# Check ConfigHub connectivity.
# check_health returns 1 for an unhealthy result; under `set -e` an unguarded
# call would end the script at the first failed probe, so each call is
# guarded with `|| true`.
echo "ConfigHub Connection:"
check_health "ConfigHub" "CLI available" "command -v cub" "pass" || true
check_health "ConfigHub" "Auth valid" "cub auth whoami 2>/dev/null" "pass" || true
# Single quotes: check_health evals the string, so "$SPACE" is expanded (and
# kept as one word) at eval time rather than spliced in unquoted here.
check_health "ConfigHub" "Space exists" 'cub space get "$SPACE" 2>/dev/null' "pass" || true
echo ""
echo "ConfigHub Units:"
# Names of all units in the space; empty when nothing could be listed
UNITS=$(cub unit list --space "$SPACE" --output json 2>/dev/null | jq -r '.[].name' 2>/dev/null || echo "")
if [ -n "$UNITS" ]; then
  # tr strips the padding some wc implementations print around the count
  UNIT_COUNT=$(echo "$UNITS" | wc -w | tr -d '[:space:]')
  echo -e " Total units: ${GREEN}$UNIT_COUNT${NC}"
  # Word-splitting is intentional: UNITS is a whitespace-separated name list
  for unit in $UNITS; do
    echo -n " • $unit: "
    # Check unit details
    UNIT_DATA=$(cub unit get "$unit" --space "$SPACE" --output json 2>/dev/null || echo "{}")
    if [ -n "$UNIT_DATA" ] && [ "$UNIT_DATA" != "{}" ]; then
      # `//` supplies the fallback when the key is absent; without it jq
      # prints the string "null" and exits 0, so the `|| echo` never fires.
      KIND=$(echo "$UNIT_DATA" | jq -r '.kind // "unknown"' 2>/dev/null || echo "unknown")
      REPLICAS=$(echo "$UNIT_DATA" | jq -r '.spec.replicas // "N/A"' 2>/dev/null || echo "N/A")
      echo -e "${GREEN}OK${NC} (Type: $KIND, Replicas: $REPLICAS)"
    else
      echo -e "${RED}ERROR${NC}"
      # Assignment form: ((x-=n)) returns 1 when the result is 0 and would
      # trip `set -e`.
      HEALTH_SCORE=$((HEALTH_SCORE - 5))
      ISSUES+=("ConfigHub: Unit $unit not accessible")
    fi
  done
else
  echo -e " ${RED}No units found${NC}"
  HEALTH_SCORE=$((HEALTH_SCORE - 20))
  ISSUES+=("ConfigHub: No units in space $SPACE")
fi
echo ""
echo "=========================================="
echo "2. KUBERNETES HEALTH CHECK"
echo "=========================================="
echo ""
echo "Kubernetes Cluster:"
# check_health returns 1 for an unhealthy result; `|| true` keeps `set -e`
# from ending the script at the first failed probe.
check_health "Kubernetes" "Kubectl available" "command -v kubectl" "pass" || true
check_health "Kubernetes" "Cluster reachable" "kubectl cluster-info 2>/dev/null | grep -q 'running'" "pass" || true
# Single quotes: the string is eval'd by check_health, so "$NAMESPACE" is
# expanded as one word at eval time.
check_health "Kubernetes" "Namespace exists" 'kubectl get namespace "$NAMESPACE"' "pass" || true
echo ""
echo "Kubernetes Resources:"
# Names of all deployments in the namespace; empty when none could be listed
DEPLOYMENTS=$(kubectl get deployments -n "$NAMESPACE" -o json 2>/dev/null | jq -r '.items[].metadata.name' 2>/dev/null || echo "")
if [ -n "$DEPLOYMENTS" ]; then
  # Word-splitting is intentional: DEPLOYMENTS is a whitespace-separated list
  for deployment in $DEPLOYMENTS; do
    echo -n " • Deployment $deployment: "
    # Get deployment status
    READY=$(kubectl get deployment "$deployment" -n "$NAMESPACE" -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0")
    DESIRED=$(kubectl get deployment "$deployment" -n "$NAMESPACE" -o jsonpath='{.spec.replicas}' 2>/dev/null || echo "0")
    # The jsonpath query can succeed with empty output when the field is
    # not present; treat that as 0 so the comparison and the printed ratio
    # stay meaningful.
    READY=${READY:-0}
    DESIRED=${DESIRED:-0}
    if [ "$READY" = "$DESIRED" ] && [ "$READY" != "0" ]; then
      echo -e "${GREEN}HEALTHY${NC} ($READY/$DESIRED replicas)"
    else
      echo -e "${YELLOW}DEGRADED${NC} ($READY/$DESIRED replicas)"
      # Assignment form: ((x-=n)) returns 1 when the result is 0 and would
      # trip `set -e`.
      HEALTH_SCORE=$((HEALTH_SCORE - 5))
      ISSUES+=("K8s: Deployment $deployment degraded")
    fi
  done
else
  echo -e " ${YELLOW}No deployments found${NC}"
fi
echo ""
echo "=========================================="
echo "3. DRIFT DETECTION HEALTH"
echo "=========================================="
echo ""
# Check drift via API
echo "Drift Detection API:"
# Start at 0 so later numeric comparisons work even when the API is offline
DRIFTED_COUNT=0
API_RESPONSE=$(curl -s "$API_ENDPOINT" 2>/dev/null || echo "{}")
if [ -n "$API_RESPONSE" ] && [ "$API_RESPONSE" != "{}" ]; then
  echo -e " API Status: ${GREEN}ONLINE${NC}"
  # Parse drift data; `// []` turns a missing key into an empty list
  RESOURCES=$(echo "$API_RESPONSE" | jq '.resources // []' 2>/dev/null || echo "[]")
  RESOURCE_COUNT=$(echo "$RESOURCES" | jq 'length' 2>/dev/null || echo "0")
  DRIFTED_COUNT=$(echo "$RESOURCES" | jq '[.[] | select(.is_drifted == true)] | length' 2>/dev/null || echo "0")
  echo " Resources monitored: $RESOURCE_COUNT"
  echo " Resources with drift: $DRIFTED_COUNT"
  if [ "$DRIFTED_COUNT" -gt 0 ]; then
    echo ""
    echo " Drifted Resources:"
    echo "$RESOURCES" | jq -r '.[] | select(.is_drifted == true) | " • \(.name): Expected \(.expected_replicas), Got \(.replicas)"' 2>/dev/null
    # Check corrections
    CORRECTIONS=$(echo "$API_RESPONSE" | jq '.corrections // []' 2>/dev/null || echo "[]")
    CORRECTION_COUNT=$(echo "$CORRECTIONS" | jq 'length' 2>/dev/null || echo "0")
    if [ "$CORRECTION_COUNT" -gt 0 ]; then
      echo ""
      echo " Corrections Available:"
      echo "$CORRECTIONS" | jq -r '.[] | " • \(.resource): \(.impact)"' 2>/dev/null
      # Verify corrections use ConfigHub.
      # grep -c already prints "0" when nothing matches but exits 1; using
      # `|| echo 0` there would yield "0<newline>0" and break the -gt test,
      # so only the exit status is neutralized.
      USES_CUB=$(echo "$CORRECTIONS" | jq -r '.[].command' 2>/dev/null | grep -c "cub unit" || true)
      USES_KUBECTL=$(echo "$CORRECTIONS" | jq -r '.[].command' 2>/dev/null | grep -c "kubectl" || true)
      if [ "${USES_KUBECTL:-0}" -gt 0 ]; then
        echo -e " ${RED}✗ CRITICAL: Uses kubectl (prohibited!)${NC}"
        # Assignment form: ((x-=n)) returns 1 when the result is 0 and
        # would trip `set -e`.
        HEALTH_SCORE=$((HEALTH_SCORE - 30))
        ISSUES+=("CRITICAL: Corrections use kubectl")
      elif [ "${USES_CUB:-0}" -gt 0 ]; then
        echo -e " ${GREEN}✓ Corrections use ConfigHub only${NC}"
      fi
    fi
  else
    echo -e " ${GREEN}✓ No drift detected${NC}"
  fi
else
  echo -e " API Status: ${RED}OFFLINE${NC}"
  HEALTH_SCORE=$((HEALTH_SCORE - 20))
  ISSUES+=("API: Drift detection API not responding")
fi
echo ""
echo "=========================================="
echo "4. UNIT SYNCHRONIZATION CHECK"
echo "=========================================="
echo ""
echo "Verifying ConfigHub ↔ Kubernetes Sync:"
# Compare ConfigHub units with K8s deployments.
# Word-splitting is intentional: UNITS is a whitespace-separated name list.
for unit in $UNITS; do
  # Derive the deployment name from the unit name: drop a trailing "-unit",
  # then remove the first "deployment-" occurrence.
  DEPLOYMENT_NAME=${unit%-unit}
  DEPLOYMENT_NAME=${DEPLOYMENT_NAME/deployment-/}
  echo -n " $unit → $DEPLOYMENT_NAME: "
  # Check if deployment exists
  if kubectl get deployment "$DEPLOYMENT_NAME" -n "$NAMESPACE" > /dev/null 2>&1; then
    # Get replicas from both
    CONFIGHUB_REPLICAS=$(cub unit get "$unit" --space "$SPACE" --output json 2>/dev/null | jq -r '.spec.replicas' 2>/dev/null || echo "?")
    K8S_REPLICAS=$(kubectl get deployment "$DEPLOYMENT_NAME" -n "$NAMESPACE" -o jsonpath='{.spec.replicas}' 2>/dev/null || echo "?")
    if [ "$CONFIGHUB_REPLICAS" = "$K8S_REPLICAS" ]; then
      echo -e "${GREEN}SYNCED${NC} (Replicas: $CONFIGHUB_REPLICAS)"
    else
      echo -e "${YELLOW}DRIFT${NC} (ConfigHub: $CONFIGHUB_REPLICAS, K8s: $K8S_REPLICAS)"
      # Assignment form: ((x-=n)) returns 1 when the result is 0 and would
      # trip `set -e`.
      HEALTH_SCORE=$((HEALTH_SCORE - 5))
      ISSUES+=("Sync: $unit has drift")
    fi
  else
    echo -e "${RED}NOT DEPLOYED${NC}"
    HEALTH_SCORE=$((HEALTH_SCORE - 10))
    ISSUES+=("Sync: $unit not found in Kubernetes")
  fi
done
echo ""
echo "=========================================="
echo "5. APP COMPLIANCE CHECK"
echo "=========================================="
echo ""
echo "Checking App Compliance:"
# Root directory holding the app checkouts. Override with
# DEVOPS_EXAMPLES_DIR; the default is the original hard-coded location.
APPS_ROOT="${DEVOPS_EXAMPLES_DIR:-/Users/alexisrichardson/github-repos/devops-examples}"
# Quick compliance checks for running apps; apps whose directory is absent
# are skipped silently.
for app_dir in drift-detector cost-optimizer cost-impact-monitor; do
  APP_PATH="$APPS_ROOT/$app_dir"
  if [ -d "$APP_PATH" ]; then
    echo -n " $app_dir: "
    # Look for "kubectl" in the app's top-level .go files, ignoring lines
    # that contain "//" or "ConfigHub". -h drops the filename prefix so the
    # two filters only ever see file content, not the path.
    if grep -rh "kubectl" "$APP_PATH"/*.go 2>/dev/null | grep -v "//" | grep -v "ConfigHub" > /dev/null; then
      echo -e "${RED}NON-COMPLIANT${NC} (uses kubectl)"
      # Assignment form: ((x-=n)) returns 1 when the result is 0 and would
      # trip `set -e`.
      HEALTH_SCORE=$((HEALTH_SCORE - 15))
      ISSUES+=("Compliance: $app_dir uses kubectl")
    else
      echo -e "${GREEN}COMPLIANT${NC}"
    fi
  fi
done
echo ""
echo "=========================================="
echo "6. COST IMPACT CHECK"
echo "=========================================="
echo ""
# Cost figures come from the drift API response; the section is skipped when
# that response is empty or "{}".
if [ -n "$API_RESPONSE" ] && [ "$API_RESPONSE" != "{}" ]; then
  # `// 0` supplies the fallback for a missing key; without it jq prints
  # "null" and exits 0, so the `|| echo` never fires.
  TOTAL_COST=$(echo "$API_RESPONSE" | jq '.total_monthly_cost // 0' 2>/dev/null || echo "0")
  DRIFT_COST=$(echo "$API_RESPONSE" | jq '.drift_cost // 0' 2>/dev/null || echo "0")
  SAVINGS=$(echo "$API_RESPONSE" | jq '.potential_savings // 0' 2>/dev/null || echo "0")
  echo "Cost Analysis:"
  echo " Total Monthly Cost: \$$TOTAL_COST"
  echo " Drift Cost Impact: \$$DRIFT_COST"
  echo " Potential Savings: \$$SAVINGS"
  # Compare inside jq (which the script already depends on): -e maps a
  # true result to exit status 0. Non-numeric or missing values count as 0.
  if echo "$API_RESPONSE" | jq -e '((.drift_cost // 0) | tonumber? // 0) > 10' > /dev/null 2>&1; then
    echo -e " ${YELLOW}⚠ High drift cost detected${NC}"
    # Assignment form: ((x-=n)) returns 1 when the result is 0 and would
    # trip `set -e`.
    HEALTH_SCORE=$((HEALTH_SCORE - 5))
    ISSUES+=("Cost: High drift cost (\$$DRIFT_COST/month)")
  else
    echo -e " ${GREEN}✓ Drift cost within acceptable range${NC}"
  fi
fi
echo ""
echo "=========================================="
echo "HEALTH CHECK SUMMARY"
echo "=========================================="
echo ""
# Map the score to a status band: >= 90 healthy, >= 70 degraded, else critical
if [ "$HEALTH_SCORE" -ge 90 ]; then
  STATUS="${GREEN}✓ HEALTHY${NC}"
  STATUS_TEXT="System is fully operational"
elif [ "$HEALTH_SCORE" -ge 70 ]; then
  STATUS="${YELLOW}⚠ DEGRADED${NC}"
  STATUS_TEXT="System has minor issues"
else
  STATUS="${RED}✗ CRITICAL${NC}"
  STATUS_TEXT="System has critical issues"
fi
echo -e "Overall Health Score: $HEALTH_SCORE/100"
echo -e "Status: $STATUS"
echo "$STATUS_TEXT"
if [ ${#ISSUES[@]} -gt 0 ]; then
  echo ""
  echo "Issues Found:"
  for issue in "${ISSUES[@]}"; do
    echo " • $issue"
  done
fi
echo ""
echo "Quick Actions:"
# DRIFTED_COUNT is only assigned when the drift API answered; default to 0
# so the numeric test does not error on an empty value.
if [ "${DRIFTED_COUNT:-0}" -gt 0 ]; then
  echo " • Fix drift: curl -s $API_ENDPOINT | jq -r '.corrections[].command'"
fi
if [ "$HEALTH_SCORE" -lt 90 ]; then
  echo " • Review issues above and take corrective action"
fi
echo ""
echo "Verification Commands:"
echo " • View units: cub unit list --space $SPACE"
echo " • Check deployments: kubectl get deployments -n $NAMESPACE"
echo " • API status: curl -s $API_ENDPOINT | jq '.timestamp'"
echo " • Run compliance test: ./test-app-compliance-quick.sh"
# Exit status: 0 while the score is at least 70 (healthy or degraded),
# 1 when critical.
if [ "$HEALTH_SCORE" -ge 70 ]; then
  exit 0
fi
exit 1