Skip to content

Commit 94cc003

Browse files
authored
system status folder (#29)
* everything for system status * without coin-api
1 parent 3bff5cd commit 94cc003

9 files changed

Lines changed: 355 additions & 0 deletions

File tree

status/docker-compose.yml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
version: '2'
2+
3+
services:
4+
prometheus:
5+
image: prom/prometheus:latest
6+
restart: always
7+
ports:
8+
- 9090:9090
9+
volumes:
10+
- ./prometheus.yml:/etc/prometheus/prometheus.yml
11+
command:
12+
- --config.file=/etc/prometheus/prometheus.yml
13+
14+
scraper:
15+
container_name: scraper
16+
build:
17+
context: .
18+
dockerfile: ./dockerfile.scraper
19+
volumes:
20+
- ./output:/app/output
21+
- ./templates:/app/templates
22+
- ./json.json:/app/json.json
23+
ports:
24+
- "8000:8000"
25+
restart: 'on-failure'
26+
command: python3 scraper.py --json json.json --promurl "http://one.sce/prometheus/metrics"
27+
28+
29+
nginx:
30+
image: nginx:1.25.3
31+
ports:
32+
- 80:80
33+
volumes:
34+
- ./nginx.conf:/etc/nginx/nginx.conf

status/dockerfile.scraper

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# filepath: status/dockerfile.scraper
2+
FROM python:3.9-slim
3+
4+
WORKDIR /app
5+
6+
COPY requirements.txt .
7+
8+
RUN pip3 install -r requirements.txt
9+
10+
COPY flags.py .
11+
COPY scraper.py .
12+
COPY templates/ ./templates/
13+
14+
CMD ["python3", "scraper.py", "--json", "json.json"]

status/flags.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import argparse
2+
3+
def get_args():
    """Parse the scraper's command-line options from ``sys.argv``.

    Options:
        --interval / -int: how often queries are run (int, default 15).
        --port: port the server is hosted on (int, default 8000).
        --json: path to the JSON file describing the services to query
            (required).
        --promurl: URL of the Prometheus instance to query
            (default ``http://prometheus:9090``).

    Returns:
        argparse.Namespace with ``interval``, ``port``, ``json`` and
        ``promurl`` attributes.
    """
    # Each entry is (option strings, keyword arguments for add_argument).
    option_table = (
        (
            ("--interval", "-int"),
            {
                "type": int,
                "default": 15,
                "help": "interval for how often queries should be done",
            },
        ),
        (
            ("--port",),
            {
                "type": int,
                "default": 8000,
                "help": "port for server to be hosted on, defaults to 8000",
            },
        ),
        (
            ("--json",),
            {
                "type": str,
                "required": True,
                "help": "argument to a json file, where the json file specifies what services we need to query",
            },
        ),
        (
            ("--promurl",),
            {
                "type": str,
                "default": "http://prometheus:9090",
                "help": "the url for the promtheus container thats running that has to be scraped",
            },
        ),
    )

    cli = argparse.ArgumentParser()
    for option_strings, settings in option_table:
        cli.add_argument(*option_strings, **settings)
    return cli.parse_args()

status/json.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
[
2+
{
3+
"job-id": "prometheus-aggregation",
4+
"query": "up"
5+
}
6+
]

status/nginx.conf

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
http {
2+
# Define cache path and parameters
3+
proxy_cache_path /var/cache/nginx levels=1:2 keys_zone=html_cache:10m max_size=100m inactive=60m;
4+
proxy_temp_path /var/cache/nginx/temp;
5+
6+
#for http://localhost/status
7+
server{
8+
listen 80;
9+
server_name _;
10+
11+
# Enable caching
12+
proxy_cache html_cache;
13+
proxy_cache_valid 200 302 30s;
14+
proxy_cache_valid 404 30s;
15+
16+
location /{
17+
proxy_pass http://scraper:8000;
18+
19+
# Cache HTML files
20+
location ~* \.html$ {
21+
proxy_pass http://scraper:8000;
22+
proxy_cache html_cache;
23+
proxy_cache_min_uses 1;
24+
proxy_cache_lock on;
25+
add_header X-Cache-Status $upstream_cache_status;
26+
}
27+
}
28+
}
29+
}
30+
31+
events{ }

status/prometheus.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
global:
2+
scrape_interval: 10s
3+
4+
5+
scrape_configs:
6+
- job_name: 'coin-api'
7+
static_configs:
8+
- targets: ['coin-api:5000']

status/requirements.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
fastapi==0.84.0
2+
uvicorn==0.18.3
3+
Jinja2==3.0.2
4+
py-grpc-prometheus==0.7.0
5+
prometheus_api_client
6+

status/scraper.py

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
from dataclasses import dataclass
2+
from fastapi.responses import HTMLResponse
3+
from prometheus_api_client import PrometheusConnect
4+
from fastapi import FastAPI, Request
5+
from fastapi.templating import Jinja2Templates
6+
from fastapi.staticfiles import StaticFiles
7+
import uvicorn
8+
from flags import get_args
9+
import json
10+
import time
11+
import threading
12+
import os
13+
from datetime import datetime, timedelta
14+
import pytz
15+
import requests
16+
17+
18+
19+
# Web application that serves the status page.
app = FastAPI()

# Every timestamp shown to users is rendered in US Pacific time.
pacific_tz = pytz.timezone("US/Pacific")

# Jinja2 templates are loaded from ./templates relative to the working directory.
templates = Jinja2Templates(directory="templates")

# Command-line options are parsed once, at import time.
args = get_args()

# Client for the Prometheus HTTP API. With the default --promurl an "up" query
# goes to http://prometheus:9090/api/v1/query?query=up.
# disable_ssl=True turns off TLS certificate verification for those requests.
prom = PrometheusConnect(url=args.promurl, disable_ssl=True)

# Rows rendered by the status page; the polling thread rebuilds this list.
metrics_data = []

# Look-back window, in hours, used when searching for the start of an outage.
up_hours = 24
31+
32+
@dataclass
class metrics:
    """Container for a single sample: job name, timestamp and value.

    Instances are built positionally as ``metrics(job_name, timestamp, value)``.
    """

    # Name of the job the sample belongs to.
    job_name: str
    # When the sample was taken -- presumably Unix epoch seconds; TODO confirm,
    # the visible code never constructs this class.
    timestamp: float
    # Sample value.
    value: float
37+
38+
def check_status(query, base_url="http://prometheus:9090", timeout=10):
    """Report whether Prometheus answers an instant query with status "success".

    Args:
        query: PromQL expression sent as the ``query`` request parameter.
        base_url: Root URL of the Prometheus server; ``/api/v1/query`` is
            appended to it. The default is the address this check has always
            used.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        True when the JSON body has ``"status": "success"``; False when the
        status is anything else or the key is absent; None when the request
        fails, returns an HTTP error, or the body cannot be decoded.
    """
    endpoint = f"{base_url.rstrip('/')}/api/v1/query"
    try:
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(endpoint, params={"query": query}, timeout=timeout)
        response.raise_for_status()  # Raise an error for HTTP issues
        # .get() instead of indexing so a missing key reaches the branch below
        # rather than surfacing as a KeyError.
        status = response.json().get("status")
    except Exception as e:
        print(f"Error querying Prometheus: {e}")
        return None

    if status is None:
        print("the status key does not exist!")
        return False
    return status == "success"
55+
56+
def polling_loop(interval, config):
    """Rebuild ``metrics_data`` forever, pausing ``interval`` seconds per cycle.

    Args:
        interval: Seconds passed to ``time.sleep`` between cycles.
        config: Iterable of dicts, each with a ``"job-id"`` and a ``"query"``
            key. Only entries whose query is exactly ``"up"`` are processed.

    This function never returns.
    """
    global metrics_data
    while True:
        # NOTE(review): the list is emptied before it is refilled, so a page
        # request arriving mid-cycle can see an empty or partial list.
        metrics_data = []
        for entry in config:
            job_id, expression = entry["job-id"], entry["query"]
            if expression != "up":
                continue
            process_up_query(expression, job_id)
        time.sleep(interval)
66+
67+
service_data = {}
68+
69+
def process_up_query(query, service_name):
    """Run ``query`` against Prometheus and append status rows to ``metrics_data``.

    One row ``{"instance": service_name, "status": ...}`` is appended per
    series returned by the query:

    * ``"Healthy"`` when the sample value is greater than 0;
    * otherwise the text returned by ``get_first_match_time`` (falling back to
      ``"Unhealthy"`` if that lookup yields nothing);
    * ``"Error in querying"`` (single row) when the query returns no series;
    * ``"Unhealthy due to error!"`` when anything raises while processing.

    Args:
        query: PromQL expression to evaluate.
        service_name: Label shown in the "instance" column for every row.
    """
    global metrics_data

    # Refresh up_hours first; it sizes the look-back window used below.
    process_time_query("time() - process_start_time_seconds", service_name)

    # Advisory only: a failed check is logged but does not stop processing.
    if not check_status(query=query):
        print("status is not success, please look into it!!")

    try:
        result = prom.custom_query(query=query)
        if not result:
            print(f"No results for query: {query}")
            metrics_data.append({
                "instance": service_name,
                "status": "Error in querying",
            })
            return

        # NOTE(review): every series is reported under service_name, so a query
        # matching several targets produces several identically named rows.
        for metric in result:
            # The sample is indexed as [timestamp, value]; only the value is used.
            value = float(metric["value"][1])
            if value > 0:
                status = "Healthy"
            else:
                status = get_first_match_time(
                    prom=prom,
                    prom_query=query,
                    match_value=0,
                    hours=up_hours,
                )
                # Never publish an empty status; the page would render "None".
                status = status or "Unhealthy"
            metrics_data.append({
                "instance": service_name,
                "status": status,
            })
    except Exception as e:
        print(f"Error processing query '{query}': {e}")
        metrics_data.append({
            "instance": service_name,
            "status": "Unhealthy due to error!",
        })
108+
109+
110+
def process_time_query(query, service_name):
    """Update the global ``up_hours`` from the first sample of ``query``.

    The first returned sample's value is read as a number of seconds,
    converted to whole hours, and stored in ``up_hours``; a result of zero
    hours is stored as 1. When the query returns nothing or anything raises,
    ``up_hours`` is left unchanged (errors are printed).

    Args:
        query: PromQL expression to evaluate.
        service_name: Accepted for symmetry with the other query handlers;
            not used here.
    """
    global up_hours
    try:
        samples = prom.custom_query(query=query)
        if not samples:
            return
        seconds_up = float(samples[0]["value"][1])
        # Whole hours, but never 0 so the look-back window is at least an hour.
        up_hours = int(seconds_up / 3600) or 1
    except Exception as e:
        print(f"Error processing time query '{query}': {e}")
122+
123+
def get_first_match_time(prom, prom_query, match_value=0, hours=24):
    """Describe when the current run of ``match_value`` samples began.

    Range data for ``prom_query`` is fetched for the last ``hours`` hours. For
    the first series whose newest sample equals ``match_value``, the samples
    are walked backwards to the oldest one of that unbroken trailing run, and
    its timestamp is reported in Pacific time. If the run fills the whole
    window, the reported time is the oldest sample available, i.e. the outage
    started at least that long ago.

    Args:
        prom: Client exposing ``get_metric_range_data(metric_name, start_time,
            end_time)``.
        prom_query: Metric name to fetch.
        match_value: Sample value that counts as "down" (default 0).
        hours: Size of the look-back window in hours.

    Returns:
        ``"Unhealthy as of <YYYY-MM-DD HH:MM:SS TZ>"`` when a trailing run is
        found; ``"Unhealthy"`` when no series currently ends in
        ``match_value``; ``"Error checking status history"`` if fetching or
        parsing the data raises.
    """
    end_time = datetime.now()
    start_time = end_time - timedelta(hours=hours)

    try:
        result = prom.get_metric_range_data(
            metric_name=prom_query,
            start_time=start_time,
            end_time=end_time,
        )

        for series in result:
            outage_start = None
            # Assumes samples arrive oldest-first (as the Prometheus range API
            # returns them), so reversed() walks newest-to-oldest. Stop at the
            # first sample that is not "down": everything seen before it is
            # the current outage.
            for timestamp, value in reversed(series["values"]):
                if float(value) != match_value:
                    break
                outage_start = float(timestamp)

            if outage_start is not None:
                # Convert the epoch value straight into an aware datetime; a
                # naive intermediate would be interpreted as local time.
                pacific_time = datetime.fromtimestamp(outage_start, pacific_tz)
                readable_time = pacific_time.strftime("%Y-%m-%d %H:%M:%S %Z")
                return f"Unhealthy as of {readable_time}"
    except Exception as e:
        print(f"Error in get_first_match_time: {e}")
        return "Error checking status history"

    # No series is currently down within the window; still return a string so
    # callers never receive None.
    return "Unhealthy"
151+
152+
153+
@app.get("/", response_class=HTMLResponse)
async def get_metrics(request: Request):
    """Render ``health.html`` with the current ``metrics_data`` rows.

    The template receives the request, the rows under ``metrics``, and the
    current Pacific time under ``timestamp``.
    """
    rendered_at = datetime.now(pacific_tz).strftime("%Y-%m-%d %H:%M:%S %Z")
    context = {
        "request": request,
        "metrics": metrics_data,
        "timestamp": rendered_at,
    }
    return templates.TemplateResponse("health.html", context)
159+
160+
def main():
    """Load the service list, start the polling thread, and serve the page.

    Reads the JSON file named by ``--json``, runs ``polling_loop`` in a
    background thread with ``--interval``, then blocks in uvicorn on
    ``0.0.0.0`` at ``--port``.
    """
    with open(args.json, "r") as config_file:
        services = json.load(config_file)

    # daemon=True ensures the thread exits when the main program exits.
    poller = threading.Thread(
        target=polling_loop,
        args=(args.interval, services),
        daemon=True,
    )
    poller.start()

    uvicorn.run(app, host="0.0.0.0", port=args.port)


if __name__ == "__main__":
    main()

status/templates/health.html

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
<!DOCTYPE html>
2+
<html>
3+
<head>
4+
<title>Prometheus Metrics</title>
5+
<style>
6+
body {
7+
font-family: Arial, sans-serif;
8+
margin: 20px;
9+
}
10+
h1 {
11+
color: #333;
12+
}
13+
table {
14+
border-collapse: collapse;
15+
width: 100%;
16+
margin-top: 20px;
17+
}
18+
th, td {
19+
border: 1px solid #ddd;
20+
padding: 8px;
21+
text-align: left;
22+
}
23+
th {
24+
background-color: #f2f2f2;
25+
}
26+
.healthy {
27+
color: green;
28+
}
29+
.tie {
30+
color: blue;
31+
}
32+
.unhealthy {
33+
color: red;
34+
}
35+
</style>
36+
</head>
37+
<body>
38+
<h1>Prometheus Metrics</h1>
39+
<p>Last updated: {{ timestamp }}</p>
40+
<table>
41+
<tr>
42+
<th>Instance</th>
43+
<th>Value</th>
44+
</tr>
45+
{% for item in metrics %}
46+
<tr>
47+
<td>{{ item.instance }}</td>
48+
<td class="{% if item.status == 'Healthy' %}healthy{% else %}unhealthy{% endif %}">
49+
{{ item.status }}
50+
</td>
51+
</tr>
52+
{% endfor %}
53+
</table>
54+
</body>
55+
</html>

0 commit comments

Comments
 (0)