Skip to content

Commit 9d4d4e2

Browse files
committed
Add python plotting script for raw data and anomalies
1 parent 98b25e3 commit 9d4d4e2

1 file changed

Lines changed: 172 additions & 0 deletions

File tree

plot_data.py

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
"""
2+
Plot anomalies and raw data from CSV files.
3+
"""
4+
5+
import pandas as pd
6+
import matplotlib.pyplot as plt
7+
from datetime import datetime
8+
import argparse
9+
10+
11+
def convert_timestamp(timestamp_ms):
    """Convert a Unix timestamp in milliseconds to a local ``datetime``.

    Args:
        timestamp_ms: Milliseconds elapsed since the Unix epoch.

    Returns:
        A naive ``datetime`` (as produced by ``datetime.fromtimestamp``)
        whose microsecond field carries the millisecond part of the input,
        so samples less than a second apart stay distinct.
    """
    # divmod floors, so the remainder is always in [0, 1000) -- also for
    # negative timestamps.
    whole_seconds, millis = divmod(timestamp_ms, 1000)
    moment = datetime.fromtimestamp(whole_seconds)
    # Set the sub-second part directly instead of going through float
    # division, which would be subject to rounding.
    return moment.replace(microsecond=int(millis * 1000))
16+
17+
18+
def plot_raw_data(csv_file, value_name='Value'):
    """Load a CSV of raw samples and display them as a line plot over time.

    The file is read with ``pandas.read_csv``; its ``timestamp`` column is
    passed through ``convert_timestamp`` to build the x-axis and its
    ``value`` column is drawn on the y-axis.

    Args:
        csv_file: Path of the CSV file to read.
        value_name: Label used for the series, the y-axis and the title.
    """
    print(f"Loading raw data from {csv_file}...")
    frame = pd.read_csv(csv_file)

    # x-axis: datetimes derived from the timestamp column
    frame['datetime'] = frame['timestamp'].apply(convert_timestamp)

    # Appearance of the data series, gathered in one place
    series_style = dict(
        marker='o',
        linestyle='-',
        markersize=3,
        linewidth=1,
        color='steelblue',
        label=value_name,
    )

    plt.figure(figsize=(14, 6))
    plt.plot(frame['datetime'], frame['value'], **series_style)

    # Title, axis labels, grid and legend
    plt.title(f'{value_name} - Raw Data Plot', fontsize=14, fontweight='bold')
    plt.xlabel('Time', fontsize=12)
    plt.ylabel(value_name, fontsize=12)
    plt.grid(True, alpha=0.3)
    plt.legend()

    # Slanted tick labels keep dense time axes readable
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()

    plt.show()
44+
45+
46+
def _find_anomaly_regions(flags):
    """Return inclusive ``(start, end)`` index pairs for each run of truthy flags."""
    regions = []
    run_start = None
    for idx, flagged in enumerate(flags):
        if flagged and run_start is None:
            # A new run of anomalies begins here
            run_start = idx
        elif not flagged and run_start is not None:
            # The run ended on the previous element
            regions.append((run_start, idx - 1))
            run_start = None

    # The data may end while still inside a run
    if run_start is not None:
        regions.append((run_start, len(flags) - 1))
    return regions


def plot_anomalies(csv_file, value_name='Value', show_anomaly_values=False):
    """Plot a series with its anomalies highlighted.

    The CSV is read with ``pandas.read_csv``. Its ``timestamp`` column is
    converted with ``convert_timestamp``, ``value`` is drawn as a line, rows
    whose ``isAnomaly`` is truthy get a red marker, and every consecutive
    run of anomalous rows is shaded with a red background span.

    Args:
        csv_file: Path of the CSV file to read.
        value_name: Label used for the series, the y-axis and the title.
        show_anomaly_values: When true, also draw the ``anomalyScore``
            column on a secondary y-axis.
    """
    print(f"Loading anomaly data from {csv_file}...")
    df = pd.read_csv(csv_file)

    # Convert timestamps
    df['datetime'] = df['timestamp'].apply(convert_timestamp)

    # Normalise the flag column to booleans (e.g. 0/1 input)
    df['isAnomaly'] = df['isAnomaly'].astype(bool)

    fig, ax = plt.subplots(figsize=(14, 6))

    # Red background for every continuous anomaly region.
    # NOTE(review): a run of a single row has start == end, so its span has
    # zero width and only the red marker below makes it visible.
    times = df['datetime']
    for start_idx, end_idx in _find_anomaly_regions(df['isAnomaly'].tolist()):
        ax.axvspan(times.iloc[start_idx], times.iloc[end_idx],
                   alpha=0.2, color='red', zorder=0)

    # All values as one continuous line
    ax.plot(df['datetime'], df['value'], marker='o', linestyle='-',
            markersize=3, linewidth=1, color='steelblue', label=value_name, zorder=2)

    # Anomalous points on top, in red
    anomalies = df[df['isAnomaly']]
    ax.scatter(anomalies['datetime'], anomalies['value'],
               color='red', s=80, marker='o', label='Anomaly', zorder=3)

    if show_anomaly_values:
        # Secondary y-axis for the anomaly scores
        ax2 = ax.twinx()
        ax2.plot(df['datetime'], df['anomalyScore'], marker='s', linestyle='--',
                 markersize=3, linewidth=1, color='orange', label='Anomaly Score',
                 zorder=2, alpha=0.7)
        ax2.set_ylabel('Anomaly Score', fontsize=12, color='orange')
        ax2.tick_params(axis='y', labelcolor='orange')

        # One legend containing the entries of both axes
        lines1, labels1 = ax.get_legend_handles_labels()
        lines2, labels2 = ax2.get_legend_handles_labels()
        ax.legend(lines1 + lines2, labels1 + labels2, loc='upper left')
    else:
        ax.legend()

    # Formatting
    ax.set_title(f'{value_name} with Anomaly Detection', fontsize=14, fontweight='bold')
    ax.set_xlabel('Time', fontsize=12)
    ax.set_ylabel(value_name, fontsize=12)
    ax.grid(True, alpha=0.3)

    # Rotate the x tick labels of the primary axes explicitly. plt.xticks()
    # acts on the *current* axes, which is the twin axes once twinx() has
    # been called -- and the twin's x-axis is hidden, so the visible labels
    # would stay unrotated.
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    plt.tight_layout()

    plt.show()
127+
128+
129+
def main(argv=None):
    """Parse the command line and draw the requested plot(s).

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            lets argparse read the process command line, so calling
            ``main()`` without arguments behaves as before.

    Raises:
        SystemExit: With status 2 when neither ``--raw`` nor ``--anomalies``
            is given (the error and the help text are written to stderr),
            and from argparse itself on invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description='Plots data from CSV files',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python plot_data.py --raw cpu_usage_raw.csv --value-name "CPU Usage"
python plot_data.py --anomalies cpu_anomalies.csv --value-name "CPU Usage"
python plot_data.py --anomalies cpu_anomalies.csv --value-name "CPU Usage" --show-anomaly-values
python plot_data.py --raw data.csv --value-name "Temperature"
"""
    )

    # Define arguments
    parser.add_argument('--raw', type=str, metavar='FILE',
                        help='Plots raw data')
    parser.add_argument('--anomalies', type=str, metavar='FILE',
                        help='Plots data with anomaly detection')
    parser.add_argument('--value-name', type=str, metavar='NAME', default='Value',
                        help='Name of the values being plotted (e.g., CPU Usage, Memory, Temperature)')
    parser.add_argument('--show-anomaly-values', action='store_true',
                        help='Show anomaly scores on a secondary y-axis')

    args = parser.parse_args(argv)

    # At least one option must be chosen. Report the problem on stderr and
    # exit with a non-zero status so shells and scripts can detect the
    # failure (previously this path returned normally, i.e. exit status 0).
    if not (args.raw or args.anomalies):
        parser.exit(
            2,
            "Error: At least one of --raw or --anomalies must be specified\n"
            "\n"
            + parser.format_help(),
        )

    # Plot data
    if args.raw:
        plot_raw_data(args.raw, args.value_name)

    if args.anomalies:
        plot_anomalies(args.anomalies, args.value_name, args.show_anomaly_values)
169+
170+
171+
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)