Skip to content

Commit d202116

Browse files
committed
pylint
1 parent 2aedd35 commit d202116

2 files changed

Lines changed: 73 additions & 47 deletions

File tree

forecastconsumption/fronius_solar_web.py

Lines changed: 71 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
1-
#%%
1+
"""
2+
Class to create load profile from SolarWeb Exports
3+
"""
24
import logging
5+
import os
36

4-
logger = logging.getLogger("__main__")
5-
logger.info('[SolarWeb Export Processor] loading module')
6-
7-
import pandas as pd
87
import numpy as np
8+
import pandas as pd
99
import pytz
10-
import os
10+
11+
logger = logging.getLogger("__main__")
12+
logger.info('[SolarWeb Export Processor] loading module')
1113

1214
class SolarWebExportProcessor:
1315
"""
@@ -20,13 +22,14 @@ class SolarWebExportProcessor:
2022
Additionally, the following columns can be included:
2123
- "Wattpilot / Energie Wattpilot" ergo consumption from Fronius Wattpilot
2224
23-
If these additional columns are included then the load from these "smart" consumers will be subtracted from the
24-
load to get a "base load" under the assumption that these will only run in the cheapest hours anyway.
25+
If these additional columns are included then the load from these "smart" consumers will be
26+
subtracted from the load to get a "base load" under the assumption that these will only run
27+
in the cheapest hours anyway.
2528
2629
The load profile will output month, weekday, hour and energy in Wh
2730
28-
Any gaps in the timeseries will be filled with the weekday average across the existing dataset unless
29-
fill_empty_with_average is set to False.
31+
Any gaps in the timeseries will be filled with the weekday average across the existing dataset
32+
unless fill_empty_with_average is set to False.
3033
3134
Key Features:
3235
- Loads data from a SolarWeb exported Excel file.
@@ -37,29 +40,34 @@ class SolarWebExportProcessor:
3740
- Exports processed data to a CSV file.
3841
3942
Attributes:
40-
file_path (str): Path to the input Excel file.
41-
output_path (str): Path to save the output CSV file.
42-
timezone (str): Timezone for the data (default: 'Europe/Berlin').
43-
fill_empty_with_average (bool): Whether to fill missing data with averages (default: True).
44-
smooth_base_load (bool): Whether to smooth the wallbox ramps in the calculated base load (default: True).
45-
smoothing_threshold (int): Threshold for detecting switched on/off EV wallbox loads (default: 1200 Watts).
46-
smoothing_window_size (int): Window size for smoothing around EV charging (default: 2).
47-
resample_freq (str): Frequency for resampling data (default: '60min').
48-
df (pd.DataFrame): The main DataFrame holding the processed data.
43+
file_path (str): Path to the input Excel file
44+
output_path (str): Path to save the output CSV file
45+
timezone (str): Timezone for the data (default: 'Europe/Berlin')
46+
fill_empty_with_average (bool): Whether to fill missing data with averages (default: True)
47+
smooth_base_load (bool): smooth the wallbox ramps in calculated base load (default: True)
48+
smoothing_threshold (int): Threshold to detect switched on/off EV charging (default: 1200 W)
49+
smoothing_window_size (int): Window size for smoothing around EV charging (default: 2)
50+
resample_freq (str): Frequency for resampling data (default: '60min')
51+
df (pd.DataFrame): The main DataFrame holding the processed data
4952
"""
5053

51-
def __init__(self, file_path, output_path='../config/generated_load_profile.csv', timezone='Europe/Berlin',
52-
fill_empty_with_average=True, smooth_base_load=True, smoothing_threshold=1200,
53-
smoothing_window_size=2, resample_freq='60min'):
54+
# pylint: disable=too-many-instance-attributes, too-many-arguments
55+
# Nine instance attributes / arguments are acceptable here
56+
57+
def __init__(self, file_path, *, output_path='/config/generated_load_profile.csv',
58+
timezone='Europe/Berlin',
59+
fill_empty_with_average=True,
60+
smooth_base_load=True, smoothing_threshold=1200,smoothing_window_size=2,
61+
resample_freq='60min'):
5462
"""
5563
Initialize the SolarWebExportProcessor.
5664
5765
:param file_path: Path to the Excel file containing the data.
58-
:param output_path: Path to save the output CSV file (default: '../config/generated_load_profile.csv').
66+
:param output_path: Path to output CSV file (default: '/config/generated_load_profile.csv').
5967
:param timezone: Timezone for the data (default: 'Europe/Berlin').
6068
:param fill_empty_with_average: Whether to fill missing data with averages (default: True).
6169
:param smooth_base_load: Whether to smooth the base load (default: True).
62-
:param smoothing_threshold: Threshold for detecting sudden changes in base load (default: 1200 Watts).
70+
:param smoothing_threshold: Threshold to detect switched on/off EV charge (default: 1200 W).
6371
:param smoothing_window_size: Window size for smoothing around sudden changes (default: 2).
6472
:param resample_freq: Frequency for resampling data (default: '60min').
6573
"""
@@ -86,7 +94,9 @@ def load_data(self):
8694
# Check if the data has at least 1-hour resolution
8795
time_diff = self.df.index.to_series().diff().min()
8896
if time_diff > pd.Timedelta(hours=1):
89-
raise ValueError(f"The data resolution is larger than 1 hour. Minimum time difference found: {time_diff}.")
97+
raise ValueError(
98+
f"The data resolution is larger than 1 hour. "
99+
f"Minimum time difference found: {time_diff}.")
90100

91101
# Convert float64 columns to float32 for file/memory size
92102
float64_cols = self.df.select_dtypes(include='float64').columns
@@ -100,8 +110,9 @@ def process_wattpilot_columns(self):
100110

101111
# Step 2: Check if any matching columns exist
102112
if not wattpilot_columns.empty:
103-
# Create a new column "Load_Wallbox" with the sum of these columns along axis=1 (across rows)
104-
self.df[('Load_Wallbox', '[Wh]')] = self.df[wattpilot_columns].sum(axis=1) # this also replaces all NaN with 0
113+
# Create a new column "Load_Wallbox" with the row sum of these columns
114+
# This also replaces all NaN with 0
115+
self.df[('Load_Wallbox', '[Wh]')] = self.df[wattpilot_columns].sum(axis=1)
105116
else:
106117
# If no matching columns exist, create a "Load_Wallbox" column with zeros
107118
self.df[('Load_Wallbox', '[Wh]')] = 0
@@ -111,36 +122,47 @@ def calculate_base_load(self):
111122

112123
# Check if the required column ('Verbrauch', '[Wh]') exists
113124
if ('Verbrauch', '[Wh]') not in self.df.columns:
114-
raise KeyError(f"The required column ('Verbrauch', '[Wh]') does not exist in the input data.")
125+
raise KeyError(
126+
"The required column ('Verbrauch', '[Wh]') does not exist in the input data."
127+
)
115128

116129
# Calculate a base load by removing the wallbox loads
117-
self.df[('base_load', '[Wh]')] = self.df['Verbrauch', '[Wh]'] - self.df['Load_Wallbox', '[Wh]']
130+
self.df[('base_load', '[Wh]')] = (
131+
self.df['Verbrauch', '[Wh]'] - self.df['Load_Wallbox', '[Wh]']
132+
)
118133

119134
# Smoothing of data where Wallbox starts or ends charging due to artifacts (if enabled)
120135
if self.smooth_base_load:
121136
# Step 1: Calculate the difference between consecutive values
122137
self.df[('WB_diff', '[Wh]')] = self.df['Load_Wallbox', '[Wh]'].diff().abs()
123138

124139
# Step 2: Define a threshold for detecting sudden changes (e.g., a large jump)
125-
sudden_change_idx = self.df[self.df[('WB_diff', '[Wh]')] > self.smoothing_threshold / 12].index # We're at 5 min intervals thus / 12
140+
sudden_change_idx = self.df[
141+
self.df[('WB_diff', '[Wh]')] > self.smoothing_threshold / 12
142+
].index # We're at 5 min intervals thus / 12
126143

127144
# Step 3: Create a new smoothed base load curve
128145
self.df[('base_load_smoothed', '[Wh]')] = self.df[('base_load', '[Wh]')]
129146

130-
# Smooth only around the points with sudden changes (e.g., within a window of +/- smoothing_window_size)
147+
# Smooth only around the points with sudden changes
148+
# (e.g., within a window of +/- smoothing_window_size)
131149
for idx in sudden_change_idx:
132150
int_idx = self.df.index.get_loc(idx)
133151
# Get the window around the sudden change index (ensuring we can't go out of bounds)
134152
start_idx = max(1, int_idx - self.smoothing_window_size)
135153
end_idx = min(len(self.df) - 1, int_idx + self.smoothing_window_size)
136154

137155
# Calculate averages before and after ramp
138-
avg_before = self.df[('base_load_smoothed', '[Wh]')].iloc[start_idx - 1:int_idx - 1].mean()
139-
avg_after = self.df[('base_load_smoothed', '[Wh]')].iloc[int_idx + 1:end_idx + 1].mean()
156+
avg_before = self.df[('base_load_smoothed', '[Wh]')].iloc[
157+
start_idx - 1 : int_idx - 1
158+
].mean()
159+
avg_after = self.df[('base_load_smoothed', '[Wh]')].iloc[
160+
int_idx + 1 : end_idx + 1
161+
].mean()
140162

141163
# Use averages to replace at detected ramps
142-
self.df[('base_load_smoothed', '[Wh]')].iat[int_idx - 1] = avg_before # for ramp downs
143-
self.df[('base_load_smoothed', '[Wh]')].iat[int_idx] = avg_after # for ramp ups
164+
self.df[('base_load_smoothed', '[Wh]')].iat[int_idx - 1] = avg_before # ramp downs
165+
self.df[('base_load_smoothed', '[Wh]')].iat[int_idx] = avg_after # ramp ups
144166
else:
145167
# If smoothing is disabled, use the unsmoothed base load
146168
self.df[('base_load_smoothed', '[Wh]')] = self.df[('base_load', '[Wh]')]
@@ -150,10 +172,10 @@ def resample_and_add_temporal_columns(self):
150172
# Resampling to hourly data
151173
def custom_agg(column):
152174
if column.name[1] == '[Wh]': # Check the second level of the column header
153-
return column.sum() # Apply sum to 'Wh'
175+
result = column.sum() # Apply sum to 'Wh'
154176
else:
155-
result = column.mean() # Apply mean to all others
156-
return np.float32(result) # Convert back to float32
177+
result = np.float32(column.mean()) # Apply mean to all others & back to float32
178+
return result
157179

158180
# Resample dataframe to hourly data
159181
self.df = self.df.resample(self.resample_freq).apply(custom_agg)
@@ -178,12 +200,13 @@ def calculate_energy(group):
178200
})
179201

180202
# Group by month, weekday, and hour, and calculate the mean energy consumption
181-
grouped = self.df.groupby(['month', 'weekday', 'hour'])['base_load_smoothed'].apply(calculate_energy).unstack()
203+
grouped = self.df.groupby(['month', 'weekday', 'hour']
204+
)['base_load_smoothed'].apply(calculate_energy).unstack()
182205

183206
# Check if the grouped result is missing rows
184207
expected_rows = 12 * 7 * 24 # 12 months, 7 weekdays, 24 hours
185208
if len(grouped) < expected_rows and self.fill_empty_with_average:
186-
print("Data is missing rows. Filling missing values with averages...")
209+
logger.info("Data is missing rows. Filling missing values with averages...")
187210

188211
# Create a complete multi-index for all combinations of month, weekday, and hour
189212
full_index = pd.MultiIndex.from_product(
@@ -208,12 +231,12 @@ def calculate_energy(group):
208231

209232
# Write the result to a CSV file
210233
grouped_filled.to_csv(self.output_path, index=False)
211-
print(f"Missing values filled and saved to '{self.output_path}'.")
234+
logger.info("Missing values filled and saved to '%s'.", self.output_path)
212235
else:
213-
print("Data is complete. No missing rows to fill.")
236+
logger.info("Data is complete. No missing rows to fill.")
214237
# Export the original grouped data to CSV
215238
grouped.reset_index().to_csv(self.output_path, index=False)
216-
print(f"Data saved to '{self.output_path}'.")
239+
logger.info("Data saved to '%s'.", self.output_path)
217240

218241
def run(self):
219242
"""Run the entire processing pipeline."""
@@ -223,14 +246,16 @@ def run(self):
223246
self.calculate_base_load()
224247
self.resample_and_add_temporal_columns()
225248
self.process_and_export_data()
226-
except Exception as e:
227-
print(f"An error occurred: {e}")
249+
except (FileNotFoundError, KeyError, ValueError) as e:
250+
logger.error("An error occurred: %s", e)
251+
except Exception as e: #pylint: disable=broad-exception-caught
252+
logger.error("An unexpected error occurred: %s", e)
228253

229254
# Example usage
230255
if __name__ == "__main__":
231256
# Initialize the processor with file path, timezone, and smoothing options
232257
processor = SolarWebExportProcessor(
233-
file_path='../config/SolarWebExport.xlsx',
258+
file_path='../config/PartialSolarWebExport.xlsx',
234259
output_path='../config/generated_load_profile.csv',
235260
timezone='Europe/Berlin',
236261
fill_empty_with_average=True,

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ numpy
22
pandas
33
PyYAML
44
requests
5-
paho-mqtt
5+
paho-mqtt
6+
pytz

0 commit comments

Comments
 (0)