1- #%%
1+ """
2+ Class to create load profile from SolarWeb Exports
3+ """
24import logging
5+ import os
36
4- logger = logging .getLogger ("__main__" )
5- logger .info ('[SolarWeb Export Processor] loading module' )
6-
7- import pandas as pd
87import numpy as np
8+ import pandas as pd
99import pytz
10- import os
10+
11+ logger = logging .getLogger ("__main__" )
12+ logger .info ('[SolarWeb Export Processor] loading module' )
1113
1214class SolarWebExportProcessor :
1315 """
@@ -20,13 +22,14 @@ class SolarWebExportProcessor:
2022 Additionally, the following columns can be included:
2123 - "Wattpilot / Energie Wattpilot" ergo consumption from Fronius Wattpilot
2224
23- If these additional columns are included then the load from these "smart" consumers will be subtracted from the
24- load to get a "base load" under the assumption that these will only run in the cheapest hours anyway.
25+ If these additional columns are included then the load from these "smart" consumers will be
26+ subtracted from the load to get a "base load" under the assumption that these will only run
27+ in the cheapest hours anyway.
2528
2629 The load profile will output month, weekday, hour and energy in Wh
2730
28- Any gaps in the timeseries will be filled with the weekday average across the existing dataset unless
29- fill_empty_with_average is set to False.
31+ Any gaps in the timeseries will be filled with the weekday average across the existing dataset
32+ unless fill_empty_with_average is set to False.
3033
3134 Key Features:
3235 - Loads data from a SolarWeb exported Excel file.
@@ -37,29 +40,34 @@ class SolarWebExportProcessor:
3740 - Exports processed data to a CSV file.
3841
3942 Attributes:
40- file_path (str): Path to the input Excel file.
41- output_path (str): Path to save the output CSV file.
42- timezone (str): Timezone for the data (default: 'Europe/Berlin').
43- fill_empty_with_average (bool): Whether to fill missing data with averages (default: True).
44- smooth_base_load (bool): Whether to smooth the wallbox ramps in the calculated base load (default: True).
45- smoothing_threshold (int): Threshold for detecting switched on/off EV wallbox loads (default: 1200 Watts).
46- smoothing_window_size (int): Window size for smoothing around EV charging (default: 2).
47- resample_freq (str): Frequency for resampling data (default: '60min').
48- df (pd.DataFrame): The main DataFrame holding the processed data.
43+ file_path (str): Path to the input Excel file
44+ output_path (str): Path to save the output CSV file
45+ timezone (str): Timezone for the data (default: 'Europe/Berlin')
46+ fill_empty_with_average (bool): Whether to fill missing data with averages (default: True)
47+ smooth_base_load (bool): smooth the wallbox ramps in calculated base load (default: True)
48+ smoothing_threshold (int): Threshold to detect switched on/off EV charging (default: 1200 W)
49+ smoothing_window_size (int): Window size for smoothing around EV charging (default: 2)
50+ resample_freq (str): Frequency for resampling data (default: '60min')
51+ df (pd.DataFrame): The main DataFrame holding the processed data
4952 """
5053
51- def __init__ (self , file_path , output_path = '../config/generated_load_profile.csv' , timezone = 'Europe/Berlin' ,
52- fill_empty_with_average = True , smooth_base_load = True , smoothing_threshold = 1200 ,
53- smoothing_window_size = 2 , resample_freq = '60min' ):
54+ # pylint: disable=too-many-instance-attributes, too-many-arguments
55+ # Nine are ok here
56+
57+ def __init__ (self , file_path , * , output_path = '/config/generated_load_profile.csv' ,
58+ timezone = 'Europe/Berlin' ,
59+ fill_empty_with_average = True ,
60+ smooth_base_load = True , smoothing_threshold = 1200 ,smoothing_window_size = 2 ,
61+ resample_freq = '60min' ):
5462 """
5563 Initialize the SolarWebExportProcessor.
5664
5765 :param file_path: Path to the Excel file containing the data.
58- :param output_path: Path to save the output CSV file (default: '.. /config/generated_load_profile.csv').
66+ :param output_path: Path to output CSV file (default: '/config/generated_load_profile.csv').
5967 :param timezone: Timezone for the data (default: 'Europe/Berlin').
6068 :param fill_empty_with_average: Whether to fill missing data with averages (default: True).
6169 :param smooth_base_load: Whether to smooth the base load (default: True).
62- :param smoothing_threshold: Threshold for detecting sudden changes in base load (default: 1200 Watts ).
70+ :param smoothing_threshold: Threshold to detect switched on/off EV charge (default: 1200 W ).
6371 :param smoothing_window_size: Window size for smoothing around sudden changes (default: 2).
6472 :param resample_freq: Frequency for resampling data (default: '60min').
6573 """
@@ -86,7 +94,9 @@ def load_data(self):
8694 # Check if the data has at least 1-hour resolution
8795 time_diff = self .df .index .to_series ().diff ().min ()
8896 if time_diff > pd .Timedelta (hours = 1 ):
89- raise ValueError (f"The data resolution is larger than 1 hour. Minimum time difference found: { time_diff } ." )
97+ raise ValueError (
98+ f"The data resolution is larger than 1 hour. "
99+ f"Minimum time difference found: { time_diff } ." )
90100
91101 # Convert float64 columns to float32 for file/memory size
92102 float64_cols = self .df .select_dtypes (include = 'float64' ).columns
@@ -100,8 +110,9 @@ def process_wattpilot_columns(self):
100110
101111 # Step 2: Check if any matching columns exist
102112 if not wattpilot_columns .empty :
103- # Create a new column "Load_Wallbox" with the sum of these columns along axis=1 (across rows)
104- self .df [('Load_Wallbox' , '[Wh]' )] = self .df [wattpilot_columns ].sum (axis = 1 ) # this also replaces all NaN with 0
113+ # Create a new column "Load_Wallbox" with the row sum of these columns
114+ # This also replaces all NaN with 0
115+ self .df [('Load_Wallbox' , '[Wh]' )] = self .df [wattpilot_columns ].sum (axis = 1 )
105116 else :
106117 # If no matching columns exist, create a "Load_Wallbox" column with zeros
107118 self .df [('Load_Wallbox' , '[Wh]' )] = 0
@@ -111,36 +122,47 @@ def calculate_base_load(self):
111122
112123 # Check if the required column ('Verbrauch', '[Wh]') exists
113124 if ('Verbrauch' , '[Wh]' ) not in self .df .columns :
114- raise KeyError (f"The required column ('Verbrauch', '[Wh]') does not exist in the input data." )
125+ raise KeyError (
126+ "The required column ('Verbrauch', '[Wh]') does not exist in the input data."
127+ )
115128
116129 # Calculate a base load by removing the wallbox loads
117- self .df [('base_load' , '[Wh]' )] = self .df ['Verbrauch' , '[Wh]' ] - self .df ['Load_Wallbox' , '[Wh]' ]
130+ self .df [('base_load' , '[Wh]' )] = (
131+ self .df ['Verbrauch' , '[Wh]' ] - self .df ['Load_Wallbox' , '[Wh]' ]
132+ )
118133
119134 # Smoothing of data where Wallbox starts or ends charging due to artifacts (if enabled)
120135 if self .smooth_base_load :
121136 # Step 1: Calculate the difference between consecutive values
122137 self .df [('WB_diff' , '[Wh]' )] = self .df ['Load_Wallbox' , '[Wh]' ].diff ().abs ()
123138
124139 # Step 2: Define a threshold for detecting sudden changes (e.g., a large jump)
125- sudden_change_idx = self .df [self .df [('WB_diff' , '[Wh]' )] > self .smoothing_threshold / 12 ].index # We're at 5 min intervals thus / 12
140+ sudden_change_idx = self .df [
141+ self .df [('WB_diff' , '[Wh]' )] > self .smoothing_threshold / 12
142+ ].index # We're at 5 min intervals thus / 12
126143
127144 # Step 3: Create a new smoothed base load curve
128145 self .df [('base_load_smoothed' , '[Wh]' )] = self .df [('base_load' , '[Wh]' )]
129146
130- # Smooth only around the points with sudden changes (e.g., within a window of +/- smoothing_window_size)
147+ # Smooth only around the points with sudden changes
148+ # (e.g., within a window of +/- smoothing_window_size)
131149 for idx in sudden_change_idx :
132150 int_idx = self .df .index .get_loc (idx )
133151 # Get the window around the sudden change index (ensuring we can't go out of bounds)
134152 start_idx = max (1 , int_idx - self .smoothing_window_size )
135153 end_idx = min (len (self .df ) - 1 , int_idx + self .smoothing_window_size )
136154
137155 # Calculate averages before and after ramp
138- avg_before = self .df [('base_load_smoothed' , '[Wh]' )].iloc [start_idx - 1 :int_idx - 1 ].mean ()
139- avg_after = self .df [('base_load_smoothed' , '[Wh]' )].iloc [int_idx + 1 :end_idx + 1 ].mean ()
156+ avg_before = self .df [('base_load_smoothed' , '[Wh]' )].iloc [
157+ start_idx - 1 : int_idx - 1
158+ ].mean ()
159+ avg_after = self .df [('base_load_smoothed' , '[Wh]' )].iloc [
160+ int_idx + 1 : end_idx + 1
161+ ].mean ()
140162
141163 # Use averages to replace at detected ramps
142- self .df [('base_load_smoothed' , '[Wh]' )].iat [int_idx - 1 ] = avg_before # for ramp downs
143- self .df [('base_load_smoothed' , '[Wh]' )].iat [int_idx ] = avg_after # for ramp ups
164+ self .df [('base_load_smoothed' , '[Wh]' )].iat [int_idx - 1 ] = avg_before # ramp downs
165+ self .df [('base_load_smoothed' , '[Wh]' )].iat [int_idx ] = avg_after # ramp ups
144166 else :
145167 # If smoothing is disabled, use the unsmoothed base load
146168 self .df [('base_load_smoothed' , '[Wh]' )] = self .df [('base_load' , '[Wh]' )]
@@ -150,10 +172,10 @@ def resample_and_add_temporal_columns(self):
150172 # Resampling to hourly data
151173 def custom_agg (column ):
152174 if column .name [1 ] == '[Wh]' : # Check the second level of the column header
153- return column .sum () # Apply sum to 'Wh'
175+ result = column .sum () # Apply sum to 'Wh'
154176 else :
155- result = column .mean () # Apply mean to all others
156- return np . float32 ( result ) # Convert back to float32
177+ result = np . float32 ( column .mean ()) # Apply mean to all others & back to float32
178+ return result
157179
158180 # Resample dataframe to hourly data
159181 self .df = self .df .resample (self .resample_freq ).apply (custom_agg )
@@ -178,12 +200,13 @@ def calculate_energy(group):
178200 })
179201
180202 # Group by month, weekday, and hour, and calculate the mean energy consumption
181- grouped = self .df .groupby (['month' , 'weekday' , 'hour' ])['base_load_smoothed' ].apply (calculate_energy ).unstack ()
203+ grouped = self .df .groupby (['month' , 'weekday' , 'hour' ]
204+ )['base_load_smoothed' ].apply (calculate_energy ).unstack ()
182205
183206 # Check if the grouped result is missing rows
184207 expected_rows = 12 * 7 * 24 # 12 months, 7 weekdays, 24 hours
185208 if len (grouped ) < expected_rows and self .fill_empty_with_average :
186- print ("Data is missing rows. Filling missing values with averages..." )
209+ logger . info ("Data is missing rows. Filling missing values with averages..." )
187210
188211 # Create a complete multi-index for all combinations of month, weekday, and hour
189212 full_index = pd .MultiIndex .from_product (
@@ -208,12 +231,12 @@ def calculate_energy(group):
208231
209232 # Write the result to a CSV file
210233 grouped_filled .to_csv (self .output_path , index = False )
211- print ( f "Missing values filled and saved to '{ self . output_path } '." )
234+ logger . info ( "Missing values filled and saved to '%s '." , self . output_path )
212235 else :
213- print ("Data is complete. No missing rows to fill." )
236+ logger . info ("Data is complete. No missing rows to fill." )
214237 # Export the original grouped data to CSV
215238 grouped .reset_index ().to_csv (self .output_path , index = False )
216- print ( f "Data saved to '{ self . output_path } '." )
239+ logger . info ( "Data saved to '%s '." , self . output_path )
217240
218241 def run (self ):
219242 """Run the entire processing pipeline."""
@@ -223,14 +246,16 @@ def run(self):
223246 self .calculate_base_load ()
224247 self .resample_and_add_temporal_columns ()
225248 self .process_and_export_data ()
226- except Exception as e :
227- print (f"An error occurred: { e } " )
249+ except (FileNotFoundError , KeyError , ValueError ) as e :
250+ logger .error ("An error occurred: %s" , e )
251+ except Exception as e : #pylint: disable=broad-exception-caught
252+ logger .error ("An unexpected error occurred: %s" , e )
228253
229254# Example usage
230255if __name__ == "__main__" :
231256 # Initialize the processor with file path, timezone, and smoothing options
232257 processor = SolarWebExportProcessor (
233- file_path = '../config/SolarWebExport .xlsx' ,
258+ file_path = '../config/PartialSolarWebExport .xlsx' ,
234259 output_path = '../config/generated_load_profile.csv' ,
235260 timezone = 'Europe/Berlin' ,
236261 fill_empty_with_average = True ,
0 commit comments