You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
"""Create required folders as layer directories."""
37
+
start_time=time.time()
36
38
folders= [self.bronze_folder
37
39
, self.silver_folder
38
40
, self.gold_folder
39
41
, self.statistical_analysis_folder]
40
42
forfolderinfolders:
41
43
os.makedirs(folder
42
44
, exist_ok=True)
45
+
elapsed_time=time.time() -start_time# End timing
46
+
logging.info(f"Created folders in {elapsed_time:.2f} seconds")
43
47
44
48
defsaving_step(self
45
49
, df : pd.DataFrame
@@ -56,10 +60,13 @@ def saving_step(self
56
60
Returns:
57
61
File: Saved file at layer directory.
58
62
"""
63
+
start_time=time.time()
59
64
path=os.path.join(folder
60
65
, filename)
61
66
df.to_parquet(path
62
67
, engine='pyarrow')
68
+
elapsed_time=time.time() -start_time
69
+
logging.info(f"Saved file {filename} in {elapsed_time:.2f} seconds")
63
70
64
71
defbronze_fetch(self
65
72
, series : str
@@ -78,6 +85,7 @@ def bronze_fetch(self
78
85
Returns:
79
86
DataFrame: Fetched data as pandas DataFrame, then saving at Bronze layer, or None if an error occurs (with an error log).
80
87
"""
88
+
start_time=time.time()
81
89
try:
82
90
# Special handling for IDHM 2010 (IPEAdataR)
83
91
iffilename=='IDHM_2010.parquet':
@@ -101,6 +109,8 @@ def bronze_fetch(self
101
109
self.saving_step(raw_data
102
110
, self.bronze_folder
103
111
, filename)
112
+
elapsed_time=time.time() -start_time
113
+
logging.info(f"Bronze fetching {filename} in {elapsed_time:.2f} seconds")
104
114
returnraw_data
105
115
exceptExceptionase:
106
116
logging.error(f'Error fetching data for {filename}: {e}')
@@ -119,6 +129,7 @@ def silver_transform(self
119
129
Returns:
120
130
DataFrame: Processed data as pandas DataFrame, then saving at Silver layer, or None if an error occurs (with an error log).
121
131
"""
132
+
start_time=time.time()
122
133
try:
123
134
if'IDHM_2010.parquet'infilename:
124
135
date_filter=pd.to_datetime('2010-01-01')
@@ -164,6 +175,8 @@ def silver_transform(self
164
175
self.saving_step(transf_df
165
176
, self.silver_folder
166
177
, filename)
178
+
elapsed_time=time.time() -start_time
179
+
logging.info(f"Silver transforming {filename} in {elapsed_time:.2f} seconds")
167
180
returntransf_df
168
181
exceptExceptionase:
169
182
logging.error(f'Error transforming data for {filename}: {e}')
@@ -180,6 +193,7 @@ def gold_finish(self
180
193
Returns:
181
194
DataFrame: Processed data as a single pandas DataFrame, then saving at Gold layer and DuckDB, or None if an error occurs. Also, Descriptive Summary as a csv file, then saving at Statistical Analysis folder, or None if an error occurs.
182
195
"""
196
+
start_time=time.time()
183
197
try:
184
198
df=self.join_list[0]
185
199
fortransf_dfinself.join_list[1:]:
@@ -219,7 +233,8 @@ def gold_finish(self
219
233
conn=ddb.connect(self.db_path)
220
234
conn.execute('CREATE TABLE IF NOT EXISTS df AS SELECT * FROM df')
221
235
conn.close()
222
-
236
+
elapsed_time=time.time() -start_time
237
+
logging.info(f"Gold finishing {filename} in {elapsed_time:.2f} seconds")
223
238
returndf
224
239
exceptExceptionase:
225
240
logging.error(f'Error finalizing data for {filename}: {e}')
@@ -244,6 +259,7 @@ def process_data(self
244
259
if bronze_fetch is done, and silver_transform isn't, do silver_transform,\n
245
260
if silver_transform is done, prepare pandas DataFrame to be processed at gold_finish.
246
261
"""
262
+
start_time=time.time()
247
263
bronze_df=self.bronze_fetch(series
248
264
, year
249
265
, filename
@@ -253,6 +269,8 @@ def process_data(self
253
269
, filename)
254
270
ifsilver_dfisnotNone:
255
271
self.join_list.append(silver_df)
272
+
elapsed_time=time.time() -start_time
273
+
logging.info(f"Processing data {filename} in {elapsed_time:.2f} seconds")
256
274
257
275
defanalyze_data(self
258
276
, df : pd.DataFrame) ->None:
@@ -266,6 +284,7 @@ def analyze_data(self
266
284
Statistical Model calculations and conversion to HTML.\n
267
285
Correlation Matrix, Linear Regression and ANOVA, all saved in a single HTML file at Statistical Analysis folder.
268
286
"""
287
+
start_time=time.time()
269
288
try:
270
289
corr_matrix=df[['IDHM 2010'
271
290
, 'Carga Tributária Municipal 2010'
@@ -328,14 +347,19 @@ def analyze_data(self
328
347
withopen(report_filename
329
348
, 'w') asf:
330
349
f.write(html_report)
350
+
elapsed_time=time.time() -start_time
351
+
logging.info(f"Analyzing data {report_filename} in {elapsed_time:.2f} seconds")
331
352
exceptExceptionase:
332
353
logging.error(f'Error analyzing data: {e}')
333
354
334
355
classDatabase:
335
356
def__init__(self):
336
357
"""Create connection to DuckDB database."""
358
+
start_time=time.time()
337
359
self._install_extensions()
338
360
self.conn=ddb.connect('ipea.db')
361
+
elapsed_time=time.time() -start_time
362
+
logging.info(f"Initialized database in {elapsed_time:.2f} seconds")
0 commit comments