-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathtransform_data.py
More file actions
97 lines (83 loc) · 3.51 KB
/
transform_data.py
File metadata and controls
97 lines (83 loc) · 3.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import pandas as pd
from pathlib import Path
import json
import logging
# Configure the root logger once, at import time, for every message in this module.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Input file: <two directories above this file>/data/weather_data.json
path_name = Path(__file__).parent.parent.joinpath('data', 'weather_data.json')

# Columns removed by the pipeline after the 'weather' column has been expanded.
columns_names_to_drop = ['weather', 'weather_icon', 'sys.type']

# Flattened JSON column name -> final column name.
columns_names_to_rename = {
    # Top-level fields
    "base": "base",
    "visibility": "visibility",
    "dt": "datetime",
    "timezone": "timezone",
    "id": "city_id",
    "name": "city_name",
    "cod": "code",
    # Coordinates
    "coord.lon": "longitude",
    "coord.lat": "latitude",
    # Main measurements
    "main.temp": "temperature",
    "main.feels_like": "feels_like",
    "main.temp_min": "temp_min",
    "main.temp_max": "temp_max",
    "main.pressure": "pressure",
    "main.humidity": "humidity",
    "main.sea_level": "sea_level",
    "main.grnd_level": "grnd_level",
    # Wind
    "wind.speed": "wind_speed",
    "wind.deg": "wind_deg",
    "wind.gust": "wind_gust",
    # Clouds
    "clouds.all": "clouds",
    # System block ('sys.type' is also listed in columns_names_to_drop)
    "sys.type": "sys_type",
    "sys.id": "sys_id",
    "sys.country": "country",
    "sys.sunrise": "sunrise",
    "sys.sunset": "sunset",
    # weather_id, weather_main and weather_description already carry their
    # final names (set in normalize_weather_columns), so they are not listed.
}

# Columns (named after renaming) that are converted to datetimes.
columns_to_normalize_datetime = ['datetime', 'sunrise', 'sunset']
def create_dataframe(path_name: "str | Path") -> pd.DataFrame:
    """Load a JSON file and flatten it into a DataFrame.

    Args:
        path_name: Location of the JSON file, given either as a string or
            as a ``Path``.

    Returns:
        The output of ``pd.json_normalize`` applied to the parsed JSON, so
        nested objects become dot-separated columns (e.g. ``coord.lon``).

    Raises:
        FileNotFoundError: If nothing exists at ``path_name``.
    """
    logging.info("→ Criando DataFrame do arquivo JSON...")
    # Coerce to Path so plain strings (as the annotation allows) work too;
    # calling .exists() on a str would raise AttributeError.
    path = Path(path_name)
    if not path.exists():
        raise FileNotFoundError(f"Arquivo não encontrado: {path}")
    # Read as UTF-8 explicitly rather than relying on the platform default
    # encoding, which can mis-decode non-ASCII text.
    with path.open(encoding="utf-8") as f:
        data = json.load(f)
    df = pd.json_normalize(data)
    logging.info(f"\n✓ DataFrame criado com {len(df)} linha(s)")
    return df
def normalize_weather_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Expand the first entry of the ``weather`` column into flat columns.

    The keys ``id``, ``main``, ``description`` and ``icon`` of that first
    entry are exposed as ``weather_id``, ``weather_main``,
    ``weather_description`` and ``weather_icon``. The original ``weather``
    column is kept; the new columns are appended to the right.

    Args:
        df: DataFrame with a ``weather`` column. Presumably each cell is a
            list of dicts (only the first one is used) - verify against
            the data source.

    Returns:
        A new DataFrame with the extra weather columns. Rows whose
        ``weather`` cell is empty or not a list get missing values there.

    Raises:
        KeyError: If ``df`` has no ``weather`` column.
    """
    # Take the first entry per row; fall back to an empty dict when the
    # cell is empty or missing, instead of failing on x[0].
    first_entries = [
        entries[0] if isinstance(entries, (list, tuple)) and entries else {}
        for entries in df['weather']
    ]
    df_weather = pd.json_normalize(first_entries)
    # json_normalize numbers rows 0..n-1; reuse the caller's index so the
    # column-wise concat lines up row by row even for a non-default index.
    df_weather.index = df.index
    df_weather = df_weather.rename(columns={
        'id': 'weather_id',
        'main': 'weather_main',
        'description': 'weather_description',
        'icon': 'weather_icon',
    })
    df = pd.concat([df, df_weather], axis=1)
    logging.info(f"\n✓ Coluna 'weather' normalizada - {len(df.columns)} colunas")
    return df
def drop_columns(df: pd.DataFrame, columns_names:list[str]) -> pd.DataFrame:
    """Return a copy of ``df`` without the listed columns.

    Args:
        df: Source DataFrame; it is not modified.
        columns_names: Labels of the columns to remove.

    Returns:
        A new DataFrame lacking ``columns_names``.

    Raises:
        KeyError: If any label is not a column of ``df``.
    """
    logging.info(f"\n→ Removendo colunas: {columns_names}")
    trimmed = df.drop(labels=columns_names, axis=1)
    remaining = len(trimmed.columns)
    logging.info(f"✓ Colunas removidas - {remaining} colunas restantes")
    return trimmed
def rename_columns(df: pd.DataFrame, columns_names:dict[str, str]) -> pd.DataFrame:
    """Return a copy of ``df`` with columns renamed per ``columns_names``.

    Args:
        df: Source DataFrame; it is not modified.
        columns_names: Mapping of current column name to new column name.
            Keys that are not columns of ``df`` are ignored.

    Returns:
        A new DataFrame with the renamed columns.
    """
    mapping_size = len(columns_names)
    logging.info(f"\n→ Renomeando {mapping_size} colunas...")
    renamed = df.rename(columns_names, axis='columns')
    logging.info("✓ Colunas renomeadas")
    return renamed
def normalize_datetime_columns(
    df: pd.DataFrame,
    columns_names: list[str],
    timezone: str = 'America/Sao_Paulo',
) -> pd.DataFrame:
    """Convert epoch-second columns into timezone-aware datetimes.

    Each listed column is parsed as seconds since the Unix epoch in UTC and
    then converted to ``timezone``. The columns are overwritten on ``df``
    itself, and the same object is returned.

    Args:
        df: DataFrame holding the columns to convert; modified in place.
        columns_names: Names of the columns to convert.
        timezone: Target time zone name; defaults to ``'America/Sao_Paulo'``.

    Returns:
        ``df``, with the listed columns replaced by datetimes in ``timezone``.

    Raises:
        KeyError: If a listed column is not present in ``df``.
    """
    logging.info(f"\n→ Convertendo colunas para datetime: {columns_names}")
    for name in columns_names:
        # Parse as UTC first so the conversion to the target zone is exact.
        as_utc = pd.to_datetime(df[name], unit='s', utc=True)
        df[name] = as_utc.dt.tz_convert(timezone)
    logging.info("✓ Colunas convertidas para datetime\n")
    return df
def data_transformations() -> pd.DataFrame:
    """Run the full transformation pipeline on the configured JSON file.

    Steps, in order: load the file at ``path_name``, expand the ``weather``
    column, drop ``columns_names_to_drop``, rename columns using
    ``columns_names_to_rename`` and convert
    ``columns_to_normalize_datetime`` to datetimes.

    Returns:
        The transformed DataFrame.
    """
    # Report the start through logging, like every other status message
    # in this module, instead of a bare print.
    logging.info("\n Iniciando transformações")
    df = create_dataframe(path_name)
    df = normalize_weather_columns(df)
    df = drop_columns(df, columns_names_to_drop)
    df = rename_columns(df, columns_names_to_rename)
    df = normalize_datetime_columns(df, columns_to_normalize_datetime)
    logging.info("✓ Transformações concluídas\n")
    return df