-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_loader.py
More file actions
47 lines (38 loc) · 1.43 KB
/
Copy pathdata_loader.py
File metadata and controls
47 lines (38 loc) · 1.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import pandas as pd
import glob
import os
def load_uidai_data(directory_path):
    """Load every ``*.csv`` file in *directory_path* into one DataFrame.

    Column names are stripped of surrounding whitespace and lower-cased
    per file *before* the ``state`` check and before concatenation, so
    headers such as ``State`` or `` date `` line up across files instead of
    being skipped or producing duplicate columns.

    Post-processing applied to the combined frame:

    * ``date`` (if present) is parsed with ``dayfirst=True``; values that
      cannot be parsed become ``NaT``.
    * ``total_updates`` is set to the row-wise sum of all columns whose
      name starts with ``demo_``. If there are no such columns and no
      existing ``total_updates`` column, it is created and filled with 0.

    Args:
        directory_path: Directory to search (non-recursively) for CSV files.

    Returns:
        The concatenated DataFrame, or an empty ``pd.DataFrame()`` when no
        readable file with a ``state`` column was found.
    """
    # Sort so row order does not depend on filesystem enumeration order.
    all_files = sorted(glob.glob(os.path.join(directory_path, "*.csv")))
    print(f"Found {len(all_files)} CSV files in {directory_path}")

    df_list = []
    for f in all_files:
        try:
            df = pd.read_csv(f)
        except Exception as e:
            # Deliberately best-effort: one unreadable file must not abort
            # the whole load, so report it and move on.
            print(f"Error reading {f}: {e}")
            continue

        # Normalize headers first so the 'state' check below is
        # case/whitespace-insensitive and all files share column labels.
        df.columns = [str(c).strip().lower() for c in df.columns]

        # Normalization can collapse distinct headers (e.g. 'State' and
        # 'state') into one label; duplicate labels would break concat.
        duplicated = df.columns.duplicated()
        if duplicated.any():
            print(f"Warning: {f} has duplicate column names after "
                  "normalization; keeping the first of each.")
            df = df.loc[:, ~duplicated]

        if 'state' in df.columns:
            df_list.append(df)
        else:
            print(f"Skipping {f}: 'state' column not found.")

    if not df_list:
        print("No valid data loaded.")
        return pd.DataFrame()

    full_df = pd.concat(df_list, ignore_index=True)

    if 'date' in full_df.columns:
        full_df['date'] = pd.to_datetime(
            full_df['date'], dayfirst=True, errors='coerce'
        )

    demo_cols = [c for c in full_df.columns if c.startswith('demo_')]
    if demo_cols:
        full_df['total_updates'] = full_df[demo_cols].sum(axis=1)
    elif 'total_updates' not in full_df.columns:
        full_df['total_updates'] = 0

    return full_df
if __name__ == "__main__":
    # Manual smoke test: load whatever CSVs sit in the current working
    # directory and print a short summary of the result.
    cwd = os.getcwd()
    print(f"Loading data from {cwd}...")

    loaded = load_uidai_data(cwd)
    row_count = len(loaded)
    print(f"Loaded {row_count} rows.")

    has_data = not loaded.empty
    if has_data:
        preview = loaded.head()
        print(preview)
        column_names = loaded.columns.tolist()
        print("Columns:", column_names)
        states = loaded['state'].unique()
        print("States found:", states)