Skip to content

Commit 4f73512

Browse files
authored
code fix for USCensusPEP_AgeSexRace (#1950)
* code fix * code fix * code fix * resolved comments * resolved comments
1 parent c278165 commit 4f73512

3 files changed

Lines changed: 40 additions & 8 deletions

File tree

scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
'''
1717
import os
1818
import pandas as pd
19+
import requests
20+
from absl import logging
1921

2022

2123
def national1900(output_folder: str):
@@ -41,8 +43,22 @@ def national1900(output_folder: str):
4143
# 8=Female_NonWhiteAlone
4244
cols = ['Age', '0', '1', '2', '3', '4', '5', '6', '7', '8']
4345
# reading the csv format input file and converting it to a dataframe
44-
df = pd.read_csv(url,names=cols,engine='python',skiprows=9,\
45-
skipfooter=15,encoding='ISO-8859-1')
46+
try:
47+
# Check if the URL is accessible
48+
response = requests.head(url, allow_redirects=True)
49+
if response.status_code != 200:
50+
logging.warning(f"Skipping {url} as it is not accessible.")
51+
continue
52+
53+
df = pd.read_csv(url,
54+
names=cols,
55+
engine='python',
56+
skiprows=9,
57+
skipfooter=15,
58+
encoding='ISO-8859-1')
59+
except Exception as e:
60+
logging.error(f"Error reading {url}: {e}")
61+
continue
4662
#Writing raw data to csv
4763
df.to_csv(os.path.join(
4864
os.path.dirname(os.path.abspath(__file__)), "raw_data",

scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
'''
1717
import os
1818
import pandas as pd
19+
import requests
20+
from absl import logging
1921

2022

2123
def national1960(output_folder: str):
@@ -38,8 +40,21 @@ def national1960(output_folder: str):
3840
]
3941
# Reading the csv format input file and converting it to a dataframe.
4042
# Skipping unwanted rows from top and bottom.
41-
df = pd.read_csv(url,names=cols,engine='python',skiprows=8,\
42-
skipfooter=15)
43+
try:
44+
# Check if the URL is accessible
45+
response = requests.head(url, allow_redirects=True)
46+
if response.status_code != 200:
47+
logging.warning(f"Skipping {url} as it is not accessible.")
48+
continue
49+
50+
df = pd.read_csv(url,
51+
names=cols,
52+
engine='python',
53+
skiprows=8,
54+
skipfooter=15)
55+
except Exception as e:
56+
logging.error(f"Error reading {url}: {e}")
57+
continue
4358
#Writing raw data to csv
4459
df.to_csv(os.path.join(
4560
os.path.dirname(os.path.abspath(__file__)), "raw_data",

scripts/us_census/pep/population_estimates_by_asr/process.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,14 @@ def add_future_year_urls():
6868
for YEAR in range(2030, 2020, -1):
6969
url_to_check = url.format(YEAR=YEAR)
7070
try:
71-
check_url = requests.head(url_to_check)
71+
check_url = requests.head(url_to_check, allow_redirects=True)
7272
if check_url.status_code == 200:
7373
_FILES_TO_DOWNLOAD.append({"download_path": url_to_check})
7474
break
75-
76-
except:
77-
logging.error(f"URL is not accessable {url_to_check}")
75+
else:
76+
logging.warning(f"URL is not accessible: {url_to_check}")
77+
except Exception as e:
78+
logging.error(f"URL is not accessible {url_to_check}: {e}")
7879

7980

8081
MCF_TEMPLATE = ("Node: dcid:{pv1}\n"

0 commit comments

Comments
 (0)