code fix for USCensusPEP_AgeSexRace (#1950)

niveditasing · web-flow · commit 4f7351268912 · 2026-04-13T11:21:14.000Z
* code fix

* code fix

* code fix

* resolved comments

* resolved comments
diff --git a/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py b/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py
@@ -16,6 +16,8 @@
 '''
 import os
 import pandas as pd
+import requests
+from absl import logging
 
 
 def national1900(output_folder: str):
@@ -41,8 +43,22 @@ def national1900(output_folder: str):
         # 8=Female_NonWhiteAlone
         cols = ['Age', '0', '1', '2', '3', '4', '5', '6', '7', '8']
         # reading the csv format input file and converting it to a dataframe
-        df = pd.read_csv(url,names=cols,engine='python',skiprows=9,\
-            skipfooter=15,encoding='ISO-8859-1')
+        try:
+            # Check if the URL is accessible
+            response = requests.head(url, allow_redirects=True)
+            if response.status_code != 200:
+                logging.warning(f"Skipping {url} as it is not accessible.")
+                continue
+
+            df = pd.read_csv(url,
+                             names=cols,
+                             engine='python',
+                             skiprows=9,
+                             skipfooter=15,
+                             encoding='ISO-8859-1')
+        except Exception as e:
+            logging.error(f"Error reading {url}: {e}")
+            continue
         #Writing raw data to csv
         df.to_csv(os.path.join(
             os.path.dirname(os.path.abspath(__file__)), "raw_data",
diff --git a/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py b/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py
@@ -16,6 +16,8 @@
 '''
 import os
 import pandas as pd
+import requests
+from absl import logging
 
 
 def national1960(output_folder: str):
@@ -38,8 +40,21 @@ def national1960(output_folder: str):
         ]
         # Reading the csv format input file and converting it to a dataframe.
         # Skipping unwanted rows from top and bottom.
-        df = pd.read_csv(url,names=cols,engine='python',skiprows=8,\
-            skipfooter=15)
+        try:
+            # Check if the URL is accessible
+            response = requests.head(url, allow_redirects=True)
+            if response.status_code != 200:
+                logging.warning(f"Skipping {url} as it is not accessible.")
+                continue
+
+            df = pd.read_csv(url,
+                             names=cols,
+                             engine='python',
+                             skiprows=8,
+                             skipfooter=15)
+        except Exception as e:
+            logging.error(f"Error reading {url}: {e}")
+            continue
         #Writing raw data to csv
         df.to_csv(os.path.join(
             os.path.dirname(os.path.abspath(__file__)), "raw_data",
diff --git a/scripts/us_census/pep/population_estimates_by_asr/process.py b/scripts/us_census/pep/population_estimates_by_asr/process.py
@@ -68,13 +68,14 @@ def add_future_year_urls():
         for YEAR in range(2030, 2020, -1):
             url_to_check = url.format(YEAR=YEAR)
             try:
-                check_url = requests.head(url_to_check)
+                check_url = requests.head(url_to_check, allow_redirects=True)
                 if check_url.status_code == 200:
                     _FILES_TO_DOWNLOAD.append({"download_path": url_to_check})
                     break
-
-            except:
-                logging.error(f"URL is not accessable {url_to_check}")
+                else:
+                    logging.warning(f"URL is not accessible: {url_to_check}")
+            except Exception as e:
+                logging.error(f"URL is not accessible {url_to_check}: {e}")
 
 
 MCF_TEMPLATE = ("Node: dcid:{pv1}\n"