datacommonsorg
diff --git a/‎scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py‎
Lines changed: 18 additions & 2 deletions b/‎scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py‎
Lines changed: 18 additions & 2 deletions
diff --git a/‎scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py‎
Lines changed: 17 additions & 2 deletions b/‎scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py‎
Lines changed: 17 additions & 2 deletions
diff --git a/‎scripts/us_census/pep/population_estimates_by_asr/process.py‎
Lines changed: 5 additions & 4 deletions b/‎scripts/us_census/pep/population_estimates_by_asr/process.py‎
Lines changed: 5 additions & 4 deletions
diff --git a/‎scripts/us_epa/facilities_helper_test.py‎
Lines changed: 130 additions & 0 deletions b/‎scripts/us_epa/facilities_helper_test.py‎
Lines changed: 130 additions & 0 deletions
diff --git a/‎scripts/us_epa/parent_company/generate_svobs_helper_test.py‎
Lines changed: 76 additions & 0 deletions b/‎scripts/us_epa/parent_company/generate_svobs_helper_test.py‎
Lines changed: 76 additions & 0 deletions
diff --git a/‎scripts/us_epa/parent_company/process_parent_company.py‎
Lines changed: 1 addition & 3 deletions b/‎scripts/us_epa/parent_company/process_parent_company.py‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎scripts/us_epa/util/facilities_helper.py‎
Lines changed: 12 additions & 38 deletions b/‎scripts/us_epa/util/facilities_helper.py‎
Lines changed: 12 additions & 38 deletions
diff --git a/‎scripts/us_fed/treasury_constant_maturity_rates/golden_data/golden_summary_report.csv‎
Lines changed: 6 additions & 0 deletions b/‎scripts/us_fed/treasury_constant_maturity_rates/golden_data/golden_summary_report.csv‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎scripts/us_fed/treasury_constant_maturity_rates/golden_data/golden_treasury_constant_maturity_rates.csv‎
Lines changed: 4 additions & 0 deletions b/‎scripts/us_fed/treasury_constant_maturity_rates/golden_data/golden_treasury_constant_maturity_rates.csv‎
Lines changed: 4 additions & 0 deletions
@@ -16,6 +16,8 @@
 '''
 import os
 import pandas as pd
+import requests
+from absl import logging
 
 
 def national1900(output_folder: str):
@@ -41,8 +43,22 @@ def national1900(output_folder: str):
         # 8=Female_NonWhiteAlone
         cols = ['Age', '0', '1', '2', '3', '4', '5', '6', '7', '8']
         # reading the csv format input file and converting it to a dataframe
-        df = pd.read_csv(url,names=cols,engine='python',skiprows=9,\
-            skipfooter=15,encoding='ISO-8859-1')
+        try:
+            # Check if the URL is accessible
+            response = requests.head(url, allow_redirects=True)
+            if response.status_code != 200:
+                logging.warning(f"Skipping {url} as it is not accessible.")
+                continue
+
+            df = pd.read_csv(url,
+                             names=cols,
+                             engine='python',
+                             skiprows=9,
+                             skipfooter=15,
+                             encoding='ISO-8859-1')
+        except Exception as e:
+            logging.error(f"Error reading {url}: {e}")
+            continue
         #Writing raw data to csv
         df.to_csv(os.path.join(
             os.path.dirname(os.path.abspath(__file__)), "raw_data",
 
@@ -16,6 +16,8 @@
 '''
 import os
 import pandas as pd
+import requests
+from absl import logging
 
 
 def national1960(output_folder: str):
@@ -38,8 +40,21 @@ def national1960(output_folder: str):
         ]
         # Reading the csv format input file and converting it to a dataframe.
         # Skipping unwanted rows from top and bottom.
-        df = pd.read_csv(url,names=cols,engine='python',skiprows=8,\
-            skipfooter=15)
+        try:
+            # Check if the URL is accessible
+            response = requests.head(url, allow_redirects=True)
+            if response.status_code != 200:
+                logging.warning(f"Skipping {url} as it is not accessible.")
+                continue
+
+            df = pd.read_csv(url,
+                             names=cols,
+                             engine='python',
+                             skiprows=8,
+                             skipfooter=15)
+        except Exception as e:
+            logging.error(f"Error reading {url}: {e}")
+            continue
         #Writing raw data to csv
         df.to_csv(os.path.join(
             os.path.dirname(os.path.abspath(__file__)), "raw_data",
 
@@ -68,13 +68,14 @@ def add_future_year_urls():
         for YEAR in range(2030, 2020, -1):
             url_to_check = url.format(YEAR=YEAR)
             try:
-                check_url = requests.head(url_to_check)
+                check_url = requests.head(url_to_check, allow_redirects=True)
                 if check_url.status_code == 200:
                     _FILES_TO_DOWNLOAD.append({"download_path": url_to_check})
                     break
-
-            except:
-                logging.error(f"URL is not accessable {url_to_check}")
+                else:
+                    logging.warning(f"URL is not accessible: {url_to_check}")
+            except Exception as e:
+                logging.error(f"URL is not accessible {url_to_check}: {e}")
 
 
 MCF_TEMPLATE = ("Node: dcid:{pv1}\n"
 
@@ -0,0 +1,130 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for facilities_helper.py."""
+
+import sys
+import unittest
+from pathlib import Path
+from unittest import mock
+
+# Kept at scripts/us_epa/ instead of scripts/us_epa/util/ because the repo's
+# unittest discovery for scripts/us_epa would import it as
+# util.facilities_helper_test and collide with the top-level data/util package.
+REPO_ROOT = Path(__file__).resolve().parents[2]
+sys.path.insert(0, str(REPO_ROOT))
+
+from scripts.us_epa.util import facilities_helper as fh
+
+
+class FacilitiesHelperTest(unittest.TestCase):
+
+    def test_get_all_statvars_returns_empty_set_for_empty_input(self):
+        with mock.patch.object(fh, "get_datacommons_client") as mock_client:
+            self.assertEqual(fh.get_all_statvars([]), set())
+
+        mock_client.assert_not_called()
+
+    def test_get_all_statvars_fetches_and_unions_variables(self):
+        facilities = [f"epaGhgrpFacilityId/{i}" for i in range(55)]
+        mock_client = mock.Mock()
+        mock_client.observation.fetch.side_effect = [
+            mock.Mock(to_dict=mock.Mock(
+                return_value={
+                    "byVariable": {
+                        "Count_Person": {
+                            "byEntity": {
+                                facilities[0]: {}
+                            }
+                        },
+                        "Median_Age_Person": {
+                            "byEntity": {
+                                facilities[1]: {}
+                            }
+                        },
+                    }
+                })),
+            mock.Mock(to_dict=mock.Mock(
+                return_value={
+                    "byVariable": {
+                        "Count_Person": {
+                            "byEntity": {
+                                facilities[50]: {}
+                            }
+                        },
+                        "Count_Household": {
+                            "byEntity": {
+                                facilities[54]: {}
+                            }
+                        },
+                    }
+                })),
+        ]
+
+        with mock.patch.object(fh,
+                               "get_datacommons_client",
+                               return_value=mock_client):
+            stat_vars = fh.get_all_statvars(facilities)
+
+        self.assertEqual(stat_vars, {
+            "Count_Person",
+            "Median_Age_Person",
+            "Count_Household",
+        })
+        self.assertEqual(mock_client.observation.fetch.call_count, 2)
+        self.assertEqual(
+            mock_client.observation.fetch.call_args_list[0].kwargs, {
+                "entity_dcids":
+                    facilities[:50],
+                "variable_dcids": [],
+                "select": [
+                    fh.ObservationSelect.VARIABLE,
+                    fh.ObservationSelect.ENTITY,
+                ],
+            })
+        self.assertEqual(
+            mock_client.observation.fetch.call_args_list[1].kwargs, {
+                "entity_dcids":
+                    facilities[50:],
+                "variable_dcids": [],
+                "select": [
+                    fh.ObservationSelect.VARIABLE,
+                    fh.ObservationSelect.ENTITY,
+                ],
+            })
+
+    def test_get_all_statvars_allows_entities_missing_from_response(self):
+        mock_response = mock.Mock(to_dict=mock.Mock(
+            return_value={
+                "byVariable": {
+                    "Count_Person": {
+                        "byEntity": {
+                            "epaGhgrpFacilityId/1": {}
+                        }
+                    }
+                }
+            }))
+        mock_client = mock.Mock()
+        mock_client.observation.fetch.return_value = mock_response
+
+        with mock.patch.object(fh,
+                               "get_datacommons_client",
+                               return_value=mock_client):
+            stat_vars = fh.get_all_statvars(
+                ["epaGhgrpFacilityId/1", "epaGhgrpFacilityId/2"])
+
+        self.assertEqual(stat_vars, {"Count_Person"})
+
+
+if __name__ == "__main__":
+    unittest.main()
@@ -0,0 +1,76 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for generate_svobs_helper()."""
+
+import sys
+import unittest
+from pathlib import Path
+from unittest import mock
+
+REPO_ROOT = Path(__file__).resolve().parents[3]
+sys.path.insert(0, str(REPO_ROOT))
+
+from scripts.us_epa.parent_company import process_parent_company
+
+
+class GenerateSvobsHelperTest(unittest.TestCase):
+
+    def test_generate_svobs_helper_wires_statvars_and_svobs(self):
+        ownership = {
+            ("epaGhgrpFacilityId/1001", "2018"): {
+                "EpaParentCompany/A": 100.0,
+            },
+            ("epaGhgrpFacilityId/1001", "2019"): {
+                "EpaParentCompany/A": 100.0,
+            },
+            ("epaGhgrpFacilityId/1002", "2019"): {
+                "EpaParentCompany/B": 100.0,
+            },
+        }
+        facility_sv_map = {"epaGhgrpFacilityId/1001": {"Count_Person": {}}}
+        facets = {"facet-1": {"observationPeriod": "P1Y"}}
+
+        with mock.patch.object(process_parent_company,
+                               "_facility_year_company_percentages",
+                               return_value=ownership):
+            with mock.patch.object(process_parent_company.fh,
+                                   "get_all_statvars",
+                                   return_value={"Count_Person"
+                                                }) as mock_statvars:
+                with mock.patch.object(process_parent_company.fh,
+                                       "get_all_svobs",
+                                       return_value=(facility_sv_map,
+                                                     facets)) as mock_svobs:
+                    with mock.patch.object(
+                            process_parent_company,
+                            "process_svobs") as mock_process_svobs:
+                        process_parent_company.generate_svobs_helper(
+                            "ownership.csv", "/tmp/svobs")
+
+        statvars_facilities = mock_statvars.call_args.args[0]
+        self.assertEqual(set(statvars_facilities), {
+            "epaGhgrpFacilityId/1001",
+            "epaGhgrpFacilityId/1002",
+        })
+        self.assertEqual(set(mock_svobs.call_args.args[0]), {
+            "epaGhgrpFacilityId/1001",
+            "epaGhgrpFacilityId/1002",
+        })
+        self.assertEqual(mock_svobs.call_args.args[1], {"Count_Person"})
+        mock_process_svobs.assert_called_once_with("/tmp/svobs", ownership,
+                                                   facility_sv_map, facets)
+
+
+if __name__ == "__main__":
+    unittest.main()
@@ -42,8 +42,6 @@
 flags.DEFINE_string("svobs_output_path", "svobs",
                     "Output directory for StatVarObs.")
 
-_DC_API_URL = "https://api.datacommons.org/place/stat-vars"
-
 # V_PARENT_COMPANY_INFO table
 _TABLE_PREFIX = "D_GHG_B"
 _TABLE = "V_PARENT_COMPANY_INFO"
@@ -487,7 +485,7 @@ def generate_svobs_helper(ownership_relationships_filepath, svobs_path_info):
 
     facilities = list(facilities)
 
-    statVars = fh.get_all_statvars(_DC_API_URL, facilities)
+    statVars = fh.get_all_statvars(facilities)
     facility_sv_map, facets = fh.get_all_svobs(facilities, statVars)
     print("# SVs : %d" % len(statVars))
     print("# Facilities : %d" % len(facility_sv_map))
 
@@ -23,13 +23,9 @@
 
 from datacommons_client.models.observation import ObservationDate
 from datacommons_client.models.observation import ObservationSelect
-import json
 import pandas as pd
-import requests
 
 from re import sub
-from requests.structures import CaseInsensitiveDict
-from requests.exceptions import HTTPError
 
 REPO_ROOT = Path(__file__).resolve().parents[3]
 sys.path.insert(0, str(REPO_ROOT))
@@ -165,49 +161,27 @@ def get_county_candidates(zcta):
     return filtered_lists
 
 
-def _dc_sv_query(dc_api_url, data_string, svs=set()):
-    headers = CaseInsensitiveDict()
-    headers["Content-Type"] = "application/json"
-    try:
-        resp = requests.post(dc_api_url, headers=headers, data=data_string)
-    except HTTPError as http_err:
-        print(f'HTTP error occurred: {http_err}')
-        return set()
-    except Exception as e:
-        print(f'Some unkonw Exceptionoccurred: {e}')
-        return set()
-
-    d = json.loads(resp.content.decode('utf8').replace("'", '"'))
-    for p, p_dict in d["places"].items():
-        if "statVars" in p_dict:
-            sv_list = d["places"][p]["statVars"]
-            for sv in sv_list:
-                svs.add(sv)
-    return svs
-
-
 # Returns the union of all StatVars associated with all facilities using the
 # Data Commons API.
-def get_all_statvars(dc_api_url, facility_ids):
+def get_all_statvars(facility_ids):
     if not facility_ids:
         return set()
 
+    client = get_datacommons_client()
     statVars = set()
-    # 500 facilities at a time.
     n_facilities = 50
     print("****Getting existing StatVars for Facilities.")
     for i in range(0, len(facility_ids), n_facilities):
-        if i % n_facilities == 0:
-            print(f'**Processing facilities from index {i} to {i+n_facilities}')
-        # Compose the API query params.
-        # Need to be of the form:
-        # '{"dcids":["epaGhgrpFacilityId/1004962","epaGhgrpFacilityId/1010899"]}'
-        data_string = "{'dcids': ["
-        for f in facility_ids[i:i + n_facilities]:
-            data_string += '"%s",' % f
-        data_string += ']}'
-
-        statVars = _dc_sv_query(dc_api_url, data_string, statVars)
+        print(f'**Processing facilities from index {i} to {i+n_facilities}')
+        response = client.observation.fetch(
+            entity_dcids=facility_ids[i:i + n_facilities],
+            variable_dcids=[],
+            select=[
+                ObservationSelect.VARIABLE,
+                ObservationSelect.ENTITY,
+            ],
+        ).to_dict()
+        statVars.update(response.get('byVariable', {}).keys())
 
     print("****Done getting existing StatVars.")
     print("***********************************.")
 
@@ -0,0 +1,6 @@
+StatVar,NumPlaces,MinDate,MeasurementMethods,Units
+InterestRate_TreasuryNote_3Year,1,1962-01-02,[ConstantMaturityRate],[Percent]
+InterestRate_TreasuryBond_20Year,1,1962-01-02,[ConstantMaturityRate],[Percent]
+InterestRate_TreasuryNote_5Year,1,1962-01-02,[ConstantMaturityRate],[Percent]
+InterestRate_TreasuryNote_10Year,1,1962-01-02,[ConstantMaturityRate],[Percent]
+InterestRate_TreasuryBill_1Year,1,1962-01-02,[ConstantMaturityRate],[Percent]
@@ -0,0 +1,4 @@
+date,1-Month,3-Month,6-Month,1-Year,2-Year,3-Year,5-Year,7-Year,10-Year,20-Year,30-Year
+1962-01-02,,,,3.22,,3.70,3.88,,4.06,4.07,
+1962-02-01,,,,3.30,,3.81,4.00,,4.09,4.13,
+1962-04-19,,,,3.00,,3.37,3.60,,3.82,3.91,