|
| 1 | +""" |
| 2 | +pfisd_extract_shapefiles.py |
| 3 | +
|
| 4 | +Extracts all layers from the Pflugerville ISD Attendance Boundaries |
| 5 | +ArcGIS Experience Builder app and saves them as shapefiles. |
| 6 | +
|
| 7 | +Source: https://experience.arcgis.com/experience/0bc78994af534cd1a703c8959abeac9d |
| 8 | +WebMap: https://Pflugervilleisd.maps.arcgis.com/sharing/rest/content/items/bb587c1043a949cca04f1b1904c235e3/data |
| 9 | +
|
| 10 | +The layers are embedded as inline Feature Collections (no FeatureServer endpoint). |
| 11 | +All geometry arrives in Web Mercator (EPSG:3857) and is reprojected to WGS84 (EPSG:4326). |
| 12 | +
|
| 13 | +Dependencies: |
| 14 | + pip install requests geopandas shapely pyproj fiona |
| 15 | +
|
| 16 | +Output: |
| 17 | + One .shp file per layer, written to ./export/ (relative to this script) |
| 18 | +""" |
| 19 | + |
| 20 | +import argparse |
| 21 | +import json |
| 22 | +import os |
| 23 | +import sys |
| 24 | +from pathlib import Path |
| 25 | + |
| 26 | +import requests |
| 27 | +import geopandas as gpd |
| 28 | +from shapely.geometry import Point, Polygon, MultiPolygon, shape |
| 29 | +from pyproj import Transformer |
| 30 | + |
| 31 | +# ───────────────────────────────────────────── |
| 32 | +# CONFIG |
| 33 | +# ───────────────────────────────────────────── |
| 34 | + |
| 35 | +WEBMAP_URL = ( |
| 36 | + "https://Pflugervilleisd.maps.arcgis.com/sharing/rest/content/items/" |
| 37 | + "bb587c1043a949cca04f1b1904c235e3/data?f=json" |
| 38 | +) |
| 39 | + |
| 40 | +# Output into the export/ folder inside this package |
| 41 | +OUTPUT_DIR = Path(__file__).resolve().parent / "export" |
| 42 | + |
| 43 | +# Source CRS (Web Mercator) → Target CRS (WGS84 lat/lon) |
| 44 | +transformer = Transformer.from_crs("EPSG:3857", "EPSG:4326", always_xy=True) |
| 45 | + |
| 46 | +# ───────────────────────────────────────────── |
| 47 | +# GEOMETRY HELPERS |
| 48 | +# ───────────────────────────────────────────── |
| 49 | + |
| 50 | +def reproject_ring(ring): |
| 51 | + """Convert a list of [x, y] Web Mercator coords to (lon, lat) WGS84.""" |
| 52 | + return [transformer.transform(x, y) for x, y in ring] |
| 53 | + |
| 54 | + |
| 55 | +def esri_polygon_to_shapely(esri_geom): |
| 56 | + """ |
| 57 | + Convert an ESRI polygon geometry dict (with 'rings') to a Shapely geometry. |
| 58 | + Handles single polygons and multipolygons (multiple outer rings). |
| 59 | + """ |
| 60 | + rings = esri_geom.get("rings", []) |
| 61 | + if not rings: |
| 62 | + return None |
| 63 | + |
| 64 | + reprojected = [reproject_ring(r) for r in rings] |
| 65 | + |
| 66 | + # ESRI uses winding order to distinguish outer vs hole rings. |
| 67 | + # For simplicity we treat each ring as its own polygon; Shapely's |
| 68 | + # buffer(0) trick cleans up any self-intersections. |
| 69 | + polys = [Polygon(r) for r in reprojected if len(r) >= 3] |
| 70 | + if not polys: |
| 71 | + return None |
| 72 | + if len(polys) == 1: |
| 73 | + return polys[0].buffer(0) |
| 74 | + return MultiPolygon([p.buffer(0) for p in polys]).buffer(0) |
| 75 | + |
| 76 | + |
| 77 | +def esri_point_to_shapely(esri_geom): |
| 78 | + """Convert an ESRI point geometry dict to a Shapely Point in WGS84.""" |
| 79 | + x = esri_geom.get("x") |
| 80 | + y = esri_geom.get("y") |
| 81 | + if x is None or y is None: |
| 82 | + return None |
| 83 | + lon, lat = transformer.transform(x, y) |
| 84 | + return Point(lon, lat) |
| 85 | + |
| 86 | + |
| 87 | +# ───────────────────────────────────────────── |
| 88 | +# LAYER EXTRACTION |
| 89 | +# ───────────────────────────────────────────── |
| 90 | + |
| 91 | +def extract_layer(layer_data, layer_title): |
| 92 | + """ |
| 93 | + Given a single ESRI featureCollection layer dict, return a GeoDataFrame. |
| 94 | + Handles both polygon and point geometry types. |
| 95 | + """ |
| 96 | + layer_def = layer_data.get("layerDefinition", {}) |
| 97 | + feature_set = layer_data.get("featureSet", {}) |
| 98 | + geom_type = layer_def.get("geometryType", "") |
| 99 | + features = feature_set.get("features", []) |
| 100 | + |
| 101 | + if not features: |
| 102 | + print(f" [WARN] No features found in layer: {layer_title}") |
| 103 | + return None |
| 104 | + |
| 105 | + rows = [] |
| 106 | + skipped = 0 |
| 107 | + |
| 108 | + for feat in features: |
| 109 | + esri_geom = feat.get("geometry", {}) |
| 110 | + attrs = feat.get("attributes", {}) |
| 111 | + |
| 112 | + if geom_type == "esriGeometryPolygon": |
| 113 | + geom = esri_polygon_to_shapely(esri_geom) |
| 114 | + elif geom_type == "esriGeometryPoint": |
| 115 | + geom = esri_point_to_shapely(esri_geom) |
| 116 | + else: |
| 117 | + print(f" [WARN] Unsupported geometry type '{geom_type}' — skipping feature.") |
| 118 | + skipped += 1 |
| 119 | + continue |
| 120 | + |
| 121 | + if geom is None or geom.is_empty: |
| 122 | + skipped += 1 |
| 123 | + continue |
| 124 | + |
| 125 | + row = {"geometry": geom} |
| 126 | + row.update(attrs) |
| 127 | + rows.append(row) |
| 128 | + |
| 129 | + if skipped: |
| 130 | + print(f" [INFO] Skipped {skipped} invalid/empty features.") |
| 131 | + |
| 132 | + if not rows: |
| 133 | + print(f" [WARN] No valid geometries extracted from: {layer_title}") |
| 134 | + return None |
| 135 | + |
| 136 | + gdf = gpd.GeoDataFrame(rows, crs="EPSG:4326") |
| 137 | + return gdf |
| 138 | + |
| 139 | + |
| 140 | +# ───────────────────────────────────────────── |
| 141 | +# MAIN |
| 142 | +# ───────────────────────────────────────────── |
| 143 | + |
| 144 | +def safe_filename(title): |
| 145 | + """Strip characters that are unsafe in filenames.""" |
| 146 | + keep = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_- " |
| 147 | + name = "".join(c if c in keep else "_" for c in title) |
| 148 | + return name.strip().replace(" ", "_")[:60] |
| 149 | + |
| 150 | + |
| 151 | +def main(): |
| 152 | + parser = argparse.ArgumentParser(description="Extract PFISD attendance boundary shapefiles") |
| 153 | + parser.add_argument( |
| 154 | + "--local", "-l", |
| 155 | + type=str, |
| 156 | + default=None, |
| 157 | + help="Path to a local WebMap JSON file (skip HTTP fetch)", |
| 158 | + ) |
| 159 | + args = parser.parse_args() |
| 160 | + |
| 161 | + OUTPUT_DIR.mkdir(parents=True, exist_ok=True) |
| 162 | + print(f"Output directory: {OUTPUT_DIR.resolve()}\n") |
| 163 | + |
| 164 | + # ------------------------------------------------------------------ |
| 165 | + # 1. Fetch the WebMap JSON (or load from local file) |
| 166 | + # ------------------------------------------------------------------ |
| 167 | + if args.local: |
| 168 | + local_path = Path(args.local) |
| 169 | + print(f"Loading WebMap data from local file: {local_path}") |
| 170 | + try: |
| 171 | + with open(local_path) as f: |
| 172 | + webmap = json.load(f) |
| 173 | + except Exception as e: |
| 174 | + print(f"[ERROR] Failed to load local file: {e}") |
| 175 | + sys.exit(1) |
| 176 | + else: |
| 177 | + print("Fetching WebMap data from ArcGIS Online...") |
| 178 | + try: |
| 179 | + resp = requests.get(WEBMAP_URL, timeout=30) |
| 180 | + resp.raise_for_status() |
| 181 | + webmap = resp.json() |
| 182 | + except Exception as e: |
| 183 | + print(f"[ERROR] Failed to fetch WebMap: {e}") |
| 184 | + sys.exit(1) |
| 185 | + |
| 186 | + operational_layers = webmap.get("operationalLayers", []) |
| 187 | + if not operational_layers: |
| 188 | + print("[ERROR] No operationalLayers found in WebMap JSON.") |
| 189 | + sys.exit(1) |
| 190 | + |
| 191 | + print(f"Found {len(operational_layers)} operational layer(s).\n") |
| 192 | + |
| 193 | + # ------------------------------------------------------------------ |
| 194 | + # 2. Iterate layers and export each as a shapefile |
| 195 | + # ------------------------------------------------------------------ |
| 196 | + exported = 0 |
| 197 | + |
| 198 | + for layer in operational_layers: |
| 199 | + layer_title = layer.get("title", "unnamed_layer") |
| 200 | + feature_collection = layer.get("featureCollection", {}) |
| 201 | + sub_layers = feature_collection.get("layers", []) |
| 202 | + |
| 203 | + if not sub_layers: |
| 204 | + print(f"[SKIP] '{layer_title}' — no featureCollection layers found.") |
| 205 | + continue |
| 206 | + |
| 207 | + print(f"Processing: {layer_title}") |
| 208 | + |
| 209 | + for i, sub_layer in enumerate(sub_layers): |
| 210 | + suffix = f"_{i}" if len(sub_layers) > 1 else "" |
| 211 | + fname = safe_filename(layer_title) + suffix |
| 212 | + |
| 213 | + gdf = extract_layer(sub_layer, layer_title) |
| 214 | + if gdf is None: |
| 215 | + continue |
| 216 | + |
| 217 | + out_path = OUTPUT_DIR / f"{fname}.shp" |
| 218 | + |
| 219 | + # Shapefile field names are limited to 10 characters |
| 220 | + gdf.columns = [c[:10] for c in gdf.columns] |
| 221 | + |
| 222 | + try: |
| 223 | + gdf.to_file(out_path, driver="ESRI Shapefile") |
| 224 | + print(f" -> Saved {len(gdf)} features -> {out_path}") |
| 225 | + exported += 1 |
| 226 | + except Exception as e: |
| 227 | + print(f" [ERROR] Could not write {out_path}: {e}") |
| 228 | + |
| 229 | + print() |
| 230 | + |
| 231 | + # ------------------------------------------------------------------ |
| 232 | + # 3. Summary |
| 233 | + # ------------------------------------------------------------------ |
| 234 | + print("-" * 50) |
| 235 | + print(f"Done. {exported} shapefile(s) written to: {OUTPUT_DIR.resolve()}") |
| 236 | + print("\nProjection: WGS84 (EPSG:4326)") |
| 237 | + print("Open in QGIS, ArcGIS Pro, or any GIS tool that reads .shp files.") |
| 238 | + |
| 239 | + |
| 240 | +if __name__ == "__main__": |
| 241 | + main() |
0 commit comments