44 DuplicateOptionError ,
55 ParsingError ,
66)
7+ from csv import QUOTE_MINIMAL
8+ from inspect import currentframe
9+ import json
10+ from numpy import where
11+ from os import walk , listdir
12+ import os .path
13+ import pandas as pd
14+ from pathlib import Path
15+ import shutil
16+ from slugify import slugify
17+ from sqlalchemy .orm import Session
18+ from typing import Optional , Dict , Any , List
19+ import xlrd
20+ import xml .etree .ElementTree
721
22+ # local imports
823from electiondata import (
924 database as db ,
1025 munge as m ,
11- juris as jm ,
1226 nist as nist ,
1327 constants ,
1428)
15- import pandas as pd
16- from pandas .errors import ParserError
17- import os
18- from pathlib import Path
19- from typing import Optional , Dict , Any , List
20- import datetime
21- import csv
22- import numpy as np
23- import inspect
24- import xml .etree .ElementTree as et
25- import json
26- import shutil
27- import xlrd
28-
29- # may need for certain excel imports: import openpyxl
30- from sqlalchemy .orm import Session
31-
32-
33- # mapping from internal database reportingunit types to the user-facing contest types
34- # (contests are categorized by the reporting unit type of their corresponding districts)
3529
3630
3731def find_dupes (df ):
@@ -59,9 +53,9 @@ def json_kwargs(
5953 else :
6054 json_rename [path_list [- 1 ]] = mf
6155 meta = list (list (t ) for t in meta_set )
62- json_kwargs = {"meta" : meta , "record_path" : record_path , "errors" : "ignore" }
56+ j_kwargs = {"meta" : meta , "record_path" : record_path , "errors" : "ignore" }
6357
64- return json_kwargs , json_rename
58+ return j_kwargs , json_rename
6559
6660
6761def tabular_kwargs (
@@ -137,7 +131,7 @@ def list_desired_excel_sheets(f_path: str, p: dict) -> (Optional[list], Optional
137131 all_sheets = xl .sheet_names
138132 # xlsx = openpyxl.load_workbook(f_path)
139133 # all_sheets = xlsx.get_sheet_names()
140- except Exception as exc :
134+ except Exception :
141135 try :
142136 # read xls file
143137 xls = xlrd .open_workbook (f_path , on_demand = True )
@@ -191,7 +185,7 @@ def read_single_datafile(
191185 kwargs = tabular_kwargs (p , kwargs , aux = aux )
192186 if p ["multi_block" ] == "yes" :
193187 kwargs ["header" ] = None
194- kwargs ["quoting" ] = csv . QUOTE_MINIMAL
188+ kwargs ["quoting" ] = QUOTE_MINIMAL
195189 if p ["flat_text_delimiter" ] in ["tab" , "\\ t" ]:
196190 kwargs ["sep" ] = "\t "
197191 else :
@@ -207,7 +201,7 @@ def read_single_datafile(
207201 else :
208202 driver = nist .xml_count_parse_info (p , ignore_namespace = True )
209203 xml_path_info = nist .xml_string_path_info (p ["munge_fields" ], p ["namespace" ])
210- tree = et .parse (f_path )
204+ tree = xml . etree . ElementTree .parse (f_path )
211205 df , err = nist .df_from_tree (
212206 tree ,
213207 xml_path_info = xml_path_info ,
@@ -273,7 +267,7 @@ def read_single_datafile(
273267 err = add_new_error (
274268 err ,
275269 "system" ,
276- f"{ Path (__file__ ).absolute ().parents [0 ].name } .{ inspect . currentframe ().f_code .co_name } " ,
270+ f"{ Path (__file__ ).absolute ().parents [0 ].name } .{ currentframe ().f_code .co_name } " ,
277271 f"Unexpected error setting and filling headers after padding file { file_name } " ,
278272 )
279273
@@ -310,7 +304,7 @@ def read_single_datafile(
310304 except UnicodeDecodeError as ude :
311305 err_str = f"Encoding error. Datafile not read completely.\n \t { ude } "
312306 err = add_new_error (err , "file" , file_name , err_str )
313- except ParserError as pe :
307+ except pd . errors . ParserError as pe :
314308 # DFs have trouble comparing against None. So we return an empty DF and
315309 # check for emptiness below as an indication of an error.
316310 err_str = f"Error parsing results file.\n { pe } "
@@ -366,7 +360,7 @@ def excel_to_dict(
366360 err = add_new_error (
367361 err ,
368362 "system" ,
369- f"{ Path (__file__ ).absolute ().parents [0 ].name } .{ inspect . currentframe ().f_code .co_name } " ,
363+ f"{ Path (__file__ ).absolute ().parents [0 ].name } .{ currentframe ().f_code .co_name } " ,
370364 f"Unexpected exception while getting row-constant keyword arguments for \n "
371365 f"rows_to_read: { rows_to_read } \n "
372366 f"kwargs: { kwargs } .\n "
@@ -483,7 +477,7 @@ def copy_directory_with_backup(
483477 err = add_new_error (
484478 err ,
485479 "warn-system" ,
486- f"{ Path (__file__ ).absolute ().parents [0 ].name } .{ inspect . currentframe ().f_code .co_name } " ,
480+ f"{ Path (__file__ ).absolute ().parents [0 ].name } .{ currentframe ().f_code .co_name } " ,
487481 f"No such directory: { original_path } " ,
488482 )
489483 return err
@@ -494,7 +488,7 @@ def copy_with_err_handling(
494488) -> Optional [dict ]:
495489 err = None
496490 Path (copy_path ).mkdir (parents = True , exist_ok = True )
497- for root , dirs , files in os . walk (original_path , topdown = True ):
491+ for root , dirs , files in walk (original_path , topdown = True ):
498492 new_root = root .replace (original_path , copy_path )
499493 for f in files :
500494 old = os .path .join (root , f )
@@ -507,7 +501,7 @@ def copy_with_err_handling(
507501 err = add_new_error (
508502 err ,
509503 "warn-file" ,
510- f"{ Path (__file__ ).absolute ().parents [0 ].name } .{ inspect . currentframe ().f_code .co_name } " ,
504+ f"{ Path (__file__ ).absolute ().parents [0 ].name } .{ currentframe ().f_code .co_name } " ,
511505 f"Error while copying { old } to { new } :\n { she } " ,
512506 )
513507 for d in dirs :
@@ -701,7 +695,12 @@ def report(
701695
702696 # write info to a .errors file named for the error-type and name_key <nk>
703697 out_path = os .path .join (
704- output_location , f"{ file_prefix } _{ et } _{ nk_name } .errors"
698+ output_location ,
699+ slugify (
700+ f"{ file_prefix } _{ et } _{ nk_name } .errors" ,
701+ regex_pattern = r"[^ A-z0-9-_]+" ,
702+ lowercase = False ,
703+ ),
705704 )
706705 with open (out_path , "a" ) as f :
707706 f .write (out_str )
@@ -723,7 +722,12 @@ def report(
723722 # write info to a .warnings file named for the error-type and name_key
724723
725724 out_path = os .path .join (
726- output_location , f"{ file_prefix } _{ et } _{ nk_name } .warnings"
725+ output_location ,
726+ slugify (
727+ f"{ file_prefix } _{ et } _{ nk_name } .warnings" ,
728+ regex_pattern = r"[^ A-z0-9-_]+" ,
729+ lowercase = False ,
730+ ),
727731 )
728732 with open (out_path , "a" ) as f :
729733 f .write (out_str )
@@ -770,7 +774,7 @@ def add_new_error(
770774 err = add_new_error (
771775 err ,
772776 "system" ,
773- f"{ Path (__file__ ).absolute ().parents [0 ].name } .{ inspect . currentframe ().f_code .co_name } " ,
777+ f"{ Path (__file__ ).absolute ().parents [0 ].name } .{ currentframe ().f_code .co_name } " ,
774778 f"Unrecognized key ({ err_type } ) for message { msg } " ,
775779 )
776780 return err
@@ -824,7 +828,7 @@ def confirm_essential_info(
824828 the given directory; False otherwise"""
825829
826830 # loop through files
827- for f in [f for f in os . listdir (directory ) if f [- 4 :] == ".ini" ]:
831+ for f in [f for f in listdir (directory ) if f [- 4 :] == ".ini" ]:
828832 p_path = os .path .join (directory , f )
829833 file_confirmed = False
830834 while not file_confirmed :
@@ -873,7 +877,7 @@ def election_juris_list(ini_path: str, results_path: Optional[str] = None) -> li
873877 for ini files whose results files are in the results_path directory
874878 """
875879 ej_set = set ()
876- for subdir , dirs , files in os . walk (ini_path ):
880+ for subdir , dirs , files in walk (ini_path ):
877881 for f in files :
878882 if (f .endswith (".ini" )) and (not f .endswith ("template.ini" )):
879883 full_path = os .path .join (subdir , f )
@@ -1142,7 +1146,7 @@ def clean_candidate_names(df):
11421146 extra_df = df [extra_cols ]
11431147 df = df [df_cols ]
11441148 df ["party" ] = df ["type" ].str .split (" " )
1145- df ["party" ] = np . where (
1149+ df ["party" ] = where (
11461150 df ["party" ].str .contains ("party" , case = False ),
11471151 df ["party" ]
11481152 .map (lambda x : x [0 :- 1 ])
@@ -1175,20 +1179,20 @@ def clean_candidate_names(df):
11751179 df ["chamber" ] = df ["chamber" ].fillna ("unknown" )
11761180 df ["district" ] = df ["contest" ].str .extract (r"(\d+)" )
11771181 df ["contest_short" ] = ""
1178- df ["contest_short" ] = np . where (
1182+ df ["contest_short" ] = where (
11791183 df ["chamber" ] != "unknown" ,
11801184 df [df .columns [5 :]].apply (lambda x : "" .join (x .dropna ().astype (str )), axis = 1 ),
11811185 df ["contest_short" ],
11821186 )
1183- df ["contest_short" ] = np . where (
1187+ df ["contest_short" ] = where (
11841188 df ["chamber" ] == "unknown" ,
11851189 df ["contest" ]
11861190 .str .split (" " )
11871191 .map (lambda words : "" .join ([word [0 :3 ] for word in words if word != "of" ])),
11881192 df ["contest_short" ],
11891193 )
11901194 # Handle GA 2020 runoff senate elections
1191- df ["contest_short" ] = np . where (
1195+ df ["contest_short" ] = where (
11921196 df ["parent" ].str .contains ("runoff" ),
11931197 df ["contest_short" ] + "Runoff" ,
11941198 df ["contest_short" ],
@@ -1213,7 +1217,7 @@ def disambiguate_empty_cols(
12131217 df = df_in .reset_index (drop = True )
12141218
12151219 # put dummy info into the tops of the bad columns
1216- # in order to meet MultiIndex uniqueness criteria
1220+ # in order to meet pd.MultiIndex uniqueness criteria
12171221 mask = df .eq ("" ).loc [start :].all ()
12181222 bad_column_numbers = [j for j in range (original_number_of_columns ) if mask [j ]]
12191223 for j in bad_column_numbers :
0 commit comments