88from electiondata import (
99 database as db ,
1010 munge as m ,
11- juris as jm ,
1211 nist as nist ,
1312 constants ,
1413)
14+
15+ from slugify import slugify
1516import pandas as pd
1617from pandas .errors import ParserError
17- import os
18+ from os import walk , listdir
19+ from os .path import join , isdir , isfile
1820from pathlib import Path
1921from typing import Optional , Dict , Any , List
20- import datetime
21- import csv
22- import numpy as np
23- import inspect
24- import xml .etree .ElementTree as et
25- import json
22+ from csv import QUOTE_MINIMAL
23+ from numpy import where
24+ from inspect import currentframe
25+ from xml .etree .ElementTree import parse
26+ from json import loads
2627import shutil
2728import xlrd
2829
@@ -191,7 +192,7 @@ def read_single_datafile(
191192 kwargs = tabular_kwargs (p , kwargs , aux = aux )
192193 if p ["multi_block" ] == "yes" :
193194 kwargs ["header" ] = None
194- kwargs ["quoting" ] = csv . QUOTE_MINIMAL
195+ kwargs ["quoting" ] = QUOTE_MINIMAL
195196 if p ["flat_text_delimiter" ] in ["tab" , "\\ t" ]:
196197 kwargs ["sep" ] = "\t "
197198 else :
@@ -207,7 +208,7 @@ def read_single_datafile(
207208 else :
208209 driver = nist .xml_count_parse_info (p , ignore_namespace = True )
209210 xml_path_info = nist .xml_string_path_info (p ["munge_fields" ], p ["namespace" ])
210- tree = et . parse (f_path )
211+ tree = parse (f_path )
211212 df , err = nist .df_from_tree (
212213 tree ,
213214 xml_path_info = xml_path_info ,
@@ -221,7 +222,7 @@ def read_single_datafile(
221222 elif p ["file_type" ] in ["json-nested" ]:
222223 # TODO what if json-nested is a lookup?
223224 with open (f_path , "r" ) as f :
224- data = json . loads (f .read ())
225+ data = loads (f .read ())
225226 df = pd .json_normalize (data , ** kwargs )
226227 if not fatal_error (err ):
227228 df .rename (columns = rename , inplace = True )
@@ -273,7 +274,7 @@ def read_single_datafile(
273274 err = add_new_error (
274275 err ,
275276 "system" ,
276- f"{ Path (__file__ ).absolute ().parents [0 ].name } .{ inspect . currentframe ().f_code .co_name } " ,
277+ f"{ Path (__file__ ).absolute ().parents [0 ].name } .{ currentframe ().f_code .co_name } " ,
277278 f"Unexpected error setting and filling headers after padding file { file_name } " ,
278279 )
279280
@@ -366,7 +367,7 @@ def excel_to_dict(
366367 err = add_new_error (
367368 err ,
368369 "system" ,
369- f"{ Path (__file__ ).absolute ().parents [0 ].name } .{ inspect . currentframe ().f_code .co_name } " ,
370+ f"{ Path (__file__ ).absolute ().parents [0 ].name } .{ currentframe ().f_code .co_name } " ,
370371 f"Unexpected exception while getting row-constant keyword arguments for \n "
371372 f"rows_to_read: { rows_to_read } \n "
372373 f"kwargs: { kwargs } .\n "
@@ -455,15 +456,15 @@ def copy_directory_with_backup(
455456 <backup_suffix>"""
456457 err = None
457458 # if the original to be copied is actually a directory
458- if os . path . isdir (original_path ):
459+ if isdir (original_path ):
459460 if backup_suffix :
460461 # make backup of anything with existing name
461- if os . path . isdir (copy_path ):
462+ if isdir (copy_path ):
462463 shutil .move (copy_path , f"{ copy_path } { backup_suffix } " )
463464 print (f"Moved { copy_path } to { copy_path } { backup_suffix } " )
464- elif os . path . isfile (copy_path ):
465+ elif isfile (copy_path ):
465466 old_stem = Path (copy_path ).stem
466- backup_path = os . path . join (
467+ backup_path = join (
467468 Path (copy_path ).parent ,
468469 f"{ old_stem } { backup_suffix } .{ Path (copy_path ).suffix } " ,
469470 )
@@ -483,7 +484,7 @@ def copy_directory_with_backup(
483484 err = add_new_error (
484485 err ,
485486 "warn-system" ,
486- f"{ Path (__file__ ).absolute ().parents [0 ].name } .{ inspect . currentframe ().f_code .co_name } " ,
487+ f"{ Path (__file__ ).absolute ().parents [0 ].name } .{ currentframe ().f_code .co_name } " ,
487488 f"No such directory: { original_path } " ,
488489 )
489490 return err
@@ -494,11 +495,11 @@ def copy_with_err_handling(
494495) -> Optional [dict ]:
495496 err = None
496497 Path (copy_path ).mkdir (parents = True , exist_ok = True )
497- for root , dirs , files in os . walk (original_path , topdown = True ):
498+ for root , dirs , files in walk (original_path , topdown = True ):
498499 new_root = root .replace (original_path , copy_path )
499500 for f in files :
500- old = os . path . join (root , f )
501- new = os . path . join (new_root , f )
501+ old = join (root , f )
502+ new = join (new_root , f )
502503 try :
503504 shutil .copy (old , new )
504505 print (f"Copied { old } to { new } " )
@@ -507,11 +508,11 @@ def copy_with_err_handling(
507508 err = add_new_error (
508509 err ,
509510 "warn-file" ,
510- f"{ Path (__file__ ).absolute ().parents [0 ].name } .{ inspect . currentframe ().f_code .co_name } " ,
511+ f"{ Path (__file__ ).absolute ().parents [0 ].name } .{ currentframe ().f_code .co_name } " ,
511512 f"Error while copying { old } to { new } :\n { she } " ,
512513 )
513514 for d in dirs :
514- Path (os . path . join (new_root , d )).mkdir (parents = True , exist_ok = True )
515+ Path (join (new_root , d )).mkdir (parents = True , exist_ok = True )
515516 return err
516517
517518
@@ -644,12 +645,12 @@ def report(
644645
645646 if err_warn and [k for k in err_warn .keys () if err_warn [k ]]:
646647 # create reporting directory if it does not exist
647- if os . path . isfile (output_location ):
648+ if isfile (output_location ):
648649 print (
649650 "Target directory for errors and warnings exists as a file. Nothing will be reported."
650651 )
651652 return None
652- elif not os . path . isdir (output_location ):
653+ elif not isdir (output_location ):
653654 Path (output_location ).mkdir (parents = True , exist_ok = True )
654655
655656 if not key_list :
@@ -700,8 +701,10 @@ def report(
700701 out_str = f"\n { et .title ()} errors ({ nk_name } ):\n { msg [(et , nk )]} \n \n { warn_str } "
701702
702703 # write info to a .errors file named for the name_key <nk>
703- out_path = os .path .join (
704- output_location , f"{ file_prefix } _{ et } _{ nk_name } .errors"
704+ out_path = join (
705+ output_location , slugify (
706+ f"{ file_prefix } _{ et } _{ nk_name } .errors" ,regex_pattern = r'[^ A-z0-9-_]+' , lowercase = False
707+ )
705708 )
706709 with open (out_path , "a" ) as f :
707710 f .write (out_str )
@@ -722,8 +725,10 @@ def report(
722725 # write output
723726 # write info to a .warnings file named for the error-type and name_key
724727
725- out_path = os .path .join (
726- output_location , f"{ file_prefix } _{ et } _{ nk_name } .warnings"
728+ out_path = join (
729+ output_location , slugify (
730+ f"{ file_prefix } _{ et } _{ nk_name } .warnings" ,regex_pattern = r'[^ A-z0-9-_]+' , lowercase = False
731+ )
727732 )
728733 with open (out_path , "a" ) as f :
729734 f .write (out_str )
@@ -770,7 +775,7 @@ def add_new_error(
770775 err = add_new_error (
771776 err ,
772777 "system" ,
773- f"{ Path (__file__ ).absolute ().parents [0 ].name } .{ inspect . currentframe ().f_code .co_name } " ,
778+ f"{ Path (__file__ ).absolute ().parents [0 ].name } .{ currentframe ().f_code .co_name } " ,
774779 f"Unrecognized key ({ err_type } ) for message { msg } " ,
775780 )
776781 return err
@@ -824,8 +829,8 @@ def confirm_essential_info(
824829 the given directory; False otherwise"""
825830
826831 # loop through files
827- for f in [f for f in os . listdir (directory ) if f [- 4 :] == ".ini" ]:
828- p_path = os . path . join (directory , f )
832+ for f in [f for f in listdir (directory ) if f [- 4 :] == ".ini" ]:
833+ p_path = join (directory , f )
829834 file_confirmed = False
830835 while not file_confirmed :
831836 param_dict , err = get_parameters (
@@ -873,10 +878,10 @@ def election_juris_list(ini_path: str, results_path: Optional[str] = None) -> li
873878 for ini files whose results files are in the results_path directory
874879 """
875880 ej_set = set ()
876- for subdir , dirs , files in os . walk (ini_path ):
881+ for subdir , dirs , files in walk (ini_path ):
877882 for f in files :
878883 if (f .endswith (".ini" )) and (not f .endswith ("template.ini" )):
879- full_path = os . path . join (subdir , f )
884+ full_path = join (subdir , f )
880885 d , err = get_parameters (
881886 param_file = full_path ,
882887 header = "election_results" ,
@@ -887,7 +892,7 @@ def election_juris_list(ini_path: str, results_path: Optional[str] = None) -> li
887892 # if we're not checking against results directory, or if we are and the ini file
888893 # points to a file in or below the results directory
889894 if (not results_path ) or (
890- os . path . isfile (os . path . join (results_path , d ["results_file" ]))
895+ isfile (join (results_path , d ["results_file" ]))
891896 ):
892897 # include the pair in the output
893898 ej_set .update ({(d ["election" ], d ["jurisdiction" ])})
@@ -1142,7 +1147,7 @@ def clean_candidate_names(df):
11421147 extra_df = df [extra_cols ]
11431148 df = df [df_cols ]
11441149 df ["party" ] = df ["type" ].str .split (" " )
1145- df ["party" ] = np . where (
1150+ df ["party" ] = where (
11461151 df ["party" ].str .contains ("party" , case = False ),
11471152 df ["party" ]
11481153 .map (lambda x : x [0 :- 1 ])
@@ -1175,20 +1180,20 @@ def clean_candidate_names(df):
11751180 df ["chamber" ] = df ["chamber" ].fillna ("unknown" )
11761181 df ["district" ] = df ["contest" ].str .extract (r"(\d+)" )
11771182 df ["contest_short" ] = ""
1178- df ["contest_short" ] = np . where (
1183+ df ["contest_short" ] = where (
11791184 df ["chamber" ] != "unknown" ,
11801185 df [df .columns [5 :]].apply (lambda x : "" .join (x .dropna ().astype (str )), axis = 1 ),
11811186 df ["contest_short" ],
11821187 )
1183- df ["contest_short" ] = np . where (
1188+ df ["contest_short" ] = where (
11841189 df ["chamber" ] == "unknown" ,
11851190 df ["contest" ]
11861191 .str .split (" " )
11871192 .map (lambda words : "" .join ([word [0 :3 ] for word in words if word != "of" ])),
11881193 df ["contest_short" ],
11891194 )
11901195 # Handle GA 2020 runoff senate elections
1191- df ["contest_short" ] = np . where (
1196+ df ["contest_short" ] = where (
11921197 df ["parent" ].str .contains ("runoff" ),
11931198 df ["contest_short" ] + "Runoff" ,
11941199 df ["contest_short" ],
0 commit comments