Skip to content

Commit 5295b41

Browse files
committed
WIP
1 parent 528fbef commit 5295b41

1 file changed

Lines changed: 44 additions & 39 deletions

File tree

src/electiondata/userinterface/__init__.py

Lines changed: 44 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -8,21 +8,22 @@
88
from electiondata import (
99
database as db,
1010
munge as m,
11-
juris as jm,
1211
nist as nist,
1312
constants,
1413
)
14+
15+
from slugify import slugify
1516
import pandas as pd
1617
from pandas.errors import ParserError
17-
import os
18+
from os import walk, listdir
19+
from os.path import join, isdir, isfile
1820
from pathlib import Path
1921
from typing import Optional, Dict, Any, List
20-
import datetime
21-
import csv
22-
import numpy as np
23-
import inspect
24-
import xml.etree.ElementTree as et
25-
import json
22+
from csv import QUOTE_MINIMAL
23+
from numpy import where
24+
from inspect import currentframe
25+
from xml.etree.ElementTree import parse
26+
from json import loads
2627
import shutil
2728
import xlrd
2829

@@ -191,7 +192,7 @@ def read_single_datafile(
191192
kwargs = tabular_kwargs(p, kwargs, aux=aux)
192193
if p["multi_block"] == "yes":
193194
kwargs["header"] = None
194-
kwargs["quoting"] = csv.QUOTE_MINIMAL
195+
kwargs["quoting"] = QUOTE_MINIMAL
195196
if p["flat_text_delimiter"] in ["tab", "\\t"]:
196197
kwargs["sep"] = "\t"
197198
else:
@@ -207,7 +208,7 @@ def read_single_datafile(
207208
else:
208209
driver = nist.xml_count_parse_info(p, ignore_namespace=True)
209210
xml_path_info = nist.xml_string_path_info(p["munge_fields"], p["namespace"])
210-
tree = et.parse(f_path)
211+
tree = parse(f_path)
211212
df, err = nist.df_from_tree(
212213
tree,
213214
xml_path_info=xml_path_info,
@@ -221,7 +222,7 @@ def read_single_datafile(
221222
elif p["file_type"] in ["json-nested"]:
222223
# TODO what if json-nested is a lookup?
223224
with open(f_path, "r") as f:
224-
data = json.loads(f.read())
225+
data = loads(f.read())
225226
df = pd.json_normalize(data, **kwargs)
226227
if not fatal_error(err):
227228
df.rename(columns=rename, inplace=True)
@@ -273,7 +274,7 @@ def read_single_datafile(
273274
err = add_new_error(
274275
err,
275276
"system",
276-
f"{Path(__file__).absolute().parents[0].name}.{inspect.currentframe().f_code.co_name}",
277+
f"{Path(__file__).absolute().parents[0].name}.{currentframe().f_code.co_name}",
277278
f"Unexpected error setting and filling headers after padding file {file_name}",
278279
)
279280

@@ -366,7 +367,7 @@ def excel_to_dict(
366367
err = add_new_error(
367368
err,
368369
"system",
369-
f"{Path(__file__).absolute().parents[0].name}.{inspect.currentframe().f_code.co_name}",
370+
f"{Path(__file__).absolute().parents[0].name}.{currentframe().f_code.co_name}",
370371
f"Unexpected exception while getting row-constant keyword arguments for \n"
371372
f"rows_to_read: {rows_to_read}\n"
372373
f"kwargs: {kwargs}.\n"
@@ -455,15 +456,15 @@ def copy_directory_with_backup(
455456
<backup_suffix>"""
456457
err = None
457458
# if the original to be copied is actually a directory
458-
if os.path.isdir(original_path):
459+
if isdir(original_path):
459460
if backup_suffix:
460461
# make backup of anything with existing name
461-
if os.path.isdir(copy_path):
462+
if isdir(copy_path):
462463
shutil.move(copy_path, f"{copy_path}{backup_suffix}")
463464
print(f"Moved {copy_path} to {copy_path}{backup_suffix}")
464-
elif os.path.isfile(copy_path):
465+
elif isfile(copy_path):
465466
old_stem = Path(copy_path).stem
466-
backup_path = os.path.join(
467+
backup_path = join(
467468
Path(copy_path).parent,
468469
f"{old_stem}{backup_suffix}.{Path(copy_path).suffix}",
469470
)
@@ -483,7 +484,7 @@ def copy_directory_with_backup(
483484
err = add_new_error(
484485
err,
485486
"warn-system",
486-
f"{Path(__file__).absolute().parents[0].name}.{inspect.currentframe().f_code.co_name}",
487+
f"{Path(__file__).absolute().parents[0].name}.{currentframe().f_code.co_name}",
487488
f"No such directory: {original_path}",
488489
)
489490
return err
@@ -494,11 +495,11 @@ def copy_with_err_handling(
494495
) -> Optional[dict]:
495496
err = None
496497
Path(copy_path).mkdir(parents=True, exist_ok=True)
497-
for root, dirs, files in os.walk(original_path, topdown=True):
498+
for root, dirs, files in walk(original_path, topdown=True):
498499
new_root = root.replace(original_path, copy_path)
499500
for f in files:
500-
old = os.path.join(root, f)
501-
new = os.path.join(new_root, f)
501+
old = join(root, f)
502+
new = join(new_root, f)
502503
try:
503504
shutil.copy(old, new)
504505
print(f"Copied {old} to {new}")
@@ -507,11 +508,11 @@ def copy_with_err_handling(
507508
err = add_new_error(
508509
err,
509510
"warn-file",
510-
f"{Path(__file__).absolute().parents[0].name}.{inspect.currentframe().f_code.co_name}",
511+
f"{Path(__file__).absolute().parents[0].name}.{currentframe().f_code.co_name}",
511512
f"Error while copying {old} to {new}:\n{she}",
512513
)
513514
for d in dirs:
514-
Path(os.path.join(new_root, d)).mkdir(parents=True, exist_ok=True)
515+
Path(join(new_root, d)).mkdir(parents=True, exist_ok=True)
515516
return err
516517

517518

@@ -644,12 +645,12 @@ def report(
644645

645646
if err_warn and [k for k in err_warn.keys() if err_warn[k]]:
646647
# create reporting directory if it does not exist
647-
if os.path.isfile(output_location):
648+
if isfile(output_location):
648649
print(
649650
"Target directory for errors and warnings exists as a file. Nothing will be reported."
650651
)
651652
return None
652-
elif not os.path.isdir(output_location):
653+
elif not isdir(output_location):
653654
Path(output_location).mkdir(parents=True, exist_ok=True)
654655

655656
if not key_list:
@@ -700,8 +701,10 @@ def report(
700701
out_str = f"\n{et.title()} errors ({nk_name}):\n{msg[(et, nk)]}\n\n{warn_str}"
701702

702703
# write info to a .errors file named for the name_key <nk>
703-
out_path = os.path.join(
704-
output_location, f"{file_prefix}_{et}_{nk_name}.errors"
704+
out_path = join(
705+
output_location, slugify(
706+
f"{file_prefix}_{et}_{nk_name}.errors",regex_pattern=r'[^ A-z0-9-_]+', lowercase=False
707+
)
705708
)
706709
with open(out_path, "a") as f:
707710
f.write(out_str)
@@ -722,8 +725,10 @@ def report(
722725
# write output
723726
# write info to a .warnings file named for the error-type and name_key
724727

725-
out_path = os.path.join(
726-
output_location, f"{file_prefix}_{et}_{nk_name}.warnings"
728+
out_path = join(
729+
output_location, slugify(
730+
f"{file_prefix}_{et}_{nk_name}.warnings",regex_pattern=r'[^ A-z0-9-_]+', lowercase=False
731+
)
727732
)
728733
with open(out_path, "a") as f:
729734
f.write(out_str)
@@ -770,7 +775,7 @@ def add_new_error(
770775
err = add_new_error(
771776
err,
772777
"system",
773-
f"{Path(__file__).absolute().parents[0].name}.{inspect.currentframe().f_code.co_name}",
778+
f"{Path(__file__).absolute().parents[0].name}.{currentframe().f_code.co_name}",
774779
f"Unrecognized key ({err_type}) for message {msg}",
775780
)
776781
return err
@@ -824,8 +829,8 @@ def confirm_essential_info(
824829
the given directory; False otherwise"""
825830

826831
# loop through files
827-
for f in [f for f in os.listdir(directory) if f[-4:] == ".ini"]:
828-
p_path = os.path.join(directory, f)
832+
for f in [f for f in listdir(directory) if f[-4:] == ".ini"]:
833+
p_path = join(directory, f)
829834
file_confirmed = False
830835
while not file_confirmed:
831836
param_dict, err = get_parameters(
@@ -873,10 +878,10 @@ def election_juris_list(ini_path: str, results_path: Optional[str] = None) -> li
873878
for ini files whose results files are in the results_path directory
874879
"""
875880
ej_set = set()
876-
for subdir, dirs, files in os.walk(ini_path):
881+
for subdir, dirs, files in walk(ini_path):
877882
for f in files:
878883
if (f.endswith(".ini")) and (not f.endswith("template.ini")):
879-
full_path = os.path.join(subdir, f)
884+
full_path = join(subdir, f)
880885
d, err = get_parameters(
881886
param_file=full_path,
882887
header="election_results",
@@ -887,7 +892,7 @@ def election_juris_list(ini_path: str, results_path: Optional[str] = None) -> li
887892
# if we're not checking against results directory, or if we are and the ini file
888893
# points to a file in or below the results directory
889894
if (not results_path) or (
890-
os.path.isfile(os.path.join(results_path, d["results_file"]))
895+
isfile(join(results_path, d["results_file"]))
891896
):
892897
# include the pair in the output
893898
ej_set.update({(d["election"], d["jurisdiction"])})
@@ -1142,7 +1147,7 @@ def clean_candidate_names(df):
11421147
extra_df = df[extra_cols]
11431148
df = df[df_cols]
11441149
df["party"] = df["type"].str.split(" ")
1145-
df["party"] = np.where(
1150+
df["party"] = where(
11461151
df["party"].str.contains("party", case=False),
11471152
df["party"]
11481153
.map(lambda x: x[0:-1])
@@ -1175,20 +1180,20 @@ def clean_candidate_names(df):
11751180
df["chamber"] = df["chamber"].fillna("unknown")
11761181
df["district"] = df["contest"].str.extract(r"(\d+)")
11771182
df["contest_short"] = ""
1178-
df["contest_short"] = np.where(
1183+
df["contest_short"] = where(
11791184
df["chamber"] != "unknown",
11801185
df[df.columns[5:]].apply(lambda x: "".join(x.dropna().astype(str)), axis=1),
11811186
df["contest_short"],
11821187
)
1183-
df["contest_short"] = np.where(
1188+
df["contest_short"] = where(
11841189
df["chamber"] == "unknown",
11851190
df["contest"]
11861191
.str.split(" ")
11871192
.map(lambda words: "".join([word[0:3] for word in words if word != "of"])),
11881193
df["contest_short"],
11891194
)
11901195
# Handle GA 2020 runoff senate elections
1191-
df["contest_short"] = np.where(
1196+
df["contest_short"] = where(
11921197
df["parent"].str.contains("runoff"),
11931198
df["contest_short"] + "Runoff",
11941199
df["contest_short"],

0 commit comments

Comments
 (0)