Commit 3f0d91d

Merge pull request #744 from ElectionDataAnalysis/issue740-slugify
Issue740 slugify
2 parents 528fbef + 11b6d95 commit 3f0d91d

2 files changed

Lines changed: 50 additions & 42 deletions

requirements.txt

Lines changed: 4 additions & 0 deletions
@@ -13,3 +13,7 @@ requests==2.25.1
 openpyxl==3.0.6
 dicttoxml==1.7.4
 lxml==4.6.3
+python-slugify~=5.0.2
+electiondata~=2.0
+configparser~=5.0.2
+setuptools~=59.0.1
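All four new pins use the compatible-release operator ~= , which accepts later patch releases but not the next minor release (or, for electiondata~=2.0, the next major release). A minimal sketch of what such a specifier admits, using the third-party packaging library purely for illustration (it is not added by this commit):

from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=5.0.2")   # compatible release: >=5.0.2 and ==5.0.*
print(spec.contains("5.0.7"))    # True  - a later patch release is accepted
print(spec.contains("5.1.0"))    # False - the next minor release is not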

src/electiondata/userinterface/__init__.py

Lines changed: 46 additions & 42 deletions
@@ -4,34 +4,28 @@
     DuplicateOptionError,
     ParsingError,
 )
+from csv import QUOTE_MINIMAL
+from inspect import currentframe
+import json
+from numpy import where
+from os import walk, listdir
+import os.path
+import pandas as pd
+from pathlib import Path
+import shutil
+from slugify import slugify
+from sqlalchemy.orm import Session
+from typing import Optional, Dict, Any, List
+import xlrd
+import xml.etree.ElementTree
 
+# local imports
 from electiondata import (
     database as db,
     munge as m,
-    juris as jm,
     nist as nist,
     constants,
 )
-import pandas as pd
-from pandas.errors import ParserError
-import os
-from pathlib import Path
-from typing import Optional, Dict, Any, List
-import datetime
-import csv
-import numpy as np
-import inspect
-import xml.etree.ElementTree as et
-import json
-import shutil
-import xlrd
-
-# may need for certain excel imports: import openpyxl
-from sqlalchemy.orm import Session
-
-
-# mapping from internal database reportingunit types to the user-facing contest types
-# (contests are categorized by the reporting unit type of their corresponding districts)
 
 
 def find_dupes(df):
@@ -59,9 +53,9 @@ def json_kwargs(
         else:
             json_rename[path_list[-1]] = mf
     meta = list(list(t) for t in meta_set)
-    json_kwargs = {"meta": meta, "record_path": record_path, "errors": "ignore"}
+    j_kwargs = {"meta": meta, "record_path": record_path, "errors": "ignore"}
 
-    return json_kwargs, json_rename
+    return j_kwargs, json_rename
 
 
 def tabular_kwargs(
@@ -137,7 +131,7 @@ def list_desired_excel_sheets(f_path: str, p: dict) -> (Optional[list], Optional
         all_sheets = xl.sheet_names
         # xlsx = openpyxl.load_workbook(f_path)
        # all_sheets = xlsx.get_sheet_names()
-    except Exception as exc:
+    except Exception:
         try:
             # read xls file
             xls = xlrd.open_workbook(f_path, on_demand=True)
@@ -191,7 +185,7 @@ def read_single_datafile(
         kwargs = tabular_kwargs(p, kwargs, aux=aux)
         if p["multi_block"] == "yes":
             kwargs["header"] = None
-            kwargs["quoting"] = csv.QUOTE_MINIMAL
+            kwargs["quoting"] = QUOTE_MINIMAL
         if p["flat_text_delimiter"] in ["tab", "\\t"]:
             kwargs["sep"] = "\t"
         else:
@@ -207,7 +201,7 @@ def read_single_datafile(
         else:
             driver = nist.xml_count_parse_info(p, ignore_namespace=True)
         xml_path_info = nist.xml_string_path_info(p["munge_fields"], p["namespace"])
-        tree = et.parse(f_path)
+        tree = xml.etree.ElementTree.parse(f_path)
         df, err = nist.df_from_tree(
             tree,
             xml_path_info=xml_path_info,
@@ -273,7 +267,7 @@ def read_single_datafile(
         err = add_new_error(
             err,
             "system",
-            f"{Path(__file__).absolute().parents[0].name}.{inspect.currentframe().f_code.co_name}",
+            f"{Path(__file__).absolute().parents[0].name}.{currentframe().f_code.co_name}",
             f"Unexpected error setting and filling headers after padding file {file_name}",
         )
 
@@ -310,7 +304,7 @@ def read_single_datafile(
     except UnicodeDecodeError as ude:
         err_str = f"Encoding error. Datafile not read completely.\n\t{ude}"
         err = add_new_error(err, "file", file_name, err_str)
-    except ParserError as pe:
+    except pd.errors.ParserError as pe:
         # DFs have trouble comparing against None. So we return an empty DF and
         # check for emptiness below as an indication of an error.
         err_str = f"Error parsing results file.\n{pe}"
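With the dedicated import of ParserError gone, the exception is reached through the pandas namespace that is already imported as pd. A minimal, self-contained sketch of the pattern (the malformed CSV below is hypothetical and not part of this commit):

import io
import pandas as pd

# hypothetical malformed input: line 3 has more fields than the header
bad_csv = io.StringIO("a,b\n1,2\n3,4,5\n")
try:
    pd.read_csv(bad_csv)
except pd.errors.ParserError as pe:
    print(f"Error parsing results file.\n{pe}")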
@@ -366,7 +360,7 @@ def excel_to_dict(
         err = add_new_error(
             err,
             "system",
-            f"{Path(__file__).absolute().parents[0].name}.{inspect.currentframe().f_code.co_name}",
+            f"{Path(__file__).absolute().parents[0].name}.{currentframe().f_code.co_name}",
             f"Unexpected exception while getting row-constant keyword arguments for \n"
             f"rows_to_read: {rows_to_read}\n"
             f"kwargs: {kwargs}.\n"
@@ -483,7 +477,7 @@ def copy_directory_with_backup(
         err = add_new_error(
             err,
             "warn-system",
-            f"{Path(__file__).absolute().parents[0].name}.{inspect.currentframe().f_code.co_name}",
+            f"{Path(__file__).absolute().parents[0].name}.{currentframe().f_code.co_name}",
             f"No such directory: {original_path}",
         )
         return err
@@ -494,7 +488,7 @@ def copy_with_err_handling(
 ) -> Optional[dict]:
     err = None
     Path(copy_path).mkdir(parents=True, exist_ok=True)
-    for root, dirs, files in os.walk(original_path, topdown=True):
+    for root, dirs, files in walk(original_path, topdown=True):
         new_root = root.replace(original_path, copy_path)
         for f in files:
             old = os.path.join(root, f)
@@ -507,7 +501,7 @@ def copy_with_err_handling(
                 err = add_new_error(
                     err,
                     "warn-file",
-                    f"{Path(__file__).absolute().parents[0].name}.{inspect.currentframe().f_code.co_name}",
+                    f"{Path(__file__).absolute().parents[0].name}.{currentframe().f_code.co_name}",
                     f"Error while copying {old} to {new}:\n{she}",
                 )
         for d in dirs:
@@ -701,7 +695,12 @@ def report(
 
             # write info to a .errors or .errors file named for the name_key <nk>
             out_path = os.path.join(
-                output_location, f"{file_prefix}_{et}_{nk_name}.errors"
+                output_location,
+                slugify(
+                    f"{file_prefix}_{et}_{nk_name}.errors",
+                    regex_pattern=r"[^ A-z0-9-_]+",
+                    lowercase=False,
+                ),
             )
             with open(out_path, "a") as f:
                 f.write(out_str)
@@ -723,7 +722,12 @@ def report(
             # write info to a .warnings file named for the error-type and name_key
 
             out_path = os.path.join(
-                output_location, f"{file_prefix}_{et}_{nk_name}.warnings"
+                output_location,
+                slugify(
+                    f"{file_prefix}_{et}_{nk_name}.warnings",
+                    regex_pattern=r"[^ A-z0-9-_]+",
+                    lowercase=False,
+                ),
             )
             with open(out_path, "a") as f:
                 f.write(out_str)
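Both filename-building sites now pass the assembled name through slugify before joining it to output_location. Anything matched by the negated character class in regex_pattern is swapped for slugify's default '-' separator, while spaces, hyphens, underscores, digits, and letters survive, and lowercase=False keeps capitals intact. A minimal sketch with a made-up name_key (not taken from this commit):

from slugify import slugify  # python-slugify, pinned in requirements.txt above

# hypothetical stand-in for f"{file_prefix}_{et}_{nk_name}.errors"
raw = "postgres_munge-error_Prince William County: 2020 General.errors"
safe = slugify(raw, regex_pattern=r"[^ A-z0-9-_]+", lowercase=False)
print(safe)  # e.g. postgres_munge-error_Prince William County- 2020 General-errors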
@@ -770,7 +774,7 @@ def add_new_error(
         err = add_new_error(
             err,
             "system",
-            f"{Path(__file__).absolute().parents[0].name}.{inspect.currentframe().f_code.co_name}",
+            f"{Path(__file__).absolute().parents[0].name}.{currentframe().f_code.co_name}",
             f"Unrecognized key ({err_type}) for message {msg}",
         )
         return err
@@ -824,7 +828,7 @@ def confirm_essential_info(
     the given directory; False otherwise"""
 
     # loop through files
-    for f in [f for f in os.listdir(directory) if f[-4:] == ".ini"]:
+    for f in [f for f in listdir(directory) if f[-4:] == ".ini"]:
         p_path = os.path.join(directory, f)
         file_confirmed = False
         while not file_confirmed:
@@ -873,7 +877,7 @@ def election_juris_list(ini_path: str, results_path: Optional[str] = None) -> li
     for ini files whose results files are in the results_path directory
     """
     ej_set = set()
-    for subdir, dirs, files in os.walk(ini_path):
+    for subdir, dirs, files in walk(ini_path):
         for f in files:
             if (f.endswith(".ini")) and (not f.endswith("template.ini")):
                 full_path = os.path.join(subdir, f)
@@ -1142,7 +1146,7 @@ def clean_candidate_names(df):
     extra_df = df[extra_cols]
     df = df[df_cols]
     df["party"] = df["type"].str.split(" ")
-    df["party"] = np.where(
+    df["party"] = where(
         df["party"].str.contains("party", case=False),
         df["party"]
         .map(lambda x: x[0:-1])
@@ -1175,20 +1179,20 @@ def clean_candidate_names(df):
     df["chamber"] = df["chamber"].fillna("unknown")
     df["district"] = df["contest"].str.extract(r"(\d+)")
     df["contest_short"] = ""
-    df["contest_short"] = np.where(
+    df["contest_short"] = where(
         df["chamber"] != "unknown",
         df[df.columns[5:]].apply(lambda x: "".join(x.dropna().astype(str)), axis=1),
         df["contest_short"],
     )
-    df["contest_short"] = np.where(
+    df["contest_short"] = where(
         df["chamber"] == "unknown",
         df["contest"]
         .str.split(" ")
         .map(lambda words: "".join([word[0:3] for word in words if word != "of"])),
         df["contest_short"],
     )
     # Handle GA 2020 runoff senate elections
-    df["contest_short"] = np.where(
+    df["contest_short"] = where(
         df["parent"].str.contains("runoff"),
         df["contest_short"] + "Runoff",
         df["contest_short"],
@@ -1213,7 +1217,7 @@ def disambiguate_empty_cols(
     df = df_in.reset_index(drop=True)
 
     # put dummy info into the tops of the bad columns
-    # in order to meet MultiIndex uniqueness criteria
+    # in order to meet pd.MultiIndex uniqueness criteria
     mask = df.eq("").loc[start:].all()
     bad_column_numbers = [j for j in range(original_number_of_columns) if mask[j]]
     for j in bad_column_numbers:

0 commit comments
