Skip to content

Commit e08aac8

Browse files
author
Eric Tsai
committed
rename munge_routines to munge
1 parent d6d4a72 commit e08aac8

8 files changed

Lines changed: 30 additions & 31 deletions

File tree

src/election_anomaly/__init__.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
from election_anomaly import database as db
22
from election_anomaly import user_interface as ui
3-
from election_anomaly import munge_routines as mr
3+
from election_anomaly import munge as m
44
from sqlalchemy.orm import sessionmaker
55
import datetime
66
from pathlib import Path
@@ -469,17 +469,17 @@ def add_sub_county_rus_from_results_file(
469469
return error
470470

471471
# add columns for county and sub_ru
472-
wr, error = mr.add_column_from_formula(wr,county_formula, 'County_raw', error, suffix='_SOURCE')
473-
wr, error = mr.add_column_from_formula(wr,sub_ru_formula, 'Sub_County_raw', error, suffix='_SOURCE')
472+
wr, error = m.add_column_from_formula(wr,county_formula, 'County_raw', error, suffix='_SOURCE')
473+
wr, error = m.add_column_from_formula(wr,sub_ru_formula, 'Sub_County_raw', error, suffix='_SOURCE')
474474

475475
# add column for county internal name
476476
ru_dict_old = prep.get_element(self.d['jurisdiction_path'],'dictionary')
477477
ru_dict_new = ru_dict_old[ru_dict_old.cdf_element=='ReportingUnit']
478478
wr = wr.merge(ru_dict_new,how='left',left_on='County_raw',right_on='raw_identifier_value').rename(columns={'cdf_internal_name':'County_internal'})
479479

480480
# add required new columns
481-
wr = mr.add_constant_column(wr,'ReportingUnitType',sub_ru_type)
482-
wr = mr.add_constant_column(wr,'cdf_element','ReportingUnit')
481+
wr = m.add_constant_column(wr,'ReportingUnitType',sub_ru_type)
482+
wr = m.add_constant_column(wr,'cdf_element','ReportingUnit')
483483
wr['Name'] = wr.apply(lambda x: f'{x["County_internal"]};{x["Sub_County_raw"]}',axis=1)
484484
wr['raw_identifier_value'] = wr.apply(lambda x: f'{x["County_raw"]};{x["Sub_County_raw"]}',axis=1)
485485

@@ -542,7 +542,7 @@ def add_elements_from_results_file(self, elements: iter, error: dict, results_fi
542542
for element in elements:
543543
name_field = db.get_name_field(element)
544544
# append <element>_raw
545-
wr, error = mr.add_munged_column(
545+
wr, error = m.add_munged_column(
546546
wr, mu, element, error, mode=mu.cdf_elements.loc[element, 'source'],
547547
inplace=False)
548548
if error:
@@ -584,7 +584,7 @@ def starter_dictionary(self,include_existing=True) -> str:
584584
for element in elements:
585585
w[element] = prep.get_element(self.d['jurisdiction_path'],element)
586586
name_field = db.get_name_field(element)
587-
w[element] = mr.add_constant_column(w[element],'cdf_element',element)
587+
w[element] = m.add_constant_column(w[element],'cdf_element',element)
588588
w[element].rename(columns={name_field:'cdf_internal_name'},inplace=True)
589589
w[element]['raw_identifier_value'] = w[element]['cdf_internal_name']
590590

src/election_anomaly/analyze/__init__.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33

44
import pandas as pd
55
from election_anomaly import user_interface as ui
6-
from election_anomaly import munge_routines as mr
6+
from election_anomaly import munge as m
77
import datetime
88
import os
99
import numpy as np
@@ -536,7 +536,7 @@ def create_candidate_contests(df, columns):
536536
if contest_df.empty:
537537
contest_df['contest_type'] = None
538538
else:
539-
contest_df = mr.add_constant_column(contest_df,'contest_type','Candidate')
539+
contest_df = m.add_constant_column(contest_df,'contest_type','Candidate')
540540
return contest_df
541541

542542

@@ -549,7 +549,7 @@ def create_ballot_measure_contests(df, columns):
549549
if ballotmeasure_df.empty:
550550
ballotmeasure_df['contest_type'] = None
551551
else:
552-
ballotmeasure_df = mr.add_constant_column(ballotmeasure_df,'contest_type','BallotMeasure')
552+
ballotmeasure_df = m.add_constant_column(ballotmeasure_df,'contest_type','BallotMeasure')
553553
return ballotmeasure_df
554554

555555

@@ -563,7 +563,7 @@ def create_contests(df, reporting_units, candidate_columns=None, ballotmeasure_c
563563
contest_selection = pd.concat([c_df, bm_df])
564564
contest_selection = contest_selection.merge(reporting_units,
565565
how='left', left_on='ElectionDistrict_Id', right_index=True)
566-
contest_selection = mr.enum_col_from_id_othertext(contest_selection,'ReportingUnitType',df['ReportingUnitType'])
566+
contest_selection = m.enum_col_from_id_othertext(contest_selection,'ReportingUnitType',df['ReportingUnitType'])
567567
contest_selection.rename(columns={'ReportingUnitType':'contest_district_type'},inplace=True)
568568
return contest_selection
569569

@@ -612,6 +612,6 @@ def create_vote_counts(df, ecsvcj, contest_selection, ru_children, sub_ru):
612612
}
613613
unsummed.rename(columns=rename, inplace=True)
614614
# add columns with names
615-
unsummed = mr.enum_col_from_id_othertext(unsummed,'CountItemType',df['CountItemType'],drop_old=False)
615+
unsummed = m.enum_col_from_id_othertext(unsummed,'CountItemType',df['CountItemType'],drop_old=False)
616616
unsummed = unsummed.merge(contest_selection,how='left',on=['Selection_Id','Contest_Id'])
617617
return unsummed

src/election_anomaly/database/__init__.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
from election_anomaly import user_interface as ui
1616
from configparser import MissingSectionHeaderError
1717
import pandas as pd
18-
from election_anomaly import munge_routines as mr
18+
from election_anomaly import munge as m
1919
import re
2020
from election_anomaly.database import create_cdf_db as db_cdf
2121
import os
@@ -314,7 +314,7 @@ def insert_to_cdf_db(engine, df, element, sep='\t', encoding='iso-8859-1', times
314314
it must be specified in <timestamp>; <df> must have columns matching <element>, except Id and <timestamp> if any"""
315315

316316
# initialize connection and cursor
317-
working = mr.generic_clean(df)
317+
working = m.generic_clean(df)
318318
connection = engine.raw_connection()
319319
cursor = connection.cursor()
320320

@@ -363,7 +363,7 @@ def insert_to_cdf_db(engine, df, element, sep='\t', encoding='iso-8859-1', times
363363

364364
# add any missing columns needed for temp table to working
365365
for c in temp_only_cols:
366-
working = mr.add_constant_column(working,c,None)
366+
working = m.add_constant_column(working,c,None)
367367
working[temp_columns].drop_duplicates().to_csv(
368368
output, sep=sep, header=False, encoding=encoding, index=False, quoting=csv.QUOTE_MINIMAL)
369369
# set current position for the StringIO object to the beginning of the string
@@ -436,7 +436,7 @@ def append_id_to_dframe(engine: sqlalchemy.engine, df: pd.DataFrame, table, col_
436436
df_cols = list(col_map.keys())
437437

438438
# create temp db table with info from df, without index
439-
df = mr.generic_clean(df)
439+
df = m.generic_clean(df)
440440
df[df_cols].fillna('').to_sql(temp_table, engine,index_label='dataframe_index')
441441
# TODO fillna('') probably redundant
442442

@@ -450,7 +450,7 @@ def append_id_to_dframe(engine: sqlalchemy.engine, df: pd.DataFrame, table, col_
450450
q = sql.SQL("SELECT t.*, tt.dataframe_index FROM {tt} tt LEFT JOIN {t} t ON {on_clause}").format(
451451
tt=sql.Identifier(temp_table),t=sql.Identifier(table),on_clause=on_clause
452452
)
453-
w = mr.generic_clean(pd.read_sql_query(q, connection).set_index('dataframe_index'))
453+
w = m.generic_clean(pd.read_sql_query(q, connection).set_index('dataframe_index'))
454454

455455
# drop temp db table
456456
q = sql.SQL("DROP TABLE {temp_table}").format(temp_table=sql.Identifier(temp_table))

src/election_anomaly/juris_and_munger/__init__.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
from election_anomaly import database as db
44
import pandas as pd
55
from pandas.api.types import is_numeric_dtype
6-
from election_anomaly import munge_routines as mr
6+
from election_anomaly import munge as m
77
from election_anomaly import user_interface as ui
88
import re
99
import numpy as np
@@ -22,7 +22,7 @@ def load_contests(self, engine, contest_type: str, error: dict) -> dict:
2222
.fillna('none or unknown')
2323

2424
# add contest_type column
25-
df = mr.add_constant_column(df,'contest_type',contest_type)
25+
df = m.add_constant_column(df,'contest_type',contest_type)
2626

2727
# add 'none or unknown' record
2828
df = add_none_or_unknown(df,contest_type=contest_type)
@@ -125,7 +125,7 @@ def get_aux_data(self, aux_data_dir, err, project_root=None) -> dict:
125125

126126
# cast primary key(s) as int if possible, and set as (multi-)index
127127
primary_keys = self.aux_meta.loc[abbrev, 'primary_key'].split(',')
128-
df = mr.cast_cols_as_int(df,primary_keys,error_msg=f'In dataframe for {abbrev}')
128+
df = m.cast_cols_as_int(df,primary_keys,error_msg=f'In dataframe for {abbrev}')
129129
df.set_index(primary_keys, inplace=True)
130130

131131
aux_data_dict[abbrev] = df
@@ -151,7 +151,7 @@ def check_against_self(self):
151151
problems.append(f'''At least one source in cdf_elements.txt is not recognized: {b_str} ''')
152152

153153
# formulas have good syntax
154-
bad_formula = [x for x in self.cdf_elements.raw_identifier_formula.unique() if not mr.good_syntax(x)]
154+
bad_formula = [x for x in self.cdf_elements.raw_identifier_formula.unique() if not m.good_syntax(x)]
155155
if bad_formula:
156156
f_str = ','.join(bad_formula)
157157
problems.append(f'''At least one formula in cdf_elements.txt has bad syntax: {f_str} ''')
@@ -228,7 +228,7 @@ def read_munger_info_from_files(dir_path,project_root=None,aux_data_dir=None):
228228
# add column for list of fields used in formulas
229229
cdf_elements['fields'] = [[]]*cdf_elements.shape[0]
230230
for i,r in cdf_elements.iterrows():
231-
text_field_list,last_text = mr.text_fragments_and_fields(cdf_elements.loc[i,'raw_identifier_formula'])
231+
text_field_list,last_text = m.text_fragments_and_fields(cdf_elements.loc[i,'raw_identifier_formula'])
232232
cdf_elements.loc[i,'fields'] = [f for t,f in text_field_list]
233233

234234
# read formatting info
@@ -500,7 +500,7 @@ def check_munger_file_contents(munger_name,project_root):
500500
problems.append(f'''At least one source in cdf_elements.txt is not recognized: {b_str} ''')
501501

502502
# formulas have good syntax
503-
bad_formula = [x for x in cdf_elements.raw_identifier_formula.unique() if not mr.good_syntax(x)]
503+
bad_formula = [x for x in cdf_elements.raw_identifier_formula.unique() if not m.good_syntax(x)]
504504
if bad_formula:
505505
f_str = ','.join(bad_formula)
506506
problems.append(f'''At least one formula in cdf_elements.txt has bad syntax: {f_str} ''')
@@ -663,7 +663,7 @@ def load_juris_dframe_into_cdf(session,element,juris_path,project_root,error,loa
663663
cdf_e = pd.read_sql_table(e,session.bind)
664664
# for every instance of the enumeration in the current table, add id and othertype columns to the dataframe
665665
if e in df.columns:
666-
df = mr.enum_col_to_id_othertext(df,e,cdf_e)
666+
df = m.enum_col_to_id_othertext(df,e,cdf_e)
667667

668668
# get Ids for any foreign key (or similar) in the table, e.g., Party_Id, etc.
669669
fk_file_path = os.path.join(
File renamed without changes.
Binary file not shown.

src/election_anomaly/preparation/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
# Routines to aid in preparing Jurisdiction and Munger files
22
import pandas as pd
33
import os
4-
from election_anomaly import munge_routines as mr
54
from election_anomaly import user_interface as ui
65
from election_anomaly import juris_and_munger as jm
76
from pathlib import Path

src/election_anomaly/user_interface/__init__.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
from configparser import ConfigParser
2-
from election_anomaly import munge_routines as mr
2+
from election_anomaly import munge as m
33
import pandas as pd
44
from pandas.errors import ParserError, ParserWarning
55
import numpy as np
@@ -178,7 +178,7 @@ def read_single_datafile(munger: jm.Munger, f_path: str, err: dict) -> [pd.DataF
178178
e = f'Nothing read from datafile; file type {munger.file_type} may be inconsistent, or datafile may be empty.'
179179
add_error(err,'format.txt',e)
180180
else:
181-
df = mr.generic_clean(df)
181+
df = m.generic_clean(df)
182182
err = jm.check_results_munger_compatibility(munger, df, err)
183183
return [df, err]
184184
except UnicodeDecodeError as ude:
@@ -200,7 +200,7 @@ def read_combine_results(mu: jm.Munger, results_file, project_root, err, aux_dat
200200
if [k for k in err.keys() if err[k] != None]:
201201
return pd.DataFrame(), err
202202
else:
203-
working = mr.cast_cols_as_int(working, mu.count_columns,mode='index')
203+
working = m.cast_cols_as_int(working, mu.count_columns,mode='index')
204204

205205
# merge with auxiliary files (if any)
206206
if aux_data_dir is not None:
@@ -209,7 +209,7 @@ def read_combine_results(mu: jm.Munger, results_file, project_root, err, aux_dat
209209
for abbrev,r in mu.aux_meta.iterrows():
210210
# cast foreign key columns of main results file as int if possible
211211
foreign_key = r['foreign_key'].split(',')
212-
working = mr.cast_cols_as_int(working,foreign_key)
212+
working = m.cast_cols_as_int(working,foreign_key)
213213
# rename columns
214214
col_rename = {f'{c}':f'{abbrev}[{c}]' for c in aux_data[abbrev].columns}
215215
# merge auxiliary info into <working>
@@ -253,13 +253,13 @@ def new_datafile(
253253
count_columns_by_name = [raw.columns[x] for x in munger.count_columns]
254254

255255
try:
256-
raw = mr.munge_clean(raw, munger)
256+
raw = m.munge_clean(raw, munger)
257257
except:
258258
err['datafile_error'] = ['Cleaning of datafile failed. Results not loaded to database.']
259259
return err
260260

261261
try:
262-
err = mr.raw_elements_to_cdf(session,project_root,juris,munger,raw,count_columns_by_name,err,ids=results_info)
262+
err = m.raw_elements_to_cdf(session,project_root,juris,munger,raw,count_columns_by_name,err,ids=results_info)
263263
except Exception as exc:
264264
e = f'Unspecified error during munging: {exc}\nResults not loaded to database.'
265265
add_error(err,'datafile_error',e)

0 commit comments

Comments
 (0)