Skip to content

Commit d6d4a72

Browse files
author
Eric Tsai
committed
Rename the db_routines package to database (import alias dbr becomes db)
1 parent f865fc3 commit d6d4a72

9 files changed

Lines changed: 85 additions & 90 deletions

File tree

src/election_anomaly/__init__.py

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from election_anomaly import db_routines as dbr
1+
from election_anomaly import database as db
22
from election_anomaly import user_interface as ui
33
from election_anomaly import munge_routines as mr
44
from sqlalchemy.orm import sessionmaker
@@ -60,13 +60,13 @@ def __init__(self):
6060
self.tracker = dict()
6161

6262
# create db if it does not already exist
63-
error = dbr.establish_connection(paramfile=self.d['db_paramfile'], db_name=self.d['db_name'])
63+
error = db.establish_connection(paramfile=self.d['db_paramfile'], db_name=self.d['db_name'])
6464
if error:
65-
dbr.create_new_db(self.d['project_root'], self.d['db_paramfile'], self.d['db_name'])
65+
db.create_new_db(self.d['project_root'], self.d['db_paramfile'], self.d['db_name'])
6666

6767
# connect to db
6868
try:
69-
self.engine = dbr.sql_alchemy_connect(paramfile=self.d['db_paramfile'], db_name=self.d['db_name'])
69+
self.engine = db.sql_alchemy_connect(paramfile=self.d['db_paramfile'], db_name=self.d['db_name'])
7070
Session = sessionmaker(bind=self.engine)
7171
self.session = Session()
7272
except Exception as e:
@@ -228,8 +228,8 @@ def check_errors(self):
228228

229229
def track_results(self):
230230
filename = self.d['results_file']
231-
top_reporting_unit_id = dbr.name_to_id(self.session,'ReportingUnit', self.d['top_reporting_unit'])
232-
election_id = dbr.name_to_id(self.session,'Election',self.d['election'])
231+
top_reporting_unit_id = db.name_to_id(self.session,'ReportingUnit', self.d['top_reporting_unit'])
232+
election_id = db.name_to_id(self.session,'Election',self.d['election'])
233233

234234
data = pd.DataFrame(
235235
[[self.d['results_short_name'],filename,
@@ -238,12 +238,12 @@ def track_results(self):
238238
columns=['short_name', 'file_name',
239239
'download_date', 'source',
240240
'note', 'ReportingUnit_Id', 'Election_Id','created_at'])
241-
e = dbr.insert_to_cdf_db(self.session.bind, data, '_datafile')
241+
e = db.insert_to_cdf_db(self.session.bind, data, '_datafile')
242242
if e:
243243
return [0, 0], e
244244
else:
245245
col_map = {'short_name':'short_name'}
246-
datafile_id = dbr.append_id_to_dframe(
246+
datafile_id = db.append_id_to_dframe(
247247
self.session.bind,data,'_datafile',col_map=col_map
248248
).iloc[0]['_datafile_Id']
249249
return [datafile_id, election_id], e
@@ -540,7 +540,7 @@ def add_elements_from_results_file(self, elements: iter, error: dict, results_fi
540540
wr, mu, error = ui.read_results(kwargs,error)
541541

542542
for element in elements:
543-
name_field = dbr.get_name_field(element)
543+
name_field = db.get_name_field(element)
544544
# append <element>_raw
545545
wr, error = mr.add_munged_column(
546546
wr, mu, element, error, mode=mu.cdf_elements.loc[element, 'source'],
@@ -583,7 +583,7 @@ def starter_dictionary(self,include_existing=True) -> str:
583583
old.drop()
584584
for element in elements:
585585
w[element] = prep.get_element(self.d['jurisdiction_path'],element)
586-
name_field = dbr.get_name_field(element)
586+
name_field = db.get_name_field(element)
587587
w[element] = mr.add_constant_column(w[element],'cdf_element',element)
588588
w[element].rename(columns={name_field:'cdf_internal_name'},inplace=True)
589589
w[element]['raw_identifier_value'] = w[element]['cdf_internal_name']
@@ -650,21 +650,21 @@ def __init__(self):
650650
'db_name', 'results_file'])
651651
self.d['results_file_short'] = get_filename(self.d['results_file'])
652652

653-
eng = dbr.sql_alchemy_connect(paramfile=self.d['db_paramfile'],
653+
eng = db.sql_alchemy_connect(paramfile=self.d['db_paramfile'],
654654
db_name=self.d['db_name'])
655655
Session = sessionmaker(bind=eng)
656656
self.session = Session()
657657

658658

659659
def display_options(self, input, verbose=False, filters=None):
660660
if not verbose:
661-
results = dbr.get_input_options(self.session, input, False)
661+
results = db.get_input_options(self.session, input, False)
662662
else:
663663
if not filters:
664-
df = pd.DataFrame(dbr.get_input_options(self.session, input, True))
665-
results = dbr.package_display_results(df)
664+
df = pd.DataFrame(db.get_input_options(self.session, input, True))
665+
results = db.package_display_results(df)
666666
else:
667-
results = dbr.get_filtered_input_options(self.session, input, filters)
667+
results = db.get_filtered_input_options(self.session, input, filters)
668668
if results:
669669
return results
670670
return None
@@ -678,9 +678,9 @@ def top_counts_by_vote_type(self, election, rollup_unit, sub_unit):
678678
else:
679679
connection = self.session.bind.raw_connection()
680680
cursor = connection.cursor()
681-
rollup_unit_id = dbr.name_to_id(self.session, 'ReportingUnit', rollup_unit)
682-
sub_unit_id = dbr.name_to_id(self.session, 'ReportingUnitType', sub_unit)
683-
election_id = dbr.name_to_id(self.session, 'Election', election)
681+
rollup_unit_id = db.name_to_id(self.session, 'ReportingUnit', rollup_unit)
682+
sub_unit_id = db.name_to_id(self.session, 'ReportingUnitType', sub_unit)
683+
election_id = db.name_to_id(self.session, 'Election', election)
684684
err_str = a.create_rollup(cursor, d['rollup_directory'], rollup_unit_id,
685685
sub_unit_id, election_id)
686686
connection.close()
@@ -694,9 +694,9 @@ def top_counts(self, rollup_unit, sub_unit):
694694
print("Data not created.")
695695
return
696696
else:
697-
rollup_unit_id = dbr.name_to_id(self.session, 'ReportingUnit', rollup_unit)
698-
sub_unit_id = dbr.name_to_id(self.session, 'ReportingUnitType', sub_unit)
699-
results_info = dbr.get_datafile_info(self.session, self.d['results_file_short'])
697+
rollup_unit_id = db.name_to_id(self.session, 'ReportingUnit', rollup_unit)
698+
sub_unit_id = db.name_to_id(self.session, 'ReportingUnitType', sub_unit)
699+
results_info = db.get_datafile_info(self.session, self.d['results_file_short'])
700700
rollup = a.create_rollup(self.session, d['rollup_directory'], top_ru_id=rollup_unit_id,
701701
sub_rutype_id=sub_unit_id, sub_rutype_othertext='', datafile_id_list=results_info[0],
702702
election_id=results_info[1], by_vote_type=False)
@@ -718,25 +718,25 @@ def scatter(self, jurisdiction, subdivision_type,
718718
print(error)
719719
print("Data not created.")
720720
return
721-
jurisdiction_id = dbr.name_to_id(self.session, 'ReportingUnit', jurisdiction)
722-
subdivision_type_id = dbr.name_to_id(self.session, 'ReportingUnitType', subdivision_type)
723-
h_election_id = dbr.name_to_id(self.session, 'Election', h_election)
724-
v_election_id = dbr.name_to_id(self.session, 'Election', v_election)
721+
jurisdiction_id = db.name_to_id(self.session, 'ReportingUnit', jurisdiction)
722+
subdivision_type_id = db.name_to_id(self.session, 'ReportingUnitType', subdivision_type)
723+
h_election_id = db.name_to_id(self.session, 'Election', h_election)
724+
v_election_id = db.name_to_id(self.session, 'Election', v_election)
725725
# *_type is either candidates or contests
726726
h_count_item_type, h_type = self.split_category_input(h_category)
727727
v_count_item_type, v_type = self.split_category_input(v_category)
728728
if h_count == 'All Candidates' or h_count == 'All Contests':
729729
h_count_id = -1
730730
elif h_type == 'candidates':
731-
h_count_id = dbr.name_to_id(self.session, 'Candidate', h_count)
731+
h_count_id = db.name_to_id(self.session, 'Candidate', h_count)
732732
elif h_type == 'contests':
733-
h_count_id = dbr.name_to_id(self.session, 'CandidateContest', h_count)
733+
h_count_id = db.name_to_id(self.session, 'CandidateContest', h_count)
734734
if v_count == 'All Candidates' or v_count == 'All Contests':
735735
v_count_id = -1
736736
elif v_type == 'candidates':
737-
v_count_id = dbr.name_to_id(self.session, 'Candidate', v_count)
737+
v_count_id = db.name_to_id(self.session, 'Candidate', v_count)
738738
elif v_type == 'contests':
739-
v_count_id = dbr.name_to_id(self.session, 'CandidateContest', v_count)
739+
v_count_id = db.name_to_id(self.session, 'CandidateContest', v_count)
740740
h_count_item_type, h_type = self.split_category_input(h_category)
741741
v_count_item_type, v_type = self.split_category_input(v_category)
742742
agg_results = a.create_scatter(self.session, jurisdiction_id, subdivision_type_id,
@@ -756,11 +756,11 @@ def bar(self, jurisdiction, contest_type=None, contest=None, fig_type=None):
756756
print(error)
757757
print("Data not created.")
758758
return
759-
jurisdiction_id = dbr.name_to_id(self.session, 'ReportingUnit', jurisdiction)
760-
most_granular_id = dbr.name_to_id(self.session, 'ReportingUnitType',
759+
jurisdiction_id = db.name_to_id(self.session, 'ReportingUnit', jurisdiction)
760+
most_granular_id = db.name_to_id(self.session, 'ReportingUnitType',
761761
d['sub_reporting_unit_type'])
762-
hierarchy = dbr.get_jurisdiction_hierarchy(self.session, jurisdiction_id, most_granular_id)
763-
results_info = dbr.get_datafile_info(self.session, self.d['results_file_short'])
762+
hierarchy = db.get_jurisdiction_hierarchy(self.session, jurisdiction_id, most_granular_id)
763+
results_info = db.get_datafile_info(self.session, self.d['results_file_short'])
764764
# bar chart always at one level below top reporting unit
765765
agg_results = a.create_bar(self.session, jurisdiction_id, hierarchy[1], \
766766
contest_type, contest, results_info[1], False)
@@ -790,11 +790,11 @@ def export_outlier_data(self, jurisdiction, contest=None):
790790
print(error)
791791
print("Data not created.")
792792
return
793-
jurisdiction_id = dbr.name_to_id(self.session, 'ReportingUnit', jurisdiction)
794-
most_granular_id = dbr.name_to_id(self.session, 'ReportingUnitType',
793+
jurisdiction_id = db.name_to_id(self.session, 'ReportingUnit', jurisdiction)
794+
most_granular_id = db.name_to_id(self.session, 'ReportingUnitType',
795795
d['sub_reporting_unit_type'])
796-
hierarchy = dbr.get_jurisdiction_hierarchy(self.session, jurisdiction_id, most_granular_id)
797-
results_info = dbr.get_datafile_info(self.session, self.d['results_file_short'])
796+
hierarchy = db.get_jurisdiction_hierarchy(self.session, jurisdiction_id, most_granular_id)
797+
results_info = db.get_datafile_info(self.session, self.d['results_file_short'])
798798
# bar chart always at one level below top reporting unit
799799
agg_results = a.create_bar(self.session, jurisdiction_id, hierarchy[1], \
800800
None, contest, results_info[1], True)

src/election_anomaly/analyze/__init__.py

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import matplotlib.pyplot as plt
1111
from pathlib import Path
1212
from pandas.api.types import is_numeric_dtype
13-
from election_anomaly import db_routines as dbr
13+
from election_anomaly import database as db
1414
import scipy.spatial.distance as dist
1515
from scipy import stats
1616
import math
@@ -46,14 +46,14 @@ def create_rollup(
4646
"""
4747

4848
if not datafile_list:
49-
datafile_list, e = dbr.data_file_list(cursor, [election_id], by='Id')
49+
datafile_list, e = db.data_file_list(cursor, [election_id], by='Id')
5050
if e:
5151
return e
5252
by = 'Id'
5353
if len(datafile_list) == 0:
5454
return f'No datafiles found for Election_Id {election_id}'
5555
# set exclude_total
56-
vote_type_list, err_str = dbr.vote_type_list(cursor, datafile_list, by=by)
56+
vote_type_list, err_str = db.vote_type_list(cursor, datafile_list, by=by)
5757
if err_str:
5858
return err_str
5959
elif len(vote_type_list) == 0:
@@ -65,9 +65,9 @@ def create_rollup(
6565
exclude_total = False
6666

6767
# get names from ids
68-
top_ru = dbr.name_from_id(cursor,'ReportingUnit',top_ru_id)#.replace(" ","-")
69-
election = dbr.name_from_id(cursor,'Election',election_id)#.replace(" ","-")
70-
sub_rutype = dbr.name_from_id(cursor, 'ReportingUnitType', sub_rutype_id)
68+
top_ru = db.name_from_id(cursor,'ReportingUnit',top_ru_id)#.replace(" ","-")
69+
election = db.name_from_id(cursor,'Election',election_id)#.replace(" ","-")
70+
sub_rutype = db.name_from_id(cursor, 'ReportingUnitType', sub_rutype_id)
7171

7272
# create path to export directory
7373
leaf_dir = os.path.join(target_dir, election, top_ru, f'by_{sub_rutype}')
@@ -89,7 +89,7 @@ def create_rollup(
8989
while os.path.isfile(os.path.join(leaf_dir, rollup_file)):
9090
rollup_file = input(f'There is already a file called {rollup_file}. Pick another name.\n')
9191

92-
err = dbr.export_rollup_to_csv(
92+
err = db.export_rollup_to_csv(
9393
cursor, top_ru, sub_rutype, contest_type, datafile_list,
9494
os.path.join(leaf_dir, rollup_file), by=by, exclude_total=exclude_total
9595
)
@@ -126,24 +126,24 @@ def create_scatter(session, jurisdiction_id, subdivision_type_id,
126126
if h_count_id == -1:
127127
x = f'All {h_type}'
128128
elif h_type == 'candidates':
129-
x = dbr.name_from_id(cursor, 'Candidate', h_count_id)
129+
x = db.name_from_id(cursor, 'Candidate', h_count_id)
130130
elif h_type == 'contests':
131-
x = dbr.name_from_id(cursor, 'CandidateContest', h_count_id)
131+
x = db.name_from_id(cursor, 'CandidateContest', h_count_id)
132132
if v_count_id == -1:
133133
y = f'All {v_type}'
134134
elif v_type == 'candidates':
135-
y = dbr.name_from_id(cursor, 'Candidate', v_count_id)
135+
y = db.name_from_id(cursor, 'Candidate', v_count_id)
136136
elif v_type == 'contests':
137-
y = dbr.name_from_id(cursor, 'CandidateContest', v_count_id)
138-
jurisdiction = dbr.name_from_id(cursor, 'ReportingUnit', jurisdiction_id)
137+
y = db.name_from_id(cursor, 'CandidateContest', v_count_id)
138+
jurisdiction = db.name_from_id(cursor, 'ReportingUnit', jurisdiction_id)
139139
pivot_df = pd.pivot_table(unsummed, values='Count',
140140
index=['Name'], columns='Selection').reset_index()
141141

142142
# package up results
143143
results = package_results(pivot_df, jurisdiction, x, y)
144-
results["x-election"] = dbr.name_from_id(cursor, 'Election', h_election_id)
145-
results["y-election"] = dbr.name_from_id(cursor, 'Election', v_election_id)
146-
results["subdivision_type"] = dbr.name_from_id(cursor, 'ReportingUnitType', subdivision_type_id)
144+
results["x-election"] = db.name_from_id(cursor, 'Election', h_election_id)
145+
results["y-election"] = db.name_from_id(cursor, 'Election', v_election_id)
146+
results["subdivision_type"] = db.name_from_id(cursor, 'ReportingUnitType', subdivision_type_id)
147147
results["x-count_item_type"] = h_category
148148
results["y-count_item_type"] = v_category
149149

@@ -178,7 +178,7 @@ def package_results(data, jurisdiction, x, y, restrict=None):
178178
def get_data_for_scatter(session, jurisdiction_id, subdivision_type_id,
179179
election_id, count_item_type, filter_id, count_type):
180180
"""Since this could be data across 2 elections, grab data one election at a time"""
181-
unsummed = dbr.get_candidate_votecounts(session, election_id, jurisdiction_id, subdivision_type_id)
181+
unsummed = db.get_candidate_votecounts(session, election_id, jurisdiction_id, subdivision_type_id)
182182
# limit to relevant data
183183
if count_type == 'candidates':
184184
filter_column = 'Candidate_Id'
@@ -199,7 +199,7 @@ def get_data_for_scatter(session, jurisdiction_id, subdivision_type_id,
199199
unsummed['Candidate_Id'] = filter_id
200200

201201
if count_type == 'contests' and filter_id != -1:
202-
selection = dbr.name_from_id(session, 'CandidateContest', filter_id)
202+
selection = db.name_from_id(session, 'CandidateContest', filter_id)
203203
unsummed['Selection'] = selection
204204
elif count_type == 'contests' and filter_id == -1:
205205
unsummed['Selection'] = 'All contests'
@@ -216,7 +216,7 @@ def create_bar(session, top_ru_id, subdivision_type_id, contest_type, contest, e
216216
connection = session.bind.raw_connection()
217217
cursor = connection.cursor()
218218

219-
unsummed = dbr.get_candidate_votecounts(session, election_id, top_ru_id, subdivision_type_id)
219+
unsummed = db.get_candidate_votecounts(session, election_id, top_ru_id, subdivision_type_id)
220220

221221
if contest_type:
222222
unsummed = unsummed[unsummed['contest_district_type'] == contest_type]
@@ -254,9 +254,9 @@ def create_bar(session, top_ru_id, subdivision_type_id, contest_type, contest, e
254254
'max_margins_pct': 'margins_pct'}, inplace=True)
255255

256256
candidates = temp_df['Candidate_Id'].unique()
257-
x = dbr.name_from_id(cursor, 'Candidate', int(candidates[0]))
258-
y = dbr.name_from_id(cursor, 'Candidate', int(candidates[1]))
259-
jurisdiction = dbr.name_from_id(cursor, 'ReportingUnit', top_ru_id)
257+
x = db.name_from_id(cursor, 'Candidate', int(candidates[0]))
258+
y = db.name_from_id(cursor, 'Candidate', int(candidates[1]))
259+
jurisdiction = db.name_from_id(cursor, 'ReportingUnit', top_ru_id)
260260

261261
pivot_df = pd.pivot_table(temp_df, values='Count',
262262
index=['Name'], columns='Selection').reset_index()
@@ -265,9 +265,9 @@ def create_bar(session, top_ru_id, subdivision_type_id, contest_type, contest, e
265265
.sort_values('score', ascending=False).reset_index()
266266

267267
results = package_results(pivot_df, jurisdiction, x, y, restrict=8)
268-
results["election"] = dbr.name_from_id(cursor, 'Election', election_id)
269-
results["contest"] = dbr.name_from_id(cursor, 'Contest', int(temp_df.iloc[0]['Contest_Id']))
270-
results["subdivision_type"] = dbr.name_from_id(cursor, 'ReportingUnitType', \
268+
results["election"] = db.name_from_id(cursor, 'Election', election_id)
269+
results["contest"] = db.name_from_id(cursor, 'Contest', int(temp_df.iloc[0]['Contest_Id']))
270+
results["subdivision_type"] = db.name_from_id(cursor, 'ReportingUnitType', \
271271
int(temp_df.iloc[0]['ReportingUnitType_Id']))
272272
results["count_item_type"] = temp_df.iloc[0]['CountItemType']
273273
results["votes_at_stake"] = temp_df.iloc[0]['votes_at_stake']

src/election_anomaly/db_routines/__init__.py renamed to src/election_anomaly/database/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#!/usr/bin/python3
2-
# db_routines/__init__.py
2+
# database/__init__.py
33

44
import psycopg2
55
import sqlalchemy
@@ -17,7 +17,7 @@
1717
import pandas as pd
1818
from election_anomaly import munge_routines as mr
1919
import re
20-
from election_anomaly.db_routines import create_cdf_db as db_cdf
20+
from election_anomaly.database import create_cdf_db as db_cdf
2121
import os
2222
import numpy as np
2323
from sqlalchemy import MetaData, Table, Column, Integer, Float

src/election_anomaly/db_routines/create_cdf_db/__init__.py renamed to src/election_anomaly/database/create_cdf_db/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from psycopg2 import sql
1010
import os
1111
import pandas as pd
12-
from election_anomaly import db_routines as dbr
12+
from election_anomaly import database as db
1313
import datetime
1414

1515

@@ -57,7 +57,7 @@ def create_common_data_format_tables(session,dirpath='CDF_schema_def_info/'):
5757
elif element == 'CandidateSelection':
5858
create_indices = [['Candidate_Id', 'Party_Id']]
5959
else:
60-
# create_indices = [[dbr.get_name_field(element)]]
60+
# create_indices = [[db.get_name_field(element)]]
6161
create_indices = None
6262
# TODO fix for efficiency -- note <contest_type>Contest, <contest_type>Selection may need special treatment
6363

@@ -230,7 +230,7 @@ def load_bms(engine, bms_list: list):
230230
bms_df = pd.DataFrame([[s] for s in bms_list], columns=['Name'])
231231

232232
# Create one entry in the Selection table per item in bms_list
233-
id_list = dbr.add_records_to_selection_table(engine, len(bms_list))
233+
id_list = db.add_records_to_selection_table(engine, len(bms_list))
234234

235235
# Create entries in BallotMeasureSelection table
236236
bms_df['Id'] = pd.Series(id_list,index=bms_df.index)
Binary file not shown.

0 commit comments

Comments
 (0)