ElectionDataAnalysis
diff --git a/src/election_anomaly/__init__.py b/src/election_anomaly/__init__.py
Lines changed: 37 additions & 37 deletions
diff --git a/src/election_anomaly/analyze/__init__.py b/src/election_anomaly/analyze/__init__.py
Lines changed: 24 additions & 24 deletions
diff --git a/src/election_anomaly/db_routines/__init__.py b/src/election_anomaly/database/__init__.py
src/election_anomaly/db_routines/__init__.py renamed to src/election_anomaly/database/__init__.py
Lines changed: 2 additions & 2 deletions
diff --git a/src/election_anomaly/db_routines/create_cdf_db/__init__.py b/src/election_anomaly/database/create_cdf_db/__init__.py
src/election_anomaly/db_routines/create_cdf_db/__init__.py renamed to src/election_anomaly/database/create_cdf_db/__init__.py
Lines changed: 3 additions & 3 deletions
diff --git a/src/election_anomaly/db_routines/__pycache__/__init__.cpython-38.pyc b/src/election_anomaly/db_routines/__pycache__/__init__.cpython-38.pyc
Binary file size change: -15.1 KB
@@ -1,4 +1,4 @@
-from election_anomaly import db_routines as dbr
+from election_anomaly import database as db
 from election_anomaly import user_interface as ui
 from election_anomaly import munge_routines as mr
 from sqlalchemy.orm import sessionmaker
@@ -60,13 +60,13 @@ def __init__(self):
 		self.tracker = dict()
 
 		# create db if it does not already exist
-		error = dbr.establish_connection(paramfile=self.d['db_paramfile'], db_name=self.d['db_name'])
+		error = db.establish_connection(paramfile=self.d['db_paramfile'], db_name=self.d['db_name'])
 		if error:
-			dbr.create_new_db(self.d['project_root'], self.d['db_paramfile'],  self.d['db_name'])
+			db.create_new_db(self.d['project_root'], self.d['db_paramfile'],  self.d['db_name'])
 
 		# connect to db
 		try:
-			self.engine = dbr.sql_alchemy_connect(paramfile=self.d['db_paramfile'],  db_name=self.d['db_name'])
+			self.engine = db.sql_alchemy_connect(paramfile=self.d['db_paramfile'],  db_name=self.d['db_name'])
 			Session = sessionmaker(bind=self.engine)
 			self.session = Session()
 		except Exception as e:
@@ -228,8 +228,8 @@ def check_errors(self):
 
 	def track_results(self):
 		filename = self.d['results_file']
-		top_reporting_unit_id = dbr.name_to_id(self.session,'ReportingUnit', self.d['top_reporting_unit'])
-		election_id = dbr.name_to_id(self.session,'Election',self.d['election'])
+		top_reporting_unit_id = db.name_to_id(self.session,'ReportingUnit', self.d['top_reporting_unit'])
+		election_id = db.name_to_id(self.session,'Election',self.d['election'])
 
 		data = pd.DataFrame(
 			[[self.d['results_short_name'],filename,
@@ -238,12 +238,12 @@ def track_results(self):
 			columns=['short_name', 'file_name',
 					 'download_date', 'source',
 					 'note', 'ReportingUnit_Id', 'Election_Id','created_at'])
-		e = dbr.insert_to_cdf_db(self.session.bind, data, '_datafile')
+		e = db.insert_to_cdf_db(self.session.bind, data, '_datafile')
 		if e:
 			return [0, 0], e
 		else:
 			col_map = {'short_name':'short_name'}
-			datafile_id = dbr.append_id_to_dframe(
+			datafile_id = db.append_id_to_dframe(
 				self.session.bind,data,'_datafile',col_map=col_map
 			).iloc[0]['_datafile_Id']
 		return [datafile_id, election_id], e
@@ -540,7 +540,7 @@ def add_elements_from_results_file(self, elements: iter, error: dict, results_fi
 		wr, mu, error = ui.read_results(kwargs,error)
 
 		for element in elements:
-			name_field = dbr.get_name_field(element)
+			name_field = db.get_name_field(element)
 			# append <element>_raw
 			wr, error = mr.add_munged_column(
 				wr, mu, element, error, mode=mu.cdf_elements.loc[element, 'source'],
@@ -583,7 +583,7 @@ def starter_dictionary(self,include_existing=True) -> str:
 			old.drop()
 		for element in elements:
 			w[element] = prep.get_element(self.d['jurisdiction_path'],element)
-			name_field = dbr.get_name_field(element)
+			name_field = db.get_name_field(element)
 			w[element] = mr.add_constant_column(w[element],'cdf_element',element)
 			w[element].rename(columns={name_field:'cdf_internal_name'},inplace=True)
 			w[element]['raw_identifier_value'] = w[element]['cdf_internal_name']
@@ -650,21 +650,21 @@ def __init__(self):
             'db_name', 'results_file'])
         self.d['results_file_short'] = get_filename(self.d['results_file'])
 
-        eng = dbr.sql_alchemy_connect(paramfile=self.d['db_paramfile'],
+        eng = db.sql_alchemy_connect(paramfile=self.d['db_paramfile'],
             db_name=self.d['db_name'])
         Session = sessionmaker(bind=eng)
         self.session = Session()
 
 
     def display_options(self, input, verbose=False, filters=None):
         if not verbose:
-            results = dbr.get_input_options(self.session, input, False)
+            results = db.get_input_options(self.session, input, False)
         else:
             if not filters:
-                df = pd.DataFrame(dbr.get_input_options(self.session, input, True))
-                results = dbr.package_display_results(df)
+                df = pd.DataFrame(db.get_input_options(self.session, input, True))
+                results = db.package_display_results(df)
             else:
-                results = dbr.get_filtered_input_options(self.session, input, filters)
+                results = db.get_filtered_input_options(self.session, input, filters)
         if results:
             return results
         return None
@@ -678,9 +678,9 @@ def top_counts_by_vote_type(self, election, rollup_unit, sub_unit):
         else:
             connection = self.session.bind.raw_connection()
             cursor = connection.cursor()
-            rollup_unit_id = dbr.name_to_id(self.session, 'ReportingUnit', rollup_unit)
-            sub_unit_id = dbr.name_to_id(self.session, 'ReportingUnitType', sub_unit)
-            election_id = dbr.name_to_id(self.session, 'Election', election)
+            rollup_unit_id = db.name_to_id(self.session, 'ReportingUnit', rollup_unit)
+            sub_unit_id = db.name_to_id(self.session, 'ReportingUnitType', sub_unit)
+            election_id = db.name_to_id(self.session, 'Election', election)
             err_str = a.create_rollup(cursor, d['rollup_directory'], rollup_unit_id,
                 sub_unit_id, election_id)
             connection.close()
@@ -694,9 +694,9 @@ def top_counts(self, rollup_unit, sub_unit):
             print("Data not created.")
             return
         else:
-            rollup_unit_id = dbr.name_to_id(self.session, 'ReportingUnit', rollup_unit)
-            sub_unit_id = dbr.name_to_id(self.session, 'ReportingUnitType', sub_unit)
-            results_info = dbr.get_datafile_info(self.session, self.d['results_file_short'])
+            rollup_unit_id = db.name_to_id(self.session, 'ReportingUnit', rollup_unit)
+            sub_unit_id = db.name_to_id(self.session, 'ReportingUnitType', sub_unit)
+            results_info = db.get_datafile_info(self.session, self.d['results_file_short'])
             rollup = a.create_rollup(self.session, d['rollup_directory'], top_ru_id=rollup_unit_id,
                 sub_rutype_id=sub_unit_id, sub_rutype_othertext='', datafile_id_list=results_info[0], 
                 election_id=results_info[1], by_vote_type=False)
@@ -718,25 +718,25 @@ def scatter(self, jurisdiction, subdivision_type,
             print(error)
             print("Data not created.")
             return
-        jurisdiction_id = dbr.name_to_id(self.session, 'ReportingUnit', jurisdiction)
-        subdivision_type_id = dbr.name_to_id(self.session, 'ReportingUnitType', subdivision_type)
-        h_election_id = dbr.name_to_id(self.session, 'Election', h_election)
-        v_election_id = dbr.name_to_id(self.session, 'Election', v_election)
+        jurisdiction_id = db.name_to_id(self.session, 'ReportingUnit', jurisdiction)
+        subdivision_type_id = db.name_to_id(self.session, 'ReportingUnitType', subdivision_type)
+        h_election_id = db.name_to_id(self.session, 'Election', h_election)
+        v_election_id = db.name_to_id(self.session, 'Election', v_election)
         # *_type is either candidates or contests
         h_count_item_type, h_type = self.split_category_input(h_category)
         v_count_item_type, v_type = self.split_category_input(v_category)
         if h_count == 'All Candidates' or h_count == 'All Contests':
             h_count_id = -1
         elif h_type == 'candidates':
-            h_count_id = dbr.name_to_id(self.session, 'Candidate', h_count) 
+            h_count_id = db.name_to_id(self.session, 'Candidate', h_count) 
         elif h_type == 'contests':
-            h_count_id = dbr.name_to_id(self.session, 'CandidateContest', h_count) 
+            h_count_id = db.name_to_id(self.session, 'CandidateContest', h_count) 
         if v_count == 'All Candidates' or v_count == 'All Contests':
             v_count_id = -1
         elif v_type == 'candidates':
-            v_count_id = dbr.name_to_id(self.session, 'Candidate', v_count) 
+            v_count_id = db.name_to_id(self.session, 'Candidate', v_count) 
         elif v_type == 'contests':
-            v_count_id = dbr.name_to_id(self.session, 'CandidateContest', v_count) 
+            v_count_id = db.name_to_id(self.session, 'CandidateContest', v_count) 
         h_count_item_type, h_type = self.split_category_input(h_category)
         v_count_item_type, v_type = self.split_category_input(v_category)
         agg_results = a.create_scatter(self.session, jurisdiction_id, subdivision_type_id, 
@@ -756,11 +756,11 @@ def bar(self, jurisdiction, contest_type=None, contest=None, fig_type=None):
             print(error)
             print("Data not created.")
             return
-        jurisdiction_id = dbr.name_to_id(self.session, 'ReportingUnit', jurisdiction)
-        most_granular_id = dbr.name_to_id(self.session, 'ReportingUnitType', 
+        jurisdiction_id = db.name_to_id(self.session, 'ReportingUnit', jurisdiction)
+        most_granular_id = db.name_to_id(self.session, 'ReportingUnitType', 
             d['sub_reporting_unit_type'])
-        hierarchy = dbr.get_jurisdiction_hierarchy(self.session, jurisdiction_id, most_granular_id)
-        results_info = dbr.get_datafile_info(self.session, self.d['results_file_short'])
+        hierarchy = db.get_jurisdiction_hierarchy(self.session, jurisdiction_id, most_granular_id)
+        results_info = db.get_datafile_info(self.session, self.d['results_file_short'])
 		# bar chart always at one level below top reporting unit
         agg_results = a.create_bar(self.session, jurisdiction_id, hierarchy[1], \
             contest_type, contest, results_info[1], False)
@@ -790,11 +790,11 @@ def export_outlier_data(self, jurisdiction, contest=None):
             print(error)
             print("Data not created.")
             return
-        jurisdiction_id = dbr.name_to_id(self.session, 'ReportingUnit', jurisdiction)
-        most_granular_id = dbr.name_to_id(self.session, 'ReportingUnitType', 
+        jurisdiction_id = db.name_to_id(self.session, 'ReportingUnit', jurisdiction)
+        most_granular_id = db.name_to_id(self.session, 'ReportingUnitType', 
             d['sub_reporting_unit_type'])
-        hierarchy = dbr.get_jurisdiction_hierarchy(self.session, jurisdiction_id, most_granular_id)
-        results_info = dbr.get_datafile_info(self.session, self.d['results_file_short'])
+        hierarchy = db.get_jurisdiction_hierarchy(self.session, jurisdiction_id, most_granular_id)
+        results_info = db.get_datafile_info(self.session, self.d['results_file_short'])
 		# bar chart always at one level below top reporting unit
         agg_results = a.create_bar(self.session, jurisdiction_id, hierarchy[1], \
             None, contest, results_info[1], True)
 
@@ -10,7 +10,7 @@
 import matplotlib.pyplot as plt
 from pathlib import Path
 from pandas.api.types import is_numeric_dtype
-from election_anomaly import db_routines as dbr
+from election_anomaly import database as db
 import scipy.spatial.distance as dist
 from scipy import stats
 import math
@@ -46,14 +46,14 @@ def create_rollup(
 	"""
 
 	if not datafile_list:
-		datafile_list, e = dbr.data_file_list(cursor, [election_id], by='Id')
+		datafile_list, e = db.data_file_list(cursor, [election_id], by='Id')
 		if e:
 			return e
 		by = 'Id'
 		if len(datafile_list) == 0:
 			return f'No datafiles found for Election_Id {election_id}'
 	# set exclude_total
-	vote_type_list, err_str = dbr.vote_type_list(cursor, datafile_list, by=by)
+	vote_type_list, err_str = db.vote_type_list(cursor, datafile_list, by=by)
 	if err_str:
 		return err_str
 	elif len(vote_type_list) == 0:
@@ -65,9 +65,9 @@ def create_rollup(
 		exclude_total = False
 
 	# get names from ids
-	top_ru = dbr.name_from_id(cursor,'ReportingUnit',top_ru_id)#.replace(" ","-")
-	election = dbr.name_from_id(cursor,'Election',election_id)#.replace(" ","-")
-	sub_rutype = dbr.name_from_id(cursor, 'ReportingUnitType', sub_rutype_id)
+	top_ru = db.name_from_id(cursor,'ReportingUnit',top_ru_id)#.replace(" ","-")
+	election = db.name_from_id(cursor,'Election',election_id)#.replace(" ","-")
+	sub_rutype = db.name_from_id(cursor, 'ReportingUnitType', sub_rutype_id)
 
 	# create path to export directory
 	leaf_dir = os.path.join(target_dir, election, top_ru, f'by_{sub_rutype}')
@@ -89,7 +89,7 @@ def create_rollup(
 		while os.path.isfile(os.path.join(leaf_dir, rollup_file)):
 			rollup_file = input(f'There is already a file called {rollup_file}. Pick another name.\n')
 
-		err = dbr.export_rollup_to_csv(
+		err = db.export_rollup_to_csv(
 			cursor, top_ru, sub_rutype, contest_type, datafile_list,
 			os.path.join(leaf_dir, rollup_file), by=by, exclude_total=exclude_total
 		)
@@ -126,24 +126,24 @@ def create_scatter(session, jurisdiction_id, subdivision_type_id,
 	if h_count_id == -1:
 		x = f'All {h_type}'
 	elif h_type == 'candidates':
-		x = dbr.name_from_id(cursor, 'Candidate', h_count_id) 
+		x = db.name_from_id(cursor, 'Candidate', h_count_id) 
 	elif h_type == 'contests':
-		x = dbr.name_from_id(cursor, 'CandidateContest', h_count_id) 
+		x = db.name_from_id(cursor, 'CandidateContest', h_count_id) 
 	if v_count_id == -1:
 		y = f'All {v_type}'
 	elif v_type == 'candidates':
-		y = dbr.name_from_id(cursor, 'Candidate', v_count_id) 
+		y = db.name_from_id(cursor, 'Candidate', v_count_id) 
 	elif v_type == 'contests':
-		y = dbr.name_from_id(cursor, 'CandidateContest', v_count_id) 
-	jurisdiction = dbr.name_from_id(cursor, 'ReportingUnit', jurisdiction_id)
+		y = db.name_from_id(cursor, 'CandidateContest', v_count_id) 
+	jurisdiction = db.name_from_id(cursor, 'ReportingUnit', jurisdiction_id)
 	pivot_df = pd.pivot_table(unsummed, values='Count',
 		index=['Name'], columns='Selection').reset_index()
 
 	# package up results
 	results = package_results(pivot_df, jurisdiction, x, y)
-	results["x-election"] = dbr.name_from_id(cursor, 'Election', h_election_id)
-	results["y-election"] = dbr.name_from_id(cursor, 'Election', v_election_id)
-	results["subdivision_type"] = dbr.name_from_id(cursor, 'ReportingUnitType', subdivision_type_id)
+	results["x-election"] = db.name_from_id(cursor, 'Election', h_election_id)
+	results["y-election"] = db.name_from_id(cursor, 'Election', v_election_id)
+	results["subdivision_type"] = db.name_from_id(cursor, 'ReportingUnitType', subdivision_type_id)
 	results["x-count_item_type"] = h_category
 	results["y-count_item_type"] = v_category
 
@@ -178,7 +178,7 @@ def package_results(data, jurisdiction, x, y, restrict=None):
 def get_data_for_scatter(session, jurisdiction_id, subdivision_type_id, 
 	election_id, count_item_type, filter_id, count_type):
 	"""Since this could be data across 2 elections, grab data one election at a time"""
-	unsummed = dbr.get_candidate_votecounts(session, election_id, jurisdiction_id, subdivision_type_id)
+	unsummed = db.get_candidate_votecounts(session, election_id, jurisdiction_id, subdivision_type_id)
 	#  limit to relevant data
 	if count_type == 'candidates':
 		filter_column = 'Candidate_Id'
@@ -199,7 +199,7 @@ def get_data_for_scatter(session, jurisdiction_id, subdivision_type_id,
 		unsummed['Candidate_Id'] = filter_id
 
 	if count_type == 'contests' and filter_id != -1:
-		selection = dbr.name_from_id(session, 'CandidateContest', filter_id)
+		selection = db.name_from_id(session, 'CandidateContest', filter_id)
 		unsummed['Selection'] = selection
 	elif count_type == 'contests' and filter_id == -1:
 		unsummed['Selection'] = 'All contests'
@@ -216,7 +216,7 @@ def create_bar(session, top_ru_id, subdivision_type_id, contest_type, contest, e
 	connection = session.bind.raw_connection()
 	cursor = connection.cursor()
 
-	unsummed = dbr.get_candidate_votecounts(session, election_id, top_ru_id, subdivision_type_id)
+	unsummed = db.get_candidate_votecounts(session, election_id, top_ru_id, subdivision_type_id)
 
 	if contest_type:
 		unsummed = unsummed[unsummed['contest_district_type'] == contest_type]
@@ -254,9 +254,9 @@ def create_bar(session, top_ru_id, subdivision_type_id, contest_type, contest, e
 			'max_margins_pct': 'margins_pct'}, inplace=True)
 
 		candidates = temp_df['Candidate_Id'].unique()
-		x = dbr.name_from_id(cursor, 'Candidate', int(candidates[0]))
-		y = dbr.name_from_id(cursor, 'Candidate', int(candidates[1])) 
-		jurisdiction = dbr.name_from_id(cursor, 'ReportingUnit', top_ru_id)
+		x = db.name_from_id(cursor, 'Candidate', int(candidates[0]))
+		y = db.name_from_id(cursor, 'Candidate', int(candidates[1])) 
+		jurisdiction = db.name_from_id(cursor, 'ReportingUnit', top_ru_id)
 
 		pivot_df = pd.pivot_table(temp_df, values='Count',
 			index=['Name'], columns='Selection').reset_index()
@@ -265,9 +265,9 @@ def create_bar(session, top_ru_id, subdivision_type_id, contest_type, contest, e
 			.sort_values('score', ascending=False).reset_index()
 
 		results = package_results(pivot_df, jurisdiction, x, y, restrict=8)
-		results["election"] = dbr.name_from_id(cursor, 'Election', election_id)
-		results["contest"] = dbr.name_from_id(cursor, 'Contest', int(temp_df.iloc[0]['Contest_Id']))
-		results["subdivision_type"] = dbr.name_from_id(cursor, 'ReportingUnitType', \
+		results["election"] = db.name_from_id(cursor, 'Election', election_id)
+		results["contest"] = db.name_from_id(cursor, 'Contest', int(temp_df.iloc[0]['Contest_Id']))
+		results["subdivision_type"] = db.name_from_id(cursor, 'ReportingUnitType', \
 				int(temp_df.iloc[0]['ReportingUnitType_Id']))
 		results["count_item_type"] = temp_df.iloc[0]['CountItemType']
 		results["votes_at_stake"] = temp_df.iloc[0]['votes_at_stake']
 
@@ -1,5 +1,5 @@
 #!/usr/bin/python3
-# db_routines/__init__.py
+# database/__init__.py
 
 import psycopg2
 import sqlalchemy
@@ -17,7 +17,7 @@
 import pandas as pd
 from election_anomaly import munge_routines as mr
 import re
-from election_anomaly.db_routines import create_cdf_db as db_cdf
+from election_anomaly.database import create_cdf_db as db_cdf
 import os
 import numpy as np
 from sqlalchemy import MetaData, Table, Column, Integer, Float
 
@@ -9,7 +9,7 @@
 from psycopg2 import sql
 import os
 import pandas as pd
-from election_anomaly import db_routines as dbr
+from election_anomaly import database as db
 import datetime
 
 
@@ -57,7 +57,7 @@ def create_common_data_format_tables(session,dirpath='CDF_schema_def_info/'):
         elif element == 'CandidateSelection':
             create_indices = [['Candidate_Id', 'Party_Id']]
         else:
-            # create_indices = [[dbr.get_name_field(element)]]
+            # create_indices = [[db.get_name_field(element)]]
             create_indices = None
             # TODO fix for efficiency -- note <contest_type>Contest, <contest_type>Selection may need special treatment
 
@@ -230,7 +230,7 @@ def load_bms(engine, bms_list: list):
     bms_df = pd.DataFrame([[s] for s in bms_list], columns=['Name'])
 
     # Create 3 entries in Selection table
-    id_list = dbr.add_records_to_selection_table(engine, len(bms_list))
+    id_list = db.add_records_to_selection_table(engine, len(bms_list))
 
     # Create entries in BallotMeasureSelection table
     bms_df['Id'] = pd.Series(id_list,index=bms_df.index)