Skip to content

Commit cf655c2

Browse files
committed
document code (WIP)
1 parent e59057d commit cf655c2

2 files changed

Lines changed: 96 additions & 49 deletions

File tree

src/electiondata/__init__.py

Lines changed: 49 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2891,24 +2891,39 @@ def bar(
28912891
contest_type: str = None,
28922892
contest: str = None,
28932893
fig_type: str = None,
2894-
) -> List[dict]:
2895-
"""contest_type is an election district type, e.g.,
2896-
state, congressional, state-senate, state-house, territory, etc.
2897-
Complete list is given by the keys of <db.contest_type_mapping>"""
2894+
) -> Optional[List[dict]]:
2895+
"""
2896+
Required inputs:
2897+
election: str,
2898+
jurisdiction: str,
2899+
Optional input:
2900+
contest_type: str = None, an election district type, e.g.,
2901+
state, congressional, state-senate, state-house, territory, etc.
2902+
Complete list is given by the keys of <constants.contest_type_mapping>
2903+
contest: str = None,
2904+
fig_type: str = None, an image format string from plotly - as of 8/2021, includes
2905+
html, png, jpeg, webp, svg, pdf, and eps. Note that some filetypes may need
2906+
plotly-orca installed as well.
2907+
2908+
If <fig_type> is given and bar chart results are found, creates a bar chart image for each result
2909+
in the self.reports_and_plots_dir directory with file extension and format determined by fig_type.
2910+
2911+
Returns:
2912+
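Optional[List[dict]], one dictionary of bar chart information per chart (presumably as produced by analyze.create_bar -- TODO confirm)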
2913+
"""
28982914
election_id = db.name_to_id(self.session, "Election", election)
28992915
jurisdiction_id = db.name_to_id(self.session, "ReportingUnit", jurisdiction)
2900-
# for now, bar charts can only handle jurisdictions where county is one level
2916+
# for now, bar charts can only handle jurisdictions where major subdivision type is one level
29012917
# down from the jurisdiction
2902-
subdivision_type = db.get_jurisdiction_hierarchy(self.session, jurisdiction_id)
2903-
# bar chart always at one level below top reporting unit
2918+
# TODO is this still true? ^^
29042919
agg_results = an.create_bar(
29052920
self.session,
2906-
jurisdiction_id,
2907-
subdivision_type,
2908-
contest_type,
2909-
contest,
29102921
election_id,
2911-
False,
2922+
jurisdiction_id,
2923+
self.major_subdivision_type[jurisdiction],
2924+
contest_district_type=contest_type,
2925+
contest_or_contest_group=contest,
2926+
for_export = False,
29122927
)
29132928
if fig_type and agg_results:
29142929
for agg_result in agg_results:
@@ -2934,20 +2949,18 @@ def export_outlier_data(
29342949
election: str,
29352950
jurisdiction: str,
29362951
contest: str = None,
2937-
) -> List[dict]:
2952+
) -> Optional[List[dict]]:
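29382953
"""Returns the export-format (for_export=True) bar chart data for <election> and <jurisdiction>, restricted to <contest> if given; no contest district type filter is applied"""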
29392954
election_id = db.name_to_id(self.session, "Election", election)
29402955
jurisdiction_id = db.name_to_id(self.session, "ReportingUnit", jurisdiction)
2941-
subdivision_type = db.get_jurisdiction_hierarchy(self.session, jurisdiction_id)
29422956
# bar chart always at one level below top reporting unit
29432957
agg_results = an.create_bar(
29442958
self.session,
2945-
jurisdiction_id,
2946-
subdivision_type,
2947-
None,
2948-
contest,
29492959
election_id,
2950-
True,
2960+
jurisdiction_id,
2961+
self.major_subdivision_type[jurisdiction],
2962+
contest_or_contest_group=contest,
2963+
for_export=True,
29512964
)
29522965
return agg_results
29532966

@@ -4223,9 +4236,9 @@ def reload_juris_election(
42234236

42244237
def datafile_info(
42254238
connection,
4226-
ini_filename,
4227-
results_short_name,
4228-
file_name,
4239+
ini_filename: str,
4240+
results_short_name: str,
4241+
file_name: str,
42294242
download_date: str,
42304243
source: str,
42314244
note: str,
@@ -4332,6 +4345,20 @@ def export_notes_from_ini_files(
43324345
election: Optional[str] = None,
43334346
jurisdiction: Optional[str] = None,
43344347
):
4348+
"""
4349+
Required inputs:
4350+
directory: str, path to directory
4351+
target_file: str, path to file
4352+
4353+
Optional inputs:
4354+
election: Optional[str] = None,
4355+
jurisdiction: Optional[str] = None,
4356+
4357+
Creates <target_file> containing a summary of all results_notes parameter values from the .ini
4358+
files in <directory> or its subdirectories. If <election> (resp. <jurisdiction>) is given,
4359+
ignores all .ini files whose election (resp. jurisdiction) parameter value matches
4360+
<election> (resp. <jurisdiction>).
4361+
"""
43354362
df = pd.DataFrame(columns=["election", "jurisdiction", "results_note"])
43364363
# collect notes
43374364
try:

src/electiondata/analyze/__init__.py

Lines changed: 47 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -446,30 +446,50 @@ def get_votecount_data(
446446

447447
def create_bar(
448448
session: Session,
449-
top_ru_id: int,
450-
subdivision_type: str,
451-
contest_type: Optional[str],
452-
contest: Optional[str],
453449
election_id: int,
454-
for_export: bool,
455-
) -> List[dict]:
456-
450+
jurisdiction_id: int,
451+
subdivision_type: str,
452+
contest_district_type: Optional[str] = None,
453+
contest_or_contest_group: Optional[str] = None,
454+
for_export: bool = True,
455+
) -> Optional[List[dict]]:
456+
"""
457+
Required inputs:
458+
session: Session, sqlalchemy session
459+
election_id: int,
460+
jurisdiction_id: int,
461+
subdivision_type: str,
462+
Optional inputs:
463+
contest_district_type: Optional[str] = None,
464+
contest_or_contest_group: Optional[str] = None, from user-facing menu, either the name of a contest or of a
465+
group of contests, e.g., "All congressional"
466+
for_export: bool = True,
467+
468+
Returns:
469+
List[dict], list of dictionaries, where each dictionary contains information to create a bar
470+
chart. The bar charts in the list are chosen via an algorithm favoring charts with a single outlier
471+
county whose impact on the margin is large.
472+
# TODO document algorithm details in assign_anomaly_score(unsummed)
473+
Bar charts are restricted to results for the <contest_or_contest_group>, if given, and also
474+
from the contests with districts of type <contest_district_type>, if given
475+
"""
476+
# connect to db via psycopg2
457477
connection = session.bind.raw_connection()
458478
cursor = connection.cursor()
459479

460480
unsummed = db.unsummed_vote_counts_with_rollup_subdivision_id(
461-
session, election_id, top_ru_id, subdivision_type
481+
session, election_id, jurisdiction_id, subdivision_type
462482
)
463483

464-
if contest_type:
465-
contest_type = ui.get_contest_type_mapping(contest_type)
466-
unsummed = unsummed[unsummed["contest_district_type"] == contest_type]
484+
if contest_district_type:
485+
contest_district_type = ui.get_contest_type_mapping(contest_district_type)
486+
unsummed = unsummed[unsummed["contest_district_type"] == contest_district_type]
467487

468-
# through front end, contest_type must be truthy if contest is truthy
488+
# through VoteVisualizer front end, contest_district_type must be truthy if contest_or_contest_group is truthy
469489
# Only filter when there is an actual contest passed through, as opposed to
470490
# "All congressional" as an example
471-
if contest and not contest.startswith("All "):
472-
unsummed = unsummed[unsummed["Contest"] == contest]
491+
if contest_or_contest_group and not contest_or_contest_group.startswith("All "):
492+
unsummed = unsummed[unsummed["Contest"] == contest_or_contest_group]
473493

474494
multiple_ballot_types = len(unsummed["CountItemType"].unique()) > 1
475495
groupby_cols = [
@@ -541,7 +561,7 @@ def create_bar(
541561
0
542562
]
543563
y_party_abbr = create_party_abbreviation(y_party)
544-
jurisdiction = db.name_from_id_cursor(cursor, "ReportingUnit", top_ru_id)
564+
jurisdiction = db.name_from_id_cursor(cursor, "ReportingUnit",jurisdiction_id)
545565

546566
pivot_df = pd.pivot_table(
547567
temp_df, values="Count", index=["Name"], columns="Selection", fill_value=0
@@ -577,7 +597,7 @@ def create_bar(
577597
acted = "widened"
578598
results["votes_at_stake"] = f"Outlier {acted} margin by ~ {votes_at_stake}"
579599
results["margin"] = human_readable_numbers(results["margin_raw"])
580-
results["preliminary"] = db.is_preliminary(cursor, election_id, top_ru_id)
600+
results["preliminary"] = db.is_preliminary(cursor,election_id,jurisdiction_id)
581601

582602
# display ballot info
583603
if multiple_ballot_types:
@@ -595,8 +615,8 @@ def create_bar(
595615
results[
596616
"title"
597617
] = f"""{results["count_item_type"].replace("-", " ").title()} Ballots Reported"""
598-
download_date = db.data_file_download(cursor, election_id, top_ru_id)
599-
if db.is_preliminary(cursor, election_id, top_ru_id) and download_date:
618+
download_date = db.data_file_download(cursor,election_id,jurisdiction_id)
619+
if db.is_preliminary(cursor,election_id,jurisdiction_id) and download_date:
600620
results[
601621
"title"
602622
] = f"""{results["title"]} as of {download_date} (preliminary)"""
@@ -606,7 +626,7 @@ def create_bar(
606626
return result_list
607627

608628

609-
def assign_anomaly_score(data):
629+
def assign_anomaly_score(data: pd.DataFrame) -> pd.DataFrame:
610630
"""adds a new column called score between 0 and 1; 1 is more anomalous.
611631
Also adds a `unit_id` column which assigns a score to each unit of analysis
612632
that is considered. For example, we may decide to look at anomalies across each
@@ -742,7 +762,7 @@ def assign_anomaly_score(data):
742762
return df
743763

744764

745-
def get_most_anomalous(data, n):
765+
def get_most_anomalous(data: pd.DataFrame, n: int) -> pd.DataFrame:
746766
"""Gets n contest, with 2 from largest votes at stake ratio
747767
and 1 with largest score. If 2 from votes at stake cannot be found
748768
(bc of threshold for score) then we fill in the top n from scores"""
@@ -801,7 +821,7 @@ def get_most_anomalous(data, n):
801821
return df
802822

803823

804-
def euclidean_zscore(li):
824+
def euclidean_zscore(li: List[List[float]]) -> List[float]:
805825
"""Take a list of vectors -- all in the same R^k,
806826
returns a list of the z-scores of the vectors -- each relative to the ensemble"""
807827
distance_list = [sum([dist.euclidean(item, y) for y in li]) for item in li]
@@ -812,7 +832,7 @@ def euclidean_zscore(li):
812832
return list(stats.zscore(distance_list))
813833

814834

815-
def calculate_votes_at_stake(data) -> pd.DataFrame:
835+
def calculate_votes_at_stake(data: pd.DataFrame) -> pd.DataFrame:
816836
"""Move the most anomalous pairing to the equivalent of the second-most anomalous
817837
and calculate the differences in votes that would be returned"""
818838
df = pd.DataFrame()
@@ -900,7 +920,7 @@ def calculate_votes_at_stake(data) -> pd.DataFrame:
900920
return df
901921

902922

903-
def create_candidate_contests(df, columns):
923+
def create_candidate_contests(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
904924
contest_df = (
905925
df["VoteCount"]
906926
.merge(df["Contest"], how="left", left_on="Contest_Id", right_index=True)
@@ -926,7 +946,7 @@ def create_candidate_contests(df, columns):
926946
return contest_df
927947

928948

929-
def create_ballot_measure_contests(df, columns):
949+
def create_ballot_measure_contests(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
930950
ballotmeasure_df = (
931951
df["ContestSelectionJoin"]
932952
.merge(
@@ -953,7 +973,7 @@ def create_ballot_measure_contests(df, columns):
953973
return ballotmeasure_df
954974

955975

956-
def get_unit_by_column(data, column):
976+
def get_unit_by_column(data: pd.DataFrame, column: str) -> List[int]:
957977
"""Given a dataframe of results, return a list of unique unit_ids
958978
that are sorted in desc order by the column's value"""
959979
data = data[["unit_id", column]]
@@ -962,7 +982,7 @@ def get_unit_by_column(data, column):
962982
return list(data["unit_id"].unique())
963983

964984

965-
def human_readable_numbers(value):
985+
def human_readable_numbers(value: float) -> str:
966986
abs_value = abs(value)
967987
if abs_value < 10:
968988
return str(value)
@@ -974,7 +994,7 @@ def human_readable_numbers(value):
974994
return "{:,}".format(round(value, -3))
975995

976996

977-
def sort_pivot_by_margins(df):
997+
def sort_pivot_by_margins(df:pd.DataFrame) -> pd.DataFrame:
978998
"""grab the row with the highest anomaly score, then sort the remainder by
979999
margin. The sorting order depends on whether the anomalous row is >50% or <50%"""
9801000

0 commit comments

Comments
 (0)