Skip to content

Commit ab42b28

Browse files
Merge pull request #720 from ElectionDataAnalysis/issue709-release-notes
Issue709 release notes
2 parents 99ed28b + 0e053d8 commit ab42b28

21 files changed

Lines changed: 1357 additions & 1461 deletions

File tree

docs/User_Guide.md

Lines changed: 70 additions & 94 deletions
Large diffs are not rendered by default.

src/electiondata/__init__.py

Lines changed: 938 additions & 436 deletions
Large diffs are not rendered by default.

src/electiondata/analyze/__init__.py

Lines changed: 51 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ def create_scatter(
145145
v_count,
146146
v_type,
147147
v_runoff,
148-
):
148+
) -> Optional[dict]:
149149
connection = session.bind.raw_connection()
150150
cursor = connection.cursor()
151151

@@ -259,7 +259,7 @@ def create_scatter(
259259
return results
260260

261261

262-
def package_results(data, jurisdiction, x, y, restrict=None):
262+
def package_results(data, jurisdiction, x, y, restrict=None) -> dict:
263263
results = {"jurisdiction": jurisdiction, "x": x, "y": y, "counts": []}
264264
if restrict and len(data.index) > restrict:
265265
data = get_remaining_averages(data, restrict)
@@ -446,30 +446,50 @@ def get_votecount_data(
446446

447447
def create_bar(
448448
session: Session,
449-
top_ru_id: int,
450-
subdivision_type: str,
451-
contest_type: Optional[str],
452-
contest: Optional[str],
453449
election_id: int,
454-
for_export: bool,
455-
):
456-
450+
jurisdiction_id: int,
451+
subdivision_type: str,
452+
contest_district_type: Optional[str] = None,
453+
contest_or_contest_group: Optional[str] = None,
454+
for_export: bool = True,
455+
) -> Optional[List[dict]]:
456+
"""
457+
Required inputs:
458+
session: Session, sqlalchemy session
459+
election_id: int,
460+
jurisdiction_id: int,
461+
subdivision_type: str,
462+
Optional inputs:
463+
contest_district_type: Optional[str] = None,
464+
contest_or_contest_group: Optional[str] = None, from user-facing menu, either the name of a contest or of a
465+
group of contests, e.g., "All congressional"
466+
for_export: bool = True,
467+
468+
Returns:
469+
List[dict], list of dictionaries, where each dictionary contains information to create a bar
470+
chart. The bar charts in the list are chosen via an algorithm favoring charts with a single outlier
471+
county whose impact on the margin is large.
472+
# TODO document algorithm details in assign_anomaly_score(unsummed)
473+
Bar charts are restricted to results for the <contest_or_contest_group>, if given, and also
474+
from the contests with districts of type <contest_district_type>, if given
475+
"""
476+
# connect to db via psycopg2
457477
connection = session.bind.raw_connection()
458478
cursor = connection.cursor()
459479

460480
unsummed = db.unsummed_vote_counts_with_rollup_subdivision_id(
461-
session, election_id, top_ru_id, subdivision_type
481+
session, election_id, jurisdiction_id, subdivision_type
462482
)
463483

464-
if contest_type:
465-
contest_type = ui.get_contest_type_mapping(contest_type)
466-
unsummed = unsummed[unsummed["contest_district_type"] == contest_type]
484+
if contest_district_type:
485+
contest_district_type = ui.get_contest_type_mapping(contest_district_type)
486+
unsummed = unsummed[unsummed["contest_district_type"] == contest_district_type]
467487

468-
# through front end, contest_type must be truthy if contest is truthy
488+
# through VoteVisualizer front end, contest_district_type must be truthy if contest_or_contest_group is truthy
469489
# Only filter when there is an actual contest passed through, as opposed to
470490
# "All congressional" as an example
471-
if contest and not contest.startswith("All "):
472-
unsummed = unsummed[unsummed["Contest"] == contest]
491+
if contest_or_contest_group and not contest_or_contest_group.startswith("All "):
492+
unsummed = unsummed[unsummed["Contest"] == contest_or_contest_group]
473493

474494
multiple_ballot_types = len(unsummed["CountItemType"].unique()) > 1
475495
groupby_cols = [
@@ -541,7 +561,7 @@ def create_bar(
541561
0
542562
]
543563
y_party_abbr = create_party_abbreviation(y_party)
544-
jurisdiction = db.name_from_id_cursor(cursor, "ReportingUnit", top_ru_id)
564+
jurisdiction = db.name_from_id_cursor(cursor, "ReportingUnit", jurisdiction_id)
545565

546566
pivot_df = pd.pivot_table(
547567
temp_df, values="Count", index=["Name"], columns="Selection", fill_value=0
@@ -577,7 +597,7 @@ def create_bar(
577597
acted = "widened"
578598
results["votes_at_stake"] = f"Outlier {acted} margin by ~ {votes_at_stake}"
579599
results["margin"] = human_readable_numbers(results["margin_raw"])
580-
results["preliminary"] = db.is_preliminary(cursor, election_id, top_ru_id)
600+
results["preliminary"] = db.is_preliminary(cursor, election_id, jurisdiction_id)
581601

582602
# display ballot info
583603
if multiple_ballot_types:
@@ -595,8 +615,8 @@ def create_bar(
595615
results[
596616
"title"
597617
] = f"""{results["count_item_type"].replace("-", " ").title()} Ballots Reported"""
598-
download_date = db.data_file_download(cursor, election_id, top_ru_id)
599-
if db.is_preliminary(cursor, election_id, top_ru_id) and download_date:
618+
download_date = db.data_file_download(cursor, election_id, jurisdiction_id)
619+
if db.is_preliminary(cursor, election_id, jurisdiction_id) and download_date:
600620
results[
601621
"title"
602622
] = f"""{results["title"]} as of {download_date} (preliminary)"""
@@ -606,7 +626,7 @@ def create_bar(
606626
return result_list
607627

608628

609-
def assign_anomaly_score(data):
629+
def assign_anomaly_score(data: pd.DataFrame) -> pd.DataFrame:
610630
"""adds a new column called score between 0 and 1; 1 is more anomalous.
611631
Also adds a `unit_id` column which assigns a score to each unit of analysis
612632
that is considered. For example, we may decide to look at anomalies across each
@@ -742,7 +762,7 @@ def assign_anomaly_score(data):
742762
return df
743763

744764

745-
def get_most_anomalous(data, n):
765+
def get_most_anomalous(data: pd.DataFrame, n: int) -> pd.DataFrame:
746766
"""Gets n contest, with 2 from largest votes at stake ratio
747767
and 1 with largest score. If 2 from votes at stake cannot be found
748768
(bc of threshold for score) then we fill in the top n from scores"""
@@ -801,7 +821,7 @@ def get_most_anomalous(data, n):
801821
return df
802822

803823

804-
def euclidean_zscore(li):
824+
def euclidean_zscore(li: List[List[float]]) -> List[float]:
805825
"""Take a list of vectors -- all in the same R^k,
806826
returns a list of the z-scores of the vectors -- each relative to the ensemble"""
807827
distance_list = [sum([dist.euclidean(item, y) for y in li]) for item in li]
@@ -812,7 +832,7 @@ def euclidean_zscore(li):
812832
return list(stats.zscore(distance_list))
813833

814834

815-
def calculate_votes_at_stake(data) -> pd.DataFrame:
835+
def calculate_votes_at_stake(data: pd.DataFrame) -> pd.DataFrame:
816836
"""Move the most anomalous pairing to the equivalent of the second-most anomalous
817837
and calculate the differences in votes that would be returned"""
818838
df = pd.DataFrame()
@@ -900,7 +920,7 @@ def calculate_votes_at_stake(data) -> pd.DataFrame:
900920
return df
901921

902922

903-
def create_candidate_contests(df, columns):
923+
def create_candidate_contests(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
904924
contest_df = (
905925
df["VoteCount"]
906926
.merge(df["Contest"], how="left", left_on="Contest_Id", right_index=True)
@@ -926,7 +946,9 @@ def create_candidate_contests(df, columns):
926946
return contest_df
927947

928948

929-
def create_ballot_measure_contests(df, columns):
949+
def create_ballot_measure_contests(
950+
df: pd.DataFrame, columns: List[str]
951+
) -> pd.DataFrame:
930952
ballotmeasure_df = (
931953
df["ContestSelectionJoin"]
932954
.merge(
@@ -953,7 +975,7 @@ def create_ballot_measure_contests(df, columns):
953975
return ballotmeasure_df
954976

955977

956-
def get_unit_by_column(data, column):
978+
def get_unit_by_column(data: pd.DataFrame, column: str) -> List[int]:
957979
"""Given a dataframe of results, return a list of unique unit_ids
958980
that are sorted in desc order by the column's value"""
959981
data = data[["unit_id", column]]
@@ -962,7 +984,7 @@ def get_unit_by_column(data, column):
962984
return list(data["unit_id"].unique())
963985

964986

965-
def human_readable_numbers(value):
987+
def human_readable_numbers(value: float) -> str:
966988
abs_value = abs(value)
967989
if abs_value < 10:
968990
return str(value)
@@ -974,7 +996,7 @@ def human_readable_numbers(value):
974996
return "{:,}".format(round(value, -3))
975997

976998

977-
def sort_pivot_by_margins(df):
999+
def sort_pivot_by_margins(df: pd.DataFrame) -> pd.DataFrame:
9781000
"""grab the row with the highest anomaly score, then sort the remainder by
9791001
margin. The sorting order depends on whether the anomalous row is >50% or <50%"""
9801002

src/electiondata/constants/__init__.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,11 +127,34 @@
127127
default_subdivision_type = "county"
128128
subdivision_reference_file_path = os.path.join(
129129
"jurisdictions",
130-
"000_major_subjurisdiction_types.txt",
130+
"000_for_all_jurisdictions",
131+
"major_subjurisdiction_types.txt",
131132
)
132133

134+
135+
def jurisdiction_wide_contests(abbr: str) -> List[str]:
136+
"""
137+
Inputs:
138+
abbr: str, abbreviation for jurisdiction (e.g., TX)
139+
140+
Returns:
141+
List[str], standard list of jurisdiction-wide contests
142+
"""
143+
return [
144+
f"US President ({abbr})",
145+
f"{abbr} Governor",
146+
f"US Senate {abbr}",
147+
f"{abbr} Attorney General",
148+
f"{abbr} Lieutenant Governor",
149+
f"{abbr} Treasurer",
150+
f"{abbr} Secretary of State",
151+
]
152+
153+
133154
# display information
134155
if 1:
156+
"""maps ReportingUnitType of election district of contest to the user-facing label for that type of contest
157+
for use in Analyzer.display_options()"""
135158
contest_type_mappings = {
136159
"congressional": "Congressional",
137160
"state": "Statewide",
@@ -433,9 +456,9 @@
433456
"results_note",
434457
"jurisdiction",
435458
"election",
459+
"is_preliminary",
436460
]
437461
sdl_pars_opt = [
438-
"jurisdiction_path",
439462
"CandidateContest",
440463
"BallotMeasureContest",
441464
"BallotMeasureSelection",
@@ -444,7 +467,6 @@
444467
"CountItemType",
445468
"ReportingUnit",
446469
"Contest",
447-
"is_preliminary",
448470
]
449471
multi_data_loader_pars = [
450472
"results_dir",
@@ -572,6 +594,7 @@
572594
"warn-munger",
573595
"warn-test",
574596
]
597+
regex_failure_string = " <- Does not match regular expression"
575598
# regex patterns
576599
if 1:
577600
brace_pattern = re.compile(r"{<([^,]*)>,([^{}]*|[^{}]*{[^{}]*}[^{}]*)}")

0 commit comments

Comments
 (0)