@@ -145,7 +145,7 @@ def create_scatter(
145145 v_count ,
146146 v_type ,
147147 v_runoff ,
148- ):
148+ ) -> Optional [ dict ] :
149149 connection = session .bind .raw_connection ()
150150 cursor = connection .cursor ()
151151
@@ -259,7 +259,7 @@ def create_scatter(
259259 return results
260260
261261
262- def package_results (data , jurisdiction , x , y , restrict = None ):
262+ def package_results (data , jurisdiction , x , y , restrict = None ) -> dict :
263263 results = {"jurisdiction" : jurisdiction , "x" : x , "y" : y , "counts" : []}
264264 if restrict and len (data .index ) > restrict :
265265 data = get_remaining_averages (data , restrict )
@@ -446,30 +446,50 @@ def get_votecount_data(
446446
447447def create_bar (
448448 session : Session ,
449- top_ru_id : int ,
450- subdivision_type : str ,
451- contest_type : Optional [str ],
452- contest : Optional [str ],
453449 election_id : int ,
454- for_export : bool ,
455- ):
456-
450+ jurisdiction_id : int ,
451+ subdivision_type : str ,
452+ contest_district_type : Optional [str ] = None ,
453+ contest_or_contest_group : Optional [str ] = None ,
454+ for_export : bool = True ,
455+ ) -> Optional [List [dict ]]:
456+ """
457+ Required inputs:
458+ session: Session, sqlalchemy session
459+ election_id: int,
460+ jurisdiction_id: int,
461+ subdivision_type: str,
462+ Optional inputs:
463+ contest_district_type: Optional[str] = None,
464+ contest_or_contest_group: Optional[str] = None, from user-facing menu, either the name of a contest or of a
465+ group of contests, e.g., "All congressional"
466+ for_export: bool = True,
467+
468+ Returns:
469+ List[dict], list of dictionaries, where each dictionary contains information to create a bar
470+ chart. The bar charts in the list are chosen via an algorithm favoring charts with a single outlier
471+ county whose impact on the margin is large.
472+ # TODO document algorithm details in assign_anomaly_score(unsummed)
473+ Bar charts are restricted to results for the <contest_or_contest_group>, if given, and also
474+ from the contests with districts of type <contest_district_type>, if given
475+ """
476+ # connect to db via psycopg2
457477 connection = session .bind .raw_connection ()
458478 cursor = connection .cursor ()
459479
460480 unsummed = db .unsummed_vote_counts_with_rollup_subdivision_id (
461- session , election_id , top_ru_id , subdivision_type
481+ session , election_id , jurisdiction_id , subdivision_type
462482 )
463483
464- if contest_type :
465- contest_type = ui .get_contest_type_mapping (contest_type )
466- unsummed = unsummed [unsummed ["contest_district_type" ] == contest_type ]
484+ if contest_district_type :
485+ contest_district_type = ui .get_contest_type_mapping (contest_district_type )
486+ unsummed = unsummed [unsummed ["contest_district_type" ] == contest_district_type ]
467487
468- # through front end, contest_type must be truthy if contest is truthy
488+ # through VoteVisualizer front end, contest_district_type must be truthy if contest_or_contest_group is truthy
469489 # Only filter when there is an actual contest passed through, as opposed to
470490 # "All congressional" as an example
471- if contest and not contest .startswith ("All " ):
472- unsummed = unsummed [unsummed ["Contest" ] == contest ]
491+ if contest_or_contest_group and not contest_or_contest_group .startswith ("All " ):
492+ unsummed = unsummed [unsummed ["Contest" ] == contest_or_contest_group ]
473493
474494 multiple_ballot_types = len (unsummed ["CountItemType" ].unique ()) > 1
475495 groupby_cols = [
@@ -541,7 +561,7 @@ def create_bar(
541561 0
542562 ]
543563 y_party_abbr = create_party_abbreviation (y_party )
544- jurisdiction = db .name_from_id_cursor (cursor , "ReportingUnit" , top_ru_id )
564+ jurisdiction = db .name_from_id_cursor (cursor , "ReportingUnit" , jurisdiction_id )
545565
546566 pivot_df = pd .pivot_table (
547567 temp_df , values = "Count" , index = ["Name" ], columns = "Selection" , fill_value = 0
@@ -577,7 +597,7 @@ def create_bar(
577597 acted = "widened"
578598 results ["votes_at_stake" ] = f"Outlier { acted } margin by ~ { votes_at_stake } "
579599 results ["margin" ] = human_readable_numbers (results ["margin_raw" ])
580- results ["preliminary" ] = db .is_preliminary (cursor , election_id , top_ru_id )
600+ results ["preliminary" ] = db .is_preliminary (cursor , election_id , jurisdiction_id )
581601
582602 # display ballot info
583603 if multiple_ballot_types :
@@ -595,8 +615,8 @@ def create_bar(
595615 results [
596616 "title"
597617 ] = f"""{ results ["count_item_type" ].replace ("-" , " " ).title ()} Ballots Reported"""
598- download_date = db .data_file_download (cursor , election_id , top_ru_id )
599- if db .is_preliminary (cursor , election_id , top_ru_id ) and download_date :
618+ download_date = db .data_file_download (cursor , election_id , jurisdiction_id )
619+ if db .is_preliminary (cursor , election_id , jurisdiction_id ) and download_date :
600620 results [
601621 "title"
602622 ] = f"""{ results ["title" ]} as of { download_date } (preliminary)"""
@@ -606,7 +626,7 @@ def create_bar(
606626 return result_list
607627
608628
609- def assign_anomaly_score (data ) :
629+ def assign_anomaly_score (data : pd . DataFrame ) -> pd . DataFrame :
610630 """adds a new column called score between 0 and 1; 1 is more anomalous.
611631 Also adds a `unit_id` column which assigns a score to each unit of analysis
612632 that is considered. For example, we may decide to look at anomalies across each
@@ -742,7 +762,7 @@ def assign_anomaly_score(data):
742762 return df
743763
744764
745- def get_most_anomalous (data , n ) :
765+ def get_most_anomalous (data : pd . DataFrame , n : int ) -> pd . DataFrame :
746766 """Gets n contest, with 2 from largest votes at stake ratio
747767 and 1 with largest score. If 2 from votes at stake cannot be found
748768 (bc of threshold for score) then we fill in the top n from scores"""
@@ -801,7 +821,7 @@ def get_most_anomalous(data, n):
801821 return df
802822
803823
804- def euclidean_zscore (li ) :
824+ def euclidean_zscore (li : List [ List [ float ]]) -> List [ float ] :
805825 """Take a list of vectors -- all in the same R^k,
806826 returns a list of the z-scores of the vectors -- each relative to the ensemble"""
807827 distance_list = [sum ([dist .euclidean (item , y ) for y in li ]) for item in li ]
@@ -812,7 +832,7 @@ def euclidean_zscore(li):
812832 return list (stats .zscore (distance_list ))
813833
814834
815- def calculate_votes_at_stake (data ) -> pd .DataFrame :
835+ def calculate_votes_at_stake (data : pd . DataFrame ) -> pd .DataFrame :
816836 """Move the most anomalous pairing to the equivalent of the second-most anomalous
817837 and calculate the differences in votes that would be returned"""
818838 df = pd .DataFrame ()
@@ -900,7 +920,7 @@ def calculate_votes_at_stake(data) -> pd.DataFrame:
900920 return df
901921
902922
903- def create_candidate_contests (df , columns ) :
923+ def create_candidate_contests (df : pd . DataFrame , columns : List [ str ]) -> pd . DataFrame :
904924 contest_df = (
905925 df ["VoteCount" ]
906926 .merge (df ["Contest" ], how = "left" , left_on = "Contest_Id" , right_index = True )
@@ -926,7 +946,9 @@ def create_candidate_contests(df, columns):
926946 return contest_df
927947
928948
929- def create_ballot_measure_contests (df , columns ):
949+ def create_ballot_measure_contests (
950+ df : pd .DataFrame , columns : List [str ]
951+ ) -> pd .DataFrame :
930952 ballotmeasure_df = (
931953 df ["ContestSelectionJoin" ]
932954 .merge (
@@ -953,7 +975,7 @@ def create_ballot_measure_contests(df, columns):
953975 return ballotmeasure_df
954976
955977
956- def get_unit_by_column (data , column ) :
978+ def get_unit_by_column (data : pd . DataFrame , column : str ) -> List [ int ] :
957979 """Given a dataframe of results, return a list of unique unit_ids
958980 that are sorted in desc order by the column's value"""
959981 data = data [["unit_id" , column ]]
@@ -962,7 +984,7 @@ def get_unit_by_column(data, column):
962984 return list (data ["unit_id" ].unique ())
963985
964986
965- def human_readable_numbers (value ) :
987+ def human_readable_numbers (value : float ) -> str :
966988 abs_value = abs (value )
967989 if abs_value < 10 :
968990 return str (value )
@@ -974,7 +996,7 @@ def human_readable_numbers(value):
974996 return "{:,}" .format (round (value , - 3 ))
975997
976998
977- def sort_pivot_by_margins (df ) :
999+ def sort_pivot_by_margins (df : pd . DataFrame ) -> pd . DataFrame :
9781000 """grab the row with the highest anomaly score, then sort the remainder by
9791001 margin. The sorting order depends on whether the anomalous row is >50% or <50%"""
9801002
0 commit comments