Skip to content

Commit b01aa03

Browse files
committed
document code
1 parent cf655c2 commit b01aa03

4 files changed

Lines changed: 88 additions & 475 deletions

File tree

src/electiondata/__init__.py

Lines changed: 81 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -2541,7 +2541,7 @@ def test_loaded_results(
25412541
f"for every {self.major_subdivision_type[juris_true_name]}",
25422542
)
25432543
# report contests with unknown candidates
2544-
bad_contests = self.get_contest_with_unknown_candidates(
2544+
bad_contests = self.get_contests_with_unknown_candidates(
25452545
election, juris_true_name, report_dir=self.reports_and_plots_dir
25462546
)
25472547
if bad_contests:
@@ -2754,7 +2754,7 @@ def check_count_types_standard(
27542754
# if nothing failed, count types are standard
27552755
return True
27562756

2757-
def get_contest_with_unknown_candidates(
2757+
def get_contests_with_unknown_candidates(
27582758
self,
27592759
election: str,
27602760
jurisdiction: str,
@@ -2950,7 +2950,8 @@ def export_outlier_data(
29502950
jurisdiction: str,
29512951
contest: str = None,
29522952
) -> Optional[List[dict]]:
2953-
"""contest_type is one of state, congressional, state-senate, state-house"""
2953+
"""Not ready for prime time.
2954+
contest_type is one of state, congressional, state-senate, state-house"""
29542955
election_id = db.name_to_id(self.session, "Election", election)
29552956
jurisdiction_id = db.name_to_id(self.session, "ReportingUnit", jurisdiction)
29562957
# bar chart always at one level below top reporting unit
@@ -3065,6 +3066,17 @@ def export_nist_v2(
30653066
def export_election_to_tsv(
30663067
self, target_file: str, election: str, jurisdiction: Optional[str] = None
30673068
):
3069+
"""
3070+
Required inputs:
3071+
target_file: str, path to file
3072+
election: str,
3073+
Optional inputs:
3074+
jurisdiction: Optional[str] = None,
3075+
3076+
Exports all election results from <self.session>'s database for the election <election> (and the jurisdiction
3077+
<jurisdiction>, if given) to the <target_file>. Columns exported are: "Election",
3078+
"Contest", "Selection", "Party", "ReportingUnit", "VoteType", "Count", "Preliminary"
3079+
"""
30683080
# get internal ids for election (and maybe jurisdiction too)
30693081
election_id = db.name_to_id(self.session, "Election", election)
30703082
if jurisdiction is not None:
@@ -3110,7 +3122,9 @@ def diff_in_diff_dem_vs_rep(
31103122
self,
31113123
election: str,
31123124
) -> (pd.DataFrame, list):
3113-
"""for each jurisdiction in the election that has more than just 'total',
3125+
"""
3126+
Not ready for prime time.
3127+
for each jurisdiction in the election that has more than just 'total',
31143128
Calculate all possible diff-in-diff values per Herron
31153129
http://doi.org/10.1089/elj.2019.0544.
31163130
Return df with columns election, overall jurisdiction, county-type jurisdiction,
@@ -3320,7 +3334,9 @@ def vote_share_comparison(
33203334
election_id: int,
33213335
reportingunit_id: int,
33223336
) -> Dict[str, Dict[str, Any]]:
3323-
"""given an election, a reporting unit -- not necessarily a whole jurisdiction--
3337+
"""
3338+
Not ready for prime time.
3339+
given an election, a reporting unit -- not necessarily a whole jurisdiction--
33243340
and an element for pairing (e.g., "Contest"), return a dictionary mapping pairs of elements
33253341
to pairs of vote shares (summing over everything else)"""
33263342
vote_share = dict()
@@ -3339,7 +3355,8 @@ def vote_count_by_element(
33393355
election_id: int,
33403356
jurisdiction_id: int,
33413357
) -> dict:
3342-
"""Returns dictionary of vote counts by element (summing over everything else
3358+
"""Not ready for prime time.
3359+
Returns dictionary of vote counts by element (summing over everything else
33433360
within the given election and reporting unit)"""
33443361
if element == "CountItemType":
33453362
name_field = "CountItemType"
@@ -3426,6 +3443,7 @@ def aggregate(
34263443
def pres_counts_by_vote_type_and_major_subdiv(
34273444
self, jurisdiction: str
34283445
) -> pd.DataFrame:
3446+
"""Not ready for prime time"""
34293447
# TODO return dataframe with columns jurisdiction, subdivision, year, CountItemType,
34303448
# total votes for pres in general election
34313449
group_cols = [
@@ -3461,6 +3479,7 @@ def pres_counts_by_vote_type_and_major_subdiv(
34613479
return df_pres
34623480

34633481
def pres_counts_by_vote_type_and_major_subdiv_all(self) -> pd.DataFrame:
3482+
"""Not ready for prime time"""
34643483
all_df = pd.DataFrame()
34653484
for jurisdiction in constants.abbr.keys():
34663485
df = self.pres_counts_by_vote_type_and_major_subdiv(jurisdiction)
@@ -3744,28 +3763,52 @@ def aggregate_results(
37443763
def data_exists(
37453764
election: str,
37463765
jurisdiction: str,
3747-
p_path: Optional[str] = None,
3766+
param_file: Optional[str] = None,
37483767
dbname: Optional[str] = None,
37493768
) -> bool:
3750-
an = Analyzer(param_file=p_path, dbname=dbname)
3751-
return an.data_exists(election, jurisdiction)
3769+
"""
3770+
Required inputs:
3771+
election: str,
3772+
jurisdiction: str,
3773+
Optional inputs:
3774+
param_file: Optional[str] = None,
3775+
dbname: Optional[str] = None,
3776+
3777+
Returns:
3778+
bool, True if database specified by parameters in <param_file> (or database named <dbname>, if given) has
3779+
any election results data for the given <election> and <jurisdiction>. Otherwise False.
3780+
"""
3781+
analyzer = Analyzer(param_file=param_file,dbname=dbname)
3782+
return analyzer.data_exists(election, jurisdiction)
37523783

37533784

37543785
def external_data_exists(
37553786
election: str,
37563787
jurisdiction: str,
3757-
p_path: Optional[str] = None,
3788+
param_file: Optional[str] = None,
37583789
dbname: Optional[str] = None,
37593790
) -> bool:
3760-
an = Analyzer(param_file=p_path, dbname=dbname)
3791+
"""
3792+
Required inputs:
3793+
election: str,
3794+
jurisdiction: str,
3795+
Optional inputs:
3796+
param_file: Optional[str] = None,
3797+
dbname: Optional[str] = None,
3798+
3799+
Returns:
3800+
bool, True if database specified by parameters in <param_file> (or database named <dbname>, if given) has
3801+
any external dataset content for the given <election> and <jurisdiction>. Otherwise False.
3802+
"""
3803+
an = Analyzer(param_file=param_file,dbname=dbname)
37613804
if not an:
37623805
return False
37633806

37643807
jurisdiction_id = db.name_to_id(an.session, "ReportingUnit", jurisdiction)
37653808
election_id = db.name_to_id(an.session, "Election", election)
37663809

3767-
# if the database doesn't have the reporting unit
3768-
if not jurisdiction_id:
3810+
# if the database doesn't have both the jurisdiction and the election
3811+
if (not jurisdiction_id) or (not election_id):
37693812
# data doesn't exist
37703813
return False
37713814

@@ -3784,68 +3827,6 @@ def external_data_exists(
37843827
return True
37853828

37863829

3787-
def check_totals_match_vote_types(
3788-
election: str,
3789-
jurisdiction: str,
3790-
sub_unit_type=constants.default_subdivision_type,
3791-
dbname: Optional[str] = None,
3792-
param_file: Optional[str] = None,
3793-
) -> bool:
3794-
"""Interesting if there are both total and other vote types;
3795-
otherwise trivially true"""
3796-
an = Analyzer(dbname=dbname, param_file=param_file)
3797-
return an.check_count_types_standard(election, jurisdiction)
3798-
3799-
3800-
def contest_total(
3801-
election: str,
3802-
jurisdiction: str,
3803-
contest: str,
3804-
dbname: Optional[str] = None,
3805-
param_file: Optional[str] = None,
3806-
vote_type: Optional[str] = None,
3807-
reporting_unit: Optional[str] = None,
3808-
sub_unit_type: str = constants.default_subdivision_type,
3809-
contest_type: Optional[str] = "Candidate",
3810-
) -> int:
3811-
df = aggregate_results(
3812-
election=election,
3813-
jurisdiction=jurisdiction,
3814-
dbname=dbname,
3815-
vote_type=vote_type,
3816-
sub_unit=reporting_unit,
3817-
sub_unit_type=sub_unit_type,
3818-
contest=contest,
3819-
contest_type=contest_type,
3820-
param_file=param_file,
3821-
)
3822-
return df["count"].sum()
3823-
3824-
3825-
def check_count_types_standard(
3826-
election: str,
3827-
jurisdiction: str,
3828-
dbname: Optional[str] = None,
3829-
param_file: Optional[str] = None,
3830-
) -> bool:
3831-
an = Analyzer(dbname=dbname, param_file=param_file)
3832-
return an.check_count_types_standard(election, jurisdiction)
3833-
3834-
3835-
def get_contest_with_unknown_candidates(
3836-
election: str,
3837-
jurisdiction: str,
3838-
dbname: Optional[str] = None,
3839-
param_file: Optional[str] = None,
3840-
) -> List[str]:
3841-
an = Analyzer(dbname=dbname, param_file=param_file)
3842-
if not an:
3843-
return [f"Failure to connect to database"]
3844-
return an.get_contest_with_unknown_candidates(
3845-
election, jurisdiction, report_dir=an.reports_and_plots_dir
3846-
)
3847-
3848-
38493830
def load_results_df(
38503831
session: Session,
38513832
df: pd.DataFrame,
@@ -3858,10 +3839,32 @@ def load_results_df(
38583839
election_id: int,
38593840
rollup: bool = False,
38603841
rollup_rut: str = constants.default_subdivision_type,
3861-
alt_dictionary: Optional[
3862-
str
3863-
] = None, # when given, use this dictionary, not the one in the jurisdiction directory
3842+
alt_dictionary: Optional[str] = None,
38643843
) -> Optional[dict]:
3844+
"""
3845+
Required inputs:
3846+
session: Session,
3847+
df: pd.DataFrame, dataframe with columns 'Count', 'Candidate_raw', 'Party_raw', etc.
3848+
necessary_constants: dict, dictionary of constant values (e.g., if all rows of <df> are for a single Contest)
3849+
juris_true_name: str, for error reporting
3850+
file_name: str, for error reporting
3851+
munger_name: str, for error reporting
3852+
path_to_jurisdiction_dir: str, path to directory
3853+
datafile_id: int,
3854+
election_id: int,
3855+
Optional inputs:
3856+
rollup: bool = False,
3857+
rollup_rut: str = constants.default_subdivision_type,
3858+
alt_dictionary: Optional[str] = None, path to file
3859+
3860+
Munges vote counts in dataframe into the <session>'s database, using the dictionary.txt file in the
3861+
<path_to_jurisdiction_dir> directory or, if given, the file specified by <alt_dictionary>. If
3862+
<rollup> is True, results are rolled up to the ReportingUnitType <rollup_rut>, which defaults to
3863+
<constants.default_subdivision_type>.
3864+
3865+
Returns:
3866+
Optional[dict], error dictionary
3867+
"""
38653868
err = None
38663869
working = df.copy()
38673870
# add text column for internal CountItemType name, Id columns for all but Count, removing raw-munged

0 commit comments

Comments
 (0)