Skip to content

Commit 548c724

Browse files
committed
Kansas munging improvements
1 parent cad633a commit 548c724

14 files changed

Lines changed: 196 additions & 47 deletions

src/electiondata/munge/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,8 @@ def melt_to_one_count_column(
624624
if "in_count_headers" in p["munge_field_types"]:
625625
# split header_0 column into separate columns
626626
# # get header_rows
627+
# TODO: the following triggers a pandas PerformanceWarning (highly fragmented DataFrame) for Kansas House of Representatives 2020g. Rather than
628+
# assigning values, need to use melted = pd.concat([melted, <new_columns>], axis=1)
627629
melted[
628630
[f"count_header_{idx}" for idx in p["count_header_row_numbers"]]
629631
] = pd.DataFrame(melted["header_0"].str.split(";:;", expand=True).values)[

src/ini_files_for_results/Kansas/ks_20g_ks_house_official.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[election_results]
22
results_file=Kansas/2020_General_Election_Kansas_House_of_Representatives_results_by_precinct.xlsx
3-
munger_list=ks_gen_main,ks_gen_johnson_count_from_B,ks_gen_shawnee_count_from_B,ks_gen_sedgwick,ks_gen_wyandotte_4_line_header_first_count_col_3
3+
munger_list=ks_gen_main,ks_gen_johnson_count_from_B,ks_gen_shawnee_count_from_B,ks_gen_sedgwick,ks_gen_wyandotte_4_line_header_first_count_col_3,ks_gen_wyandotte_3_line_header_first_count_col_3,ks_gen_wyandotte_4_line_header_first_count_col_3_merged_rows
44
jurisdiction=Kansas
55
election=2020 General
66
results_short_name=ks_20g_kshouse

src/jurisdictions/Kansas/Candidate.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,15 +125,12 @@ Rick Kloos
125125
Rachel Willis
126126
Brenda S. Dietrich
127127
Anthony Hensley
128-
Under Votes
129-
Over Votes
130128
Laura McConwell
131129
Ethan Corson
132130
Diana Whittington
133131
Cindy Holscher
134132
Vail Fruechting
135133
Ty Masterson
136-
Total Votes Cast
137134
Timothy Don Fry II
138135
Mary Ware
139136
Dan Kerschen
@@ -356,3 +353,6 @@ Vic (T-Bone) Miller
356353
Vicki Schmidt
357354
Virgil Weigel
358355
Wendy Bingesser
356+
Jordan Michael Mackey
357+
Greg Conchola
358+
Rick Parsons

src/jurisdictions/Kansas/dictionary.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,6 @@ Candidate Molly Baumgardner Baumgardner, Molly
167167
Candidate Monica Murnan Murnan, Monica
168168
Candidate Nancy J. Ingle Ingle, Nancy J.
169169
Candidate Other Other
170-
Candidate Over Votes Over Votes
171170
Candidate Pat Pettey Pat Pettey
172171
Candidate Pat Proctor Proctor, Pat
173172
Candidate Patrick Penn Penn, Patrick
@@ -224,13 +223,11 @@ Candidate Todd Maddox Maddox, Todd
224223
Candidate Tom Hawk Hawk, Tom
225224
Candidate Tom Holland Holland, Tom
226225
Candidate Tory Marie Arnberger Arnberger, Tory Marie
227-
Candidate Total Votes Cast Total Votes Cast
228226
Candidate Tracey Mann Mann, Tracey
229227
Candidate Trevor Jacobs Jacobs, Trevor
230228
Candidate Troy L. Waymaster Waymaster, Troy L.
231229
Candidate Ty Masterson Masterson, Ty
232230
Candidate Ty Masterson Ty Masterson
233-
Candidate Under Votes Under Votes
234231
Candidate Vail Fruechting Vail Fruechting
235232
Candidate Virgil Peck Peck, Virgil
236233
Candidate W. Michael Shimeall Shimeall, W. Michael
@@ -6761,3 +6758,6 @@ ReportingUnit Kansas;Wilson County Kansas;Wilson
67616758
ReportingUnit Kansas;Woodson County Kansas;Woodson
67626759
ReportingUnit Kansas;Wyandotte County Kansas;Wyandotte
67636760
CandidateContest KS Attorney General KS;Attorney General;statewide
6761+
Candidate Jordan Michael Mackey Jordan Michael Mackey
6762+
Candidate Greg Conchola Greg Conchola
6763+
Candidate Rick Parsons Rick Parsons

src/mungers/ks_gen_johnson_count_from_B.munger

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,11 @@ CandidateContest=<count_header_0>
4949
Party=<count_header_2>
5050

5151

52-
53-
54-
55-
5652
# Values to ignore (optional) #
5753
[ignore]
5854
## E.g: Candidate=Total Votes Cast,Registered Voters ##
5955
ReportingUnit=JOHNSON;COUNTY TOTALS,Johnson;COUNTY TOTALS
56+
Candidate=Write-in,Under Votes,Over Votes
6057

6158
# Lookup formula sections #
6259
## Required when foreign keys are used in munge formulas and ##

src/mungers/ks_gen_sedgwick.munger

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ Party={<count_header_3>,^(\w\w\w) .*$}
6262
# Values to ignore (optional) #
6363
[ignore]
6464
## E.g: Candidate=Total Votes Cast,Registered Voters ##
65-
Candidate=Write-in Totals,Totals
65+
Candidate=Write-in Totals,Totals,Total Votes Cast
6666
ReportingUnit=SEDGWICK;Totals,Sedgwick;Totals
6767

6868
# Lookup formula sections #

src/mungers/ks_gen_shawnee.munger

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ Party={<count_header_2>,^(\w\w\w) .*$}
6262
# Values to ignore (optional) #
6363
[ignore]
6464
## E.g: Candidate=Total Votes Cast,Registered Voters ##
65-
Candidate=Write-in Totals
65+
Candidate=Write-in Totals,Write-in
6666

6767
# Lookup formula sections #
6868
## Required when foreign keys are used in munge formulas and ##

src/mungers/ks_gen_wyandotte_3_line_header_first_count_col_3.munger

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Format parameters section (required) #
2+
[format]
3+
## Required format parameters:
4+
#### File type must be one of: excel,json-nested,xml,flat_text
5+
file_type=excel
6+
#### Counts are found in one of these ways: by_name,by_number
7+
count_location=by_number:3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24
8+
9+
merged_cells=yes
10+
11+
################################################
12+
## Sometimes required format parameters:
13+
#### for flat_text file type:
14+
flat_text_delimiter=
15+
#### if count_columns_specified is 'by_name':
16+
count_fields_by_name=
17+
#### if count_columns_specified is 'by_number':
18+
#### if 'in_count_headers' is in munge_strings
19+
#### (start numbering from first unskipped row):
20+
count_header_row_numbers=0,1,2
21+
#### if 'constant_over_file' is in munge_strings (NB: give value for each in .ini file):
22+
constant_over_file=CountItemType
23+
#### if file type is flat_text or excel and count_columns_specified is 'by_name'
24+
#### (start numbering from first unskipped row):
25+
count_field_name_row=
26+
#### if file type is flat_text or excel and not all rows are data:
27+
#### (start numbering from first unskipped row):
28+
noncount_header_row=0
29+
30+
################################################
31+
## Optional format parameters:
32+
#### for any file type:
33+
thousands_separator=,
34+
encoding=
35+
36+
#### for a flat_text or excel file type:
37+
###### if field names are not given in file
38+
#all_rows=data
39+
###### if there are multiple blocks of data per page, each with its own headers
40+
multi_block=yes
41+
42+
#### for excel file type:
43+
sheets_to_read_names=Wyandotte,WYANDOTTE
44+
sheets_to_read_numbers=
45+
sheets_to_skip_names=
46+
47+
#### for xml file type
48+
nesting_tags=
49+
50+
# Munge formula sections (required if in munge_strings list) #
51+
[munge formulas]
52+
ReportingUnit=<sheet_name>;<column_1>
53+
Candidate={<count_header_2>,^(?:\w\w\w |)(.*)$}
54+
CandidateContest=<count_header_0>
55+
Party={<count_header_2>,^(\w\w\w) .*$}
56+
57+
58+
59+
# Values to ignore (optional) #
60+
[ignore]
61+
## E.g: Candidate=Total Votes Cast,Registered Voters ##
62+
ReportingUnit=WYANDOTTE;Totals,Wyandotte;Totals,WYANDOTTE;
63+
Candidate=Write-in Totals
64+
65+
# Lookup formula sections #
66+
## Required when foreign keys are used in munge formulas and ##
67+
## must be looked up in another table. ##
68+
## See mi_gen18.munger for example ##
69+
##################################################################
70+

src/mungers/ks_gen_wyandotte_4_line_header_first_count_col_3.munger

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ Party={<count_header_2>,^(\w\w\w) .*$}
6060
[ignore]
6161
## E.g: Candidate=Total Votes Cast,Registered Voters ##
6262
ReportingUnit=WYANDOTTE;Totals,Wyandotte;Totals,WYANDOTTE;
63-
Candidate=Write-in Totals
63+
Candidate=Write-in Totals,Vote For 1
6464

6565
# Lookup formula sections #
6666
## Required when foreign keys are used in munge formulas and ##

src/mungers/ks_gen_wyandotte_4_line_header_first_count_col_3_merged_rows.munger

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Format parameters section (required) #
2+
[format]
3+
## Required format parameters:
4+
#### File type must be one of: excel,json-nested,xml,flat_text
5+
file_type=excel
6+
#### Counts are found in one of these ways: by_name,by_number
7+
count_location=by_number:3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24
8+
9+
merged_cells=yes
10+
11+
################################################
12+
## Sometimes required format parameters:
13+
#### for flat_text file type:
14+
flat_text_delimiter=
15+
#### if count_columns_specified is 'by_name':
16+
count_fields_by_name=
17+
#### if count_columns_specified is 'by_number':
18+
#### if 'in_count_headers' is in munge_strings
19+
#### (start numbering from first unskipped row):
20+
count_header_row_numbers=0,2,3
21+
#### if 'constant_over_file' is in munge_strings (NB: give value for each in .ini file):
22+
constant_over_file=CountItemType
23+
#### if file type is flat_text or excel and count_columns_specified is 'by_name'
24+
#### (start numbering from first unskipped row):
25+
count_field_name_row=
26+
#### if file type is flat_text or excel and not all rows are data:
27+
#### (start numbering from first unskipped row):
28+
noncount_header_row=0
29+
30+
################################################
31+
## Optional format parameters:
32+
#### for any file type:
33+
thousands_separator=,
34+
encoding=
35+
36+
#### for a flat_text or excel file type:
37+
###### if field names are not given in file
38+
#all_rows=data
39+
###### if there are multiple blocks of data per page, each with its own headers
40+
multi_block=yes
41+
42+
#### for excel file type:
43+
sheets_to_read_names=Wyandotte,WYANDOTTE
44+
sheets_to_read_numbers=
45+
sheets_to_skip_names=
46+
47+
#### for xml file type
48+
nesting_tags=
49+
50+
# Munge formula sections (required if in munge_strings list) #
51+
[munge formulas]
52+
ReportingUnit=<sheet_name>;<column_1>
53+
Candidate={<count_header_3>,^(?:\w\w\w |)(.*)$}
54+
CandidateContest=<count_header_0>
55+
Party={<count_header_3>,^(\w\w\w) .*$}
56+
57+
58+
59+
# Values to ignore (optional) #
60+
[ignore]
61+
## E.g: Candidate=Total Votes Cast,Registered Voters ##
62+
ReportingUnit=WYANDOTTE;Totals,Wyandotte;Totals,WYANDOTTE;
63+
Candidate=Write-in Totals
64+
65+
# Lookup formula sections #
66+
## Required when foreign keys are used in munge formulas and ##
67+
## must be looked up in another table. ##
68+
## See mi_gen18.munger for example ##
69+
##################################################################
70+

0 commit comments

Comments
 (0)