Skip to content

Commit 31de3f4

Browse files
authored
testing changes (#117)
2 parents 9c408aa + bf2987e commit 31de3f4

4 files changed

Lines changed: 276 additions & 132 deletions

File tree

analysis/write_processed_csv_files.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ def get_demographic_table(df_measure_output):
2929
df_demograph = df_measure_output[
3030
df_measure_output["measure"].isin(demograph_strata)
3131
]
32+
3233
df_demograph = df_demograph[
3334
[
3435
"measure",

app/measures.py

Lines changed: 79 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
DECILE = "Decile"
1414
MEDIAN = "Median"
1515

16-
1716
@dataclasses.dataclass
1817
class Measure:
1918
name: str
@@ -52,44 +51,83 @@ def change_in_median(self, from_year, to_year, month):
5251
def deciles_chart(self):
5352
# selections
5453
legend_selection = altair.selection_point(bind="legend", fields=["label"])
55-
5654
# encodings
5755
stroke_dash = altair.StrokeDash(
5856
"label",
5957
title=None,
6058
scale=altair.Scale(
6159
domain=[DECILE, MEDIAN],
62-
range=[[1, 1], [5, 5], [0, 0]],
60+
range=[[2, 2], [5, 5], [0, 0]],
6361
),
6462
legend=altair.Legend(orient="bottom"),
6563
)
6664
stroke_width = (
67-
altair.when(altair.datum.type == MEDIAN)
68-
.then(altair.value(1))
69-
.otherwise(altair.value(0.5))
65+
altair.when(altair.datum.label == MEDIAN)
66+
.then(altair.value(2))
67+
.otherwise(altair.value(0.75))
7068
)
7169
opacity = (
7270
altair.when(legend_selection)
7371
.then(altair.value(1))
7472
.otherwise(altair.value(0.2))
7573
)
76-
7774
# chart
78-
chart = (
75+
line_chart = (
7976
altair.Chart(self.deciles_table, title=self.chart_units)
8077
.mark_line()
8178
.encode(
82-
altair.X("date", axis=altair.Axis(format="%b %y"), title=None),
83-
altair.Y("value", title=None),
79+
altair.X(
80+
"yearmonth(date):T",
81+
axis=altair.Axis(
82+
format="%b %y",
83+
title=None,
84+
labelColor="#222",
85+
labelFontSize=14,
86+
labelAngle=45,
87+
),
88+
),
89+
altair.Y(
90+
"value",
91+
axis=altair.Axis(title=None, labelColor="#222", labelFontSize=14),
92+
scale=altair.Scale(zero=False),
93+
),
8494
detail="percentile",
8595
strokeDash=stroke_dash,
8696
strokeWidth=stroke_width,
97+
color=altair.Color(
98+
"label",
99+
scale=altair.Scale(
100+
domain=[DECILE, MEDIAN],
101+
range=["#DE8F05", "#0173B2"],
102+
),
103+
legend=altair.Legend(orient="bottom"),
104+
),
87105
opacity=opacity,
88106
)
89107
.add_params(legend_selection)
90108
)
109+
110+
# Text labels at rightmost points for median
111+
text_labels = (
112+
altair.Chart(self.deciles_table)
113+
.mark_text(align="left", dx=5, fontSize=12, color="#0173B2")
114+
.encode(
115+
altair.X("yearmonth(date):T"),
116+
altair.Y("value:Q", scale=altair.Scale(zero=False)), # ADD HERE TOO
117+
text=altair.value("median"),
118+
)
119+
.transform_filter(altair.datum.label == MEDIAN)
120+
.transform_window(
121+
rank="rank()",
122+
sort=[altair.SortField("date", order="descending")],
123+
groupby=["percentile"],
124+
)
125+
.transform_filter(altair.datum.rank == 1)
126+
)
127+
128+
chart = (line_chart + text_labels).resolve_scale(y='shared') # ENSURE SHARED SCALE
91129
return chart
92-
130+
93131
def measure_chart(self, measure_name):
94132
chart = (
95133
altair.Chart(self.measures_tables[measure_name])
@@ -128,7 +166,7 @@ def _construct(self, name):
128166
# them as functions rather than as methods. Doing so makes them easier to mock.
129167
counts = _get_counts(record["counts_table_url"])
130168
top_5_codes_table = _get_top_5_codes_table(record["top_5_codes_table_url"])
131-
deciles_table = _get_deciles_table(record["deciles_table_url"])
169+
deciles_table = _get_deciles_table(record["deciles_table_url"], record.get("chart_type",""))
132170
if "measures_tables_url" in record:
133171
measures_tables = dict(_get_measures_tables(record["measures_tables_url"]))
134172
else:
@@ -148,8 +186,8 @@ def _construct(self, name):
148186
)
149187

150188
def list(self):
151-
"""List the names of all the measures in the repository."""
152-
return self._records.keys()
189+
"""List the names of all the measures in the repository, in alphabetical order."""
190+
return sorted(self._records.keys(), key=str.lower)
153191

154192

155193
def _get_counts(counts_table_url):
@@ -168,9 +206,17 @@ def _get_top_5_codes_table(top_5_codes_table_url):
168206
return top_5_codes_table
169207

170208

171-
def _get_deciles_table(deciles_table_url):
172-
log.info(f"Getting deciles table from {deciles_table_url}")
209+
def _get_deciles_table(deciles_table_url, chart_type=None):
210+
chart_type = chart_type or ""
211+
log.info("ENTER _get_deciles_table", chart_type=chart_type, url=deciles_table_url, file=__file__)
173212
deciles_table = pandas.read_csv(deciles_table_url, parse_dates=["date"])
213+
214+
log.info(
215+
"BEFORE scaling",
216+
sample=deciles_table["value"].head(5).tolist(),
217+
dtype=str(deciles_table["value"].dtype),
218+
)
219+
174220
deciles_table.loc[:, "label"] = PERCENTILE
175221
is_decile = (
176222
(deciles_table["percentile"] != 0)
@@ -181,12 +227,24 @@ def _get_deciles_table(deciles_table_url):
181227
deciles_table.loc[deciles_table["percentile"] == 50, "label"] = MEDIAN
182228

183229
# Obviously, this is sub-optimal: whether to rescale the values is decided by a hard-coded special case.
184-
if "hba1c_diab_mean_tests" not in deciles_table_url:
230+
if chart_type != "mean":
185231
deciles_table["value"] = deciles_table["value"] / 10
186232

233+
log.info(
234+
"AFTER scaling",
235+
sample=deciles_table["value"].head(5).tolist(),
236+
dtype=str(deciles_table["value"].dtype),
237+
)
238+
187239
# As is this: rows still labelled PERCENTILE are dropped here.
188240
deciles_table = deciles_table[deciles_table["label"] != PERCENTILE]
189241

242+
log.info(
243+
"RETURNING deciles_table",
244+
rows=len(deciles_table),
245+
min=float(deciles_table["value"].min()),
246+
max=float(deciles_table["value"].max()),
247+
)
190248
return deciles_table
191249

192250

@@ -216,5 +274,9 @@ def _get_measures_tables(measures_tables_url):
216274
5: "Chinese or Other Ethnic Groups",
217275
}
218276
)
277+
278+
# Filter out "unknown" from the IMD measure.
279+
if measure_header == "IMD":
280+
measure_table = measure_table[measure_table["IMD"] != "unknown"]
219281

220282
yield measure_header, measure_table

0 commit comments

Comments
 (0)