Skip to content

Commit e102a9e

Browse files
author
Jan Zill
committed
adds validate utility method to align with unavailable choices in MC simulation
1 parent 94e629a commit e102a9e

2 files changed

Lines changed: 91 additions & 7 deletions

File tree

activitysim/core/interaction_sample_simulate.py

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -303,11 +303,32 @@ def _interaction_sample_simulate(
303303
if skip_choice:
304304
return choosers.join(logsums.to_frame("logsums"))
305305

306+
utilities_df = logit.validate_utils(
307+
state,
308+
utilities_df,
309+
allow_zero_probs=allow_zero_probs,
310+
trace_label=trace_label,
311+
trace_choosers=choosers,
312+
)
313+
314+
if allow_zero_probs:
315+
zero_probs = (
316+
utilities_df.sum(axis=1)
317+
<= utilities_df.shape[1] * logit.UTIL_UNAVAILABLE
318+
)
319+
if zero_probs.any():
320+
# copied from probability below, fix when that gets fixed
321+
# FIXME this is kind of gnarly, but we force choice of first alt
322+
utilities_df.loc[
323+
zero_probs, 0
324+
] = 3.0 # arbitrary value much larger than UTIL_UNAVAILABLE
325+
306326
# positions is series with the chosen alternative represented as a column index in utilities_df
307327
# which is an integer between zero and num alternatives in the alternative sample
308328
positions, rands = logit.make_choices_utility_based(
309329
state, utilities_df, trace_label=trace_label, trace_choosers=choosers
310330
)
331+
311332
del utilities_df
312333
chunk_sizer.log_df(trace_label, "utilities_df", None)
313334
else:
@@ -382,13 +403,7 @@ def _interaction_sample_simulate(
382403

383404
chunk_sizer.log_df(trace_label, "choices", choices)
384405

385-
# order is important for short circuiting - no explicit error terms => no zero_probs
386-
if (
387-
allow_zero_probs
388-
and not state.settings.use_explicit_error_terms
389-
and zero_probs.any()
390-
and zero_prob_choice_val is not None
391-
):
406+
if allow_zero_probs and zero_probs.any() and zero_prob_choice_val is not None:
392407
# FIXME this is kind of gnarly, patch choice for zero_probs
393408
choices.loc[zero_probs] = zero_prob_choice_val
394409

activitysim/core/logit.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@
1717
EXP_UTIL_MIN = 1e-300
1818
EXP_UTIL_MAX = np.inf
1919

20+
# TODO-EET: Figure out what type we want UTIL_MIN to be, currently np.float64
21+
UTIL_MIN = np.log(EXP_UTIL_MIN, dtype=np.float64)
22+
UTIL_UNAVAILABLE = 1000.0 * (UTIL_MIN - 1.0)
23+
24+
2025
PROB_MIN = 0.0
2126
PROB_MAX = 1.0
2227

@@ -123,6 +128,70 @@ def utils_to_logsums(utils, exponentiated=False, allow_zero_probs=False):
123128
return logsums
124129

125130

131+
def validate_utils(
    state: workflow.State,
    utils,
    trace_label=None,
    allow_zero_probs=False,
    trace_choosers=None,
):
    """
    Clamp utilities so unavailable choices are treated the same in EET and MC.

    For EET decisions, no conversion to probabilities is required because choices
    are made on the basis of comparing utilities (only differences matter).
    However, large negative utility values are used in practice to make choices
    unavailable based on probability calculations, which boils down to evaluating
    exp(utility). We use this to define a minimum utility (UTIL_MIN) at or below
    which an alternative is considered unavailable; every such value is replaced
    by the single sentinel UTIL_UNAVAILABLE.

    The input frame is not modified; a new DataFrame is returned.

    Parameters
    ----------
    state : workflow.State
        Passed through to report_bad_choices.

    utils : pandas.DataFrame
        Rows should be choosers and columns should be alternatives.

    trace_label : str, optional
        label for tracing bad utility or probability values

    allow_zero_probs : bool
        If False, rows in which every alternative is unavailable (all values
        equal to UTIL_UNAVAILABLE after clamping) are handed to
        report_bad_choices. If True, such rows are left in the result without
        being reported, and the caller is expected to deal with them.

    trace_choosers : pandas.DataFrame
        the choosers df (for interaction_simulate) to facilitate the reporting of hh_id
        by report_bad_choices because it can't deduce hh_id from the interaction_dataset
        which is indexed on index values from alternatives df

    Returns
    -------
    utils : pandas.DataFrame
        Same index and columns as the input, with every value <= UTIL_MIN
        replaced by UTIL_UNAVAILABLE.
    """
    trace_label = tracing.extend_trace_label(trace_label, "validate_utils")

    # Take an explicit copy: `.values` may hand back a view onto the caller's
    # data (so putmask would silently edit the input), or a read-only array
    # under pandas Copy-on-Write (so putmask would raise).
    utils_arr = utils.to_numpy(copy=True)

    # putmask writes in place and keeps the array's dtype.
    np.putmask(utils_arr, utils_arr <= UTIL_MIN, UTIL_UNAVAILABLE)

    validated = pd.DataFrame(utils_arr, columns=utils.columns, index=utils.index)

    if not allow_zero_probs:
        # After clamping, UTIL_UNAVAILABLE is the smallest value a cell can hold,
        # so a row sum at or below n_alts * UTIL_UNAVAILABLE means every
        # alternative in that row is unavailable.
        zero_probs = utils_arr.sum(axis=1) <= utils_arr.shape[1] * UTIL_UNAVAILABLE
        if zero_probs.any():
            # NOTE(review): report_bad_choices presumably raises after logging;
            # confirm against its definition elsewhere in this module.
            report_bad_choices(
                state,
                zero_probs,
                validated,
                trace_label=tracing.extend_trace_label(trace_label, "zero_prob_utils"),
                msg="all probabilities are zero",
                trace_choosers=trace_choosers,
            )

    return validated
193+
194+
126195
def utils_to_probs(
127196
state: workflow.State,
128197
utils,

0 commit comments

Comments
 (0)