adds validate utility method to align with unavailable choices in MC simulation

Jan Zill · Jan Zill · commit e102a9e54dbf · 2025-05-30T09:17:47.000+10:00
diff --git a/activitysim/core/interaction_sample_simulate.py b/activitysim/core/interaction_sample_simulate.py
@@ -303,11 +303,32 @@ def _interaction_sample_simulate(
         if skip_choice:
             return choosers.join(logsums.to_frame("logsums"))
 
+        utilities_df = logit.validate_utils(
+            state,
+            utilities_df,
+            allow_zero_probs=allow_zero_probs,
+            trace_label=trace_label,
+            trace_choosers=choosers,
+        )
+
+        if allow_zero_probs:
+            zero_probs = (
+                utilities_df.sum(axis=1)
+                <= utilities_df.shape[1] * logit.UTIL_UNAVAILABLE
+            )
+            if zero_probs.any():
+                # copied from the probability code below, fix when that gets fixed
+                # FIXME this is kind of gnarly, but we force choice of first alt
+                utilities_df.loc[
+                    zero_probs, 0
+                ] = 3.0  # arbitrary value much larger than UTIL_UNAVAILABLE
+
         # positions is series with the chosen alternative represented as a column index in utilities_df
         # which is an integer between zero and num alternatives in the alternative sample
         positions, rands = logit.make_choices_utility_based(
             state, utilities_df, trace_label=trace_label, trace_choosers=choosers
         )
+
         del utilities_df
         chunk_sizer.log_df(trace_label, "utilities_df", None)
     else:
@@ -382,13 +403,7 @@ def _interaction_sample_simulate(
 
     chunk_sizer.log_df(trace_label, "choices", choices)
 
-    # order is important for short circuiting - no explicit error terms => no zero_probs
-    if (
-        allow_zero_probs
-        and not state.settings.use_explicit_error_terms
-        and zero_probs.any()
-        and zero_prob_choice_val is not None
-    ):
+    if allow_zero_probs and zero_probs.any() and zero_prob_choice_val is not None:
         # FIXME this is kind of gnarly, patch choice for zero_probs
         choices.loc[zero_probs] = zero_prob_choice_val
 
diff --git a/activitysim/core/logit.py b/activitysim/core/logit.py
@@ -17,6 +17,11 @@
 EXP_UTIL_MIN = 1e-300
 EXP_UTIL_MAX = np.inf
 
+# TODO-EET: Figure out what type we want UTIL_MIN to be, currently np.float64
+UTIL_MIN = np.log(EXP_UTIL_MIN, dtype=np.float64)  # utility whose exp() is EXP_UTIL_MIN
+UTIL_UNAVAILABLE = 1000.0 * (UTIL_MIN - 1.0)  # sentinel utility for unavailable alts
+
+
 PROB_MIN = 0.0
 PROB_MAX = 1.0
 
@@ -123,6 +128,70 @@ def utils_to_logsums(utils, exponentiated=False, allow_zero_probs=False):
     return logsums
 
 
+def validate_utils(
+    state: workflow.State,
+    utils,
+    trace_label=None,
+    allow_zero_probs=False,
+    trace_choosers=None,
+):
+    """
+    Clamp utilities so unavailable choices are treated the same in EET and MC.
+
+    For EET decisions, no conversion to probabilities is required because
+    choices are made by comparing utilities (only differences matter). MC
+    simulation instead evaluates exp(utility), so large negative utilities are
+    used in practice to make a choice unavailable. To align the two, every
+    utility at or below UTIL_MIN is replaced by UTIL_UNAVAILABLE.
+
+    Parameters
+    ----------
+    state : workflow.State
+        passed through to report_bad_choices
+    utils : pandas.DataFrame
+        Rows should be choosers and columns should be alternatives. If its
+        values are exposed as a writable array they are clamped in place.
+    trace_label : str, optional
+        label for tracing bad utility or probability values
+    allow_zero_probs : bool
+        if False, rows in which every alternative is unavailable are handed to
+        report_bad_choices; if True, no such check is made here.
+    trace_choosers : pandas.dataframe
+        the choosers df (for interaction_simulate) to facilitate the reporting of hh_id
+        by report_bad_choices because it can't deduce hh_id from the interaction_dataset
+        which is indexed on index values from alternatives df
+
+    Returns
+    -------
+    utils : pandas.DataFrame
+        utils with values that would lead to zero probability replaced by UTIL_UNAVAILABLE
+
+    """
+    trace_label = tracing.extend_trace_label(trace_label, "validate_utils")
+
+    utils_arr = utils.values
+    if not utils_arr.flags.writeable:
+        # e.g. pandas copy-on-write yields read-only views; putmask must write
+        utils_arr = utils_arr.copy()
+    np.putmask(utils_arr, utils_arr <= UTIL_MIN, UTIL_UNAVAILABLE)
+
+    if not allow_zero_probs:
+        # after clamping, UTIL_UNAVAILABLE is the smallest value left, so a row
+        # sum this low means every alternative in the row is unavailable
+        zero_probs = utils_arr.sum(axis=1) <= utils_arr.shape[1] * UTIL_UNAVAILABLE
+        if zero_probs.any():
+            report_bad_choices(
+                state,
+                zero_probs,
+                utils,
+                trace_label=tracing.extend_trace_label(trace_label, "zero_prob_utils"),
+                msg="all probabilities are zero",
+                trace_choosers=trace_choosers,
+            )
+
+    return pd.DataFrame(utils_arr, columns=utils.columns, index=utils.index)
+
+
 def utils_to_probs(
     state: workflow.State,
     utils,