|
# Bounds for exponentiated utilities, i.e. exp(utility).
EXP_UTIL_MIN = 1e-300
EXP_UTIL_MAX = np.inf

# Utility-space counterpart of EXP_UTIL_MIN: utilities at or below this value
# are treated as "alternative unavailable" by validate_utils.
# TODO-EET: decide which type UTIL_MIN should have; it is currently np.float64.
UTIL_MIN = np.log(EXP_UTIL_MIN, dtype=np.float64)
# Replacement value written by validate_utils for unavailable alternatives;
# by construction it lies far below UTIL_MIN.
UTIL_UNAVAILABLE = 1000.0 * (UTIL_MIN - 1.0)


# Presumably the valid range for probabilities -- confirm against the callers.
PROB_MIN = 0.0
PROB_MAX = 1.0
22 | 27 |
|
@@ -123,6 +128,70 @@ def utils_to_logsums(utils, exponentiated=False, allow_zero_probs=False): |
123 | 128 | return logsums |
124 | 129 |
|
125 | 130 |
|
def validate_utils(
    state: workflow.State,
    utils,
    trace_label=None,
    allow_zero_probs=False,
    trace_choosers=None,
):
    """
    Validate utilities so unavailable choices are treated the same in EET and MC.

    For EET decisions, no conversion to probabilities is required because choices
    are made on the basis of comparing utilities (only differences matter).
    However, large negative utility values are used in practice to make choices
    unavailable based on probability calculations, which boils down to evaluating
    exp(utility). We use this here to define a minimum utility (UTIL_MIN) that
    corresponds to an unavailable choice: every utility at or below UTIL_MIN is
    replaced by UTIL_UNAVAILABLE.

    The input frame is not modified; a new frame is returned.

    Parameters
    ----------
    state : workflow.State
        Passed through to report_bad_choices.

    utils : pandas.DataFrame
        Rows should be choosers and columns should be alternatives.

    trace_label : str, optional
        label for tracing bad utility or probability values

    allow_zero_probs : bool
        if False, rows in which every alternative is unavailable (all utilities
        at or below UTIL_MIN) are passed to report_bad_choices. If True, such
        rows are accepted silently. The replacement by UTIL_UNAVAILABLE happens
        in both cases.

    trace_choosers : pandas.dataframe
        the choosers df (for interaction_simulate) to facilitate the reporting of hh_id
        by report_bad_choices because it can't deduce hh_id from the interaction_dataset
        which is indexed on index values from alternatives df

    Returns
    -------
    utils : pandas.DataFrame
        utils with values that would lead to zero probability replaced by UTIL_UNAVAILABLE

    """
    trace_label = tracing.extend_trace_label(trace_label, "validate_utils")

    # Take an explicit, writable copy: `utils.values` may be a view of the
    # caller's data (silently mutating it) or a read-only array (pandas
    # copy-on-write), in which case np.putmask would raise.
    utils_arr = utils.to_numpy(copy=True)

    # Remember which cells are unavailable *before* overwriting them, so the
    # all-unavailable test below is an exact boolean check instead of a
    # floating-point row sum compared against ncols * UTIL_UNAVAILABLE.
    unavailable = utils_arr <= UTIL_MIN
    np.putmask(utils_arr, unavailable, UTIL_UNAVAILABLE)

    # We own utils_arr, so the frame can wrap it without another copy.
    validated = pd.DataFrame(
        utils_arr, columns=utils.columns, index=utils.index, copy=False
    )

    if not allow_zero_probs:
        # A chooser with no available alternative would have all-zero probabilities.
        zero_probs = unavailable.all(axis=1)
        if zero_probs.any():
            report_bad_choices(
                state,
                zero_probs,
                validated,
                trace_label=tracing.extend_trace_label(trace_label, "zero_prob_utils"),
                msg="all probabilities are zero",
                trace_choosers=trace_choosers,
            )

    return validated
| 193 | + |
| 194 | + |
126 | 195 | def utils_to_probs( |
127 | 196 | state: workflow.State, |
128 | 197 | utils, |
|
0 commit comments