33from __future__ import annotations
44
55import logging
6+ import typing
67
78import numpy as np
89import pandas as pd
9-
1010from activitysim .core import (
1111 chunk ,
1212 estimation ,
1717 util ,
1818 workflow ,
1919)
20+ from activitysim .core .chunk import ChunkSizer
2021from activitysim .core .configuration .base import ComputeSettings
2122from activitysim .core .exceptions import SegmentedSpecificationError
2223from activitysim .core .skim_dataset import DatasetWrapper
2324from activitysim .core .skim_dictionary import SkimWrapper
25+ if typing .TYPE_CHECKING :
26+ from activitysim .core .random import Random
2427
2528logger = logging .getLogger (__name__ )
2629
2730DUMP = False
2831
2932
33+ def _poisson_sample_alternatives_inner (
34+ alternative_count : int ,
35+ probs : pd .DataFrame ,
36+ poisson_inclusion_probs : pd .DataFrame ,
37+ rng : Random ,
38+ trace_label : str | None ,
39+ chunk_sizer :ChunkSizer ,
40+ ) -> pd .DataFrame :
41+ rands = rng .random_for_df (probs , n = alternative_count )
42+ chunk_sizer .log_df (trace_label , "rands" , rands )
43+ sampled_mask = rands < poisson_inclusion_probs
44+ sampled_results = poisson_inclusion_probs .where (sampled_mask )
45+ return sampled_results
46+
47+
3048def make_sample_choices_utility_based (
3149 state : workflow .State ,
3250 choosers ,
@@ -36,8 +54,8 @@ def make_sample_choices_utility_based(
3654 alternative_count ,
3755 alt_col_name ,
3856 allow_zero_probs ,
39- trace_label ,
40- chunk_sizer ,
57+ trace_label : str ,
58+ chunk_sizer : ChunkSizer ,
4159):
4260 assert isinstance (utilities , pd .DataFrame )
4361 assert utilities .shape == (len (choosers ), alternative_count )
@@ -60,32 +78,6 @@ def make_sample_choices_utility_based(
6078
6179 utils_array = utilities .to_numpy ()
6280 chunk_sizer .log_df (trace_label , "utils_array" , utils_array )
63- chosen_destinations = []
64-
65- rands = state .get_rn_generator ().gumbel_for_df (utilities , n = alternative_count )
66- chunk_sizer .log_df (trace_label , "rands" , rands )
67-
68- # TODO-EET [janzill Jun2022]: using for-loop to keep memory usage low, an array of dimension
69- # (len(choosers), alternative_count, sample_size) can get very large. Probably better to
70- # use chunking for this.
71- for i in range (sample_size ):
72- # created this once for memory logging
73- if i > 0 :
74- rands = state .get_rn_generator ().gumbel_for_df (
75- utilities , n = alternative_count
76- )
77- chosen_destinations .append (np .argmax (utils_array + rands , axis = 1 ))
78- chosen_destinations = np .concatenate (chosen_destinations , axis = 0 )
79-
80- chunk_sizer .log_df (trace_label , "chosen_destinations" , chosen_destinations )
81-
82- del utils_array
83- chunk_sizer .log_df (trace_label , "utils_array" , None )
84- del rands
85- chunk_sizer .log_df (trace_label , "rands" , None )
86-
87- chooser_idx = np .tile (np .arange (utilities .shape [0 ]), sample_size )
88- chunk_sizer .log_df (trace_label , "chooser_idx" , chooser_idx )
8981
9082 probs = logit .utils_to_probs (
9183 state ,
@@ -95,28 +87,69 @@ def make_sample_choices_utility_based(
9587 overflow_protection = not allow_zero_probs ,
9688 trace_choosers = choosers ,
9789 )
98- chunk_sizer .log_df (trace_label , "probs" , probs )
99-
100- choices_df = pd .DataFrame (
101- {
102- alt_col_name : alternatives .index .values [chosen_destinations ],
103- "prob" : probs .to_numpy ()[chooser_idx , chosen_destinations ],
104- choosers .index .name : choosers .index .values [chooser_idx ],
105- }
90+ inclusion_probs , sampled_alternatives = _poisson_sample_alternatives (alternative_count , chunk_sizer , probs ,
91+ sample_size , state , trace_label )
92+
93+ # Stack removes the NaNs (the ones that weren't sampled)
94+ # and gives us a multi-index of (person_id, alt_id)
95+ choices_df = (
96+ sampled_alternatives .rename_axis ("alt_idx" , axis = 1 )
97+ .stack ()
98+ .reset_index (name = "prob" )
99+ .assign (** {alt_col_name : lambda df : alternatives .index .values [df ["alt_idx" ]]})
100+ .drop (columns = ["alt_idx" ])
106101 )
107- chunk_sizer .log_df (trace_label , "choices_df" , choices_df )
108-
109- del chooser_idx
110- chunk_sizer .log_df (trace_label , "chooser_idx" , None )
111- del chosen_destinations
112- chunk_sizer .log_df (trace_label , "chosen_destinations" , None )
113- del probs
114- chunk_sizer .log_df (trace_label , "probs" , None )
115102
116- # handing this off to caller
117- chunk_sizer .log_df (trace_label , "choices_df" , None )
118-
119- return choices_df
103+ # Here we return the inclusion probabilities i.e. the true probability of being sampled and (ab)use the fact
104+ # that pick_count=1 by definition and ln(1)=0 and recover the standard sample correction term.
105+ # In non-Poisson sampling, we would return the probs of sampling an alternative once
106+ # and the sampling correction factor np.log(df.pick_count/df.prob) is applied to the simulate utilities.
107+ # TODO is it safe change the meaning of df.prob, given it's referenced in expression csvs?
108+ # (but the alternative is to update all the expression CSV for sampling?)
109+ return choices_df , inclusion_probs
110+
111+
def _poisson_sample_alternatives(alternative_count, chunk_sizer: ChunkSizer, probs: pd.DataFrame, sample_size,
                                 state: workflow.State, trace_label: str) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Sample a choice set by independent Bernoulli (Poisson) sampling.

    Each alternative is included independently with probability
    ``1 - (1 - p) ** sample_size`` -- the probability it would be drawn at
    least once in ``sample_size`` independent multinomial draws with cell
    probability ``p``. Choosers that end a round with an empty choice set are
    redrawn, up to 10 attempts.

    Parameters
    ----------
    alternative_count : int
        number of alternatives per chooser (``probs`` has this many columns)
    chunk_sizer : ChunkSizer
        chunk memory logger
    probs : pd.DataFrame
        per-draw choice probabilities, one row per chooser
    sample_size : int
        nominal number of multinomial draws the inclusion probabilities emulate
    state : workflow.State
        provides the random number generator
    trace_label : str
        label for chunk logging

    Returns
    -------
    tuple[pd.DataFrame, pd.DataFrame]
        ``(inclusion_probs, sampled_alternatives)`` -- the true inclusion
        probability of every alternative, and a same-shaped frame holding that
        probability where the alternative was sampled and NaN (or the initial
        0.0) elsewhere.

    Raises
    ------
    ValueError
        if some chooser still has an empty choice set after 10 attempts.
    """
    # compute the inclusion probability as the complement of the alternative
    # never being drawn -- these are reused across retries, so compute once upfront
    exclusion_probs = (1 - probs) ** sample_size
    inclusion_probs = 1 - exclusion_probs

    n = 0
    probs_subset = probs
    inclusion_probs_subset = inclusion_probs
    sampled_alternatives = pd.DataFrame(0.0, index=inclusion_probs.index, columns=inclusion_probs.columns)
    while True:
        sampled_results_subset = _poisson_sample_alternatives_inner(
            alternative_count, probs_subset, inclusion_probs_subset, state.get_rn_generator(), trace_label, chunk_sizer
        )
        # choosers for whom this round sampled no alternative at all must be retried
        no_alts_sampled_mask = sampled_results_subset.isna().all(axis=1)
        alts_with_sampled_alternatives = sampled_results_subset[~no_alts_sampled_mask]
        sampled_alternatives.loc[alts_with_sampled_alternatives.index, :] = alts_with_sampled_alternatives
        if no_alts_sampled_mask.any():
            # TODO if this happens in base but the project case is such that something is picked, random numbers won't
            # be consistent - we're asserting that this is very rare for models where the sample size is not too small
            logger.info(f"Poisson sampling of alternatives failed with {n=}, retrying")
            # TODO put this behind a debug guard, because it will be slow
            logger.info(
                f"Sampled size was {sample_size}, poisson method mean expected sample size was {inclusion_probs.sum(axis=1).mean():.1f}, actual sampled mean was {(sampled_alternatives > 0).sum(axis=1).mean():.1f} and highest zero selection prob was {(exclusion_probs).product(axis=1).max():.2g}")
            # BUG FIX: subset from the current working subset, not the full frame.
            # no_alts_sampled_mask is indexed on probs_subset; boolean-indexing the
            # full probs/inclusion_probs with a partial-index Series raises
            # pandas.errors.IndexingError ("Unalignable boolean Series provided as
            # indexer") on every retry after the first.
            probs_subset = probs_subset[no_alts_sampled_mask]
            inclusion_probs_subset = inclusion_probs_subset[no_alts_sampled_mask]

        else:  # All alternatives are fine
            break

        n += 1
        if n == 10:
            choosers_no_alts_sampled = sampled_results_subset[no_alts_sampled_mask]
            msg = (f"Poisson choice set sampling failed after 10 attempts for these cases:\n"
                   f"{choosers_no_alts_sampled}\n{probs_subset}")
            raise ValueError(msg)

    chunk_sizer.log_df(trace_label, "sampled_alternatives", sampled_alternatives)

    return inclusion_probs, sampled_alternatives
120153
121154
122155def make_sample_choices (
@@ -227,7 +260,7 @@ def _interaction_sample(
227260 locals_d = None ,
228261 trace_label = None ,
229262 zone_layer = None ,
230- chunk_sizer = None ,
263+ chunk_sizer : ChunkSizer | None = None ,
231264 compute_settings : ComputeSettings | None = None ,
232265):
233266 """
@@ -292,6 +325,9 @@ def _interaction_sample(
292325 pick_count : int
293326 number of duplicate picks for chooser, alt
294327 """
328+ assert chunk_sizer is not None , "chunk_sizer cannot be None but old nullable signature is preserved"
329+ # TODO it's probably safe to reorder these arguments to make chunk_sizer mandatory since
330+ # _interaction_sample is private?
295331
296332 have_trace_targets = state .tracing .has_trace_targets (choosers )
297333 trace_ids = None
@@ -812,7 +848,13 @@ def interaction_sample(
812848 assert choosers .index .is_monotonic_increasing
813849
814850 # FIXME - legacy logic - not sure this is needed or even correct?
815- sample_size = min (sample_size , len (alternatives .index ))
851+ if not state .settings .use_explicit_error_terms :
852+ sample_size = min (sample_size , len (alternatives .index ))
853+ # with poisson sampling, definitely don't want to reduce sample size - it's not a sample size but a number
854+ # of theoretical draws. Another options would be to disable sampling if # alts < sample size to ensure
855+ # all are included (but this wouldn't behave well if there were land use changes in the project case which
856+ # switched regimes)
857+
816858 logger .info (f" --- interaction_sample sample size = { sample_size } " )
817859
818860 result_list = []
0 commit comments