Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@

In development

**Bug fixes**

- Fix edge case in SMC(k) model where a random value of exactly zero triggered an assert.
({issue}`2523` {pr}`2525`, {user}`currocam`, {user}`jeromekelleher`).


## [1.4.1] - 2026-03-05

**Maintenance**
Expand Down
12 changes: 12 additions & 0 deletions lib/msprime.c
Original file line number Diff line number Diff line change
Expand Up @@ -7688,6 +7688,18 @@ msp_smc_k_common_ancestor_event(
x_hull = msp_get_hull(self, hull_id, label);
remaining_mass = fenwick_get_cumulative_sum(coal_mass_index, hull_id) - random_mass;

/* When gsl_ran_flat draws random_mass == 0 (probability ~1/2^32 per
* call) — or when the zero-skip path in fenwick_find lands the result
* on a leading run of zero-valued slots — remaining_mass starts at
* exactly x_hull->count. The walk loop below decrements once per
* matching predecessor and exits on remaining_mass < 0; with
* remaining_mass == count it would over-iterate by one and run off
* the head of the AVL tree. Clamp here so the loop terminates after
* `count` matches. */
if (remaining_mass >= (double) x_hull->count) {
remaining_mass = (double) x_hull->count - 0.5;
}

/* find second hull */
search = &x_hull->left_avl_node;
for (search = search->prev; remaining_mass >= 0; search = search->prev) {
Expand Down
164 changes: 164 additions & 0 deletions lib/tests/test_ancestry.c
Original file line number Diff line number Diff line change
Expand Up @@ -4656,6 +4656,168 @@ test_smc_k_gc(void)
}
}

/* A static GSL rng_type whose draws are scripted from a fixed array.
* gsl_rng_uniform calls get_double directly, so providing only that
* callback is enough to control every uniform draw the simulator makes
* (gsl_ran_flat, gsl_ran_exponential, gsl_rng_uniform_int all funnel
* through gsl_rng_uniform). */
typedef struct {
size_t call_count;
const double *u_values;
size_t n_u_values;
} scripted_rng_state_t;

static void
scripted_rng_set(void *state, unsigned long int seed)
{
scripted_rng_state_t *s = (scripted_rng_state_t *) state;
(void) seed;
s->call_count = 0;
}

static unsigned long int
scripted_rng_get(void *state)
{
/* Not normally called when get_double is set, but provide a sane
* fallback in case some code path uses it. */
scripted_rng_state_t *s = (scripted_rng_state_t *) state;
if (s->call_count >= s->n_u_values) {
return 1UL;
}
return (unsigned long int) (s->u_values[s->call_count++] * 4294967296.0);
}

static double
scripted_rng_get_double(void *state)
{
scripted_rng_state_t *s = (scripted_rng_state_t *) state;
if (s->call_count >= s->n_u_values) {
/* Past the end of the script, return a benign default. */
return 0.5;
}
return s->u_values[s->call_count++];
}

static const gsl_rng_type scripted_rng_type = {
"scripted", /* name */
0xffffffffUL, /* max */
0UL, /* min */
sizeof(scripted_rng_state_t),
scripted_rng_set,
scripted_rng_get,
scripted_rng_get_double,
};

/* Regression test for the SMCk common-ancestor walk: when gsl_ran_flat
* draws random_mass == 0, remaining_mass starts at exactly x_hull->count.
* Pre-fix the >= 0 walk predicate over-iterated by one and ran off the
* head of the AVL tree, tripping tsk_bug_assert. Uses a scripted RNG to
* hit the trigger condition deterministically on the first CA event of
* a 2-sample SMCk simulation. */
static void
test_smc_k_zero_random_mass(void)
{
int ret;
uint32_t n = 2;
sample_t samples[2] = { { 0, 0.0 }, { 0, 0.0 } };
msp_t msp;
tsk_table_collection_t tables;

/* Script:
* [0] = 0.5 -> gsl_ran_exponential for the CA wait time (any
* positive value < 1 yields a finite wait).
* [1] = 0.0 -> gsl_ran_flat(rng, 0, num_pairs) inside
* msp_smc_k_common_ancestor_event returns 0,
* making remaining_mass == x_hull->count exactly.
* Subsequent draws fall through to the 0.5 default. */
static const double script[] = { 0.5, 0.0 };
scripted_rng_state_t rng_state
= { .call_count = 0, .u_values = script, .n_u_values = 2 };
gsl_rng rng = { .type = &scripted_rng_type, .state = &rng_state };

ret = build_sim(&msp, &tables, &rng, 100.0, 1, samples, n);
CU_ASSERT_EQUAL_FATAL(ret, 0);

ret = msp_set_simulation_model_smc_k(&msp, 1.0);
CU_ASSERT_EQUAL_FATAL(ret, 0);

ret = msp_initialise(&msp);
CU_ASSERT_EQUAL_FATAL(ret, 0);

/* One event is enough to coalesce the only pair. Pre-fix this aborts
* via tsk_bug_assert; post-fix it returns 0. */
ret = msp_run(&msp, DBL_MAX, 1);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_TRUE(msp_is_completed(&msp));

msp_verify(&msp, 0);

ret = msp_free(&msp);
CU_ASSERT_EQUAL(ret, 0);
tsk_table_collection_free(&tables);
}

/* Sibling test for the SMCk clamp in a non-trivial state. The base
* test_smc_k_zero_random_mass triggers the clamp on the very first CA
* event of a fresh 2-sample simulation. This test exercises the same
* clamp on the SECOND CA event, after one prior coalescence has freed
* and reused hull slots, decremented insertion orders for sibling
* same-left hulls, and rearranged the hulls_left / hulls_right AVL
* trees. Pre-fix the second event aborts via tsk_bug_assert; post-fix
* both events succeed.
*
* (We tried to engineer the second event to land on a hull with
* count > 1 via the fenwick zero-skip path, but `fenwick_find` always
* stops at the FIRST non-zero slot — and in an all-overlapping setup
* any non-zero "predecessor" slot blocks the path to a higher-count
* hull. So the second event also exercises count == 1, just from a
* post-event state rather than the initial state.) */
static void
test_smc_k_zero_random_mass_chained(void)
{
int ret;
uint32_t n = 4;
sample_t samples[4] = { { 0, 0.0 }, { 0, 0.0 }, { 0, 0.0 }, { 0, 0.0 } };
msp_t msp;
tsk_table_collection_t tables;

/* Script:
* [0] = 0.5 -> event 1 gsl_ran_exponential, finite wait.
* [1] = 0.0833 -> event 1 gsl_ran_flat: random_mass = 0.0833 * 6 ≈ 0.5.
* Selects a hull whose count is non-zero; walk back
* coalesces with one of its matching predecessors.
* [2] = 0.5 -> event 2 gsl_ran_exponential, finite wait.
* [3] = 0.0 -> event 2 gsl_ran_flat: random_mass = 0. The clamp
* at lib/msprime.c:7691-7700 fires. */
static const double script[] = { 0.5, 0.0833, 0.5, 0.0 };
scripted_rng_state_t rng_state
= { .call_count = 0, .u_values = script, .n_u_values = 4 };
gsl_rng rng = { .type = &scripted_rng_type, .state = &rng_state };

ret = build_sim(&msp, &tables, &rng, 100.0, 1, samples, n);
CU_ASSERT_EQUAL_FATAL(ret, 0);

/* hull_offset = 0 keeps hull right == lineage tail->right. */
ret = msp_set_simulation_model_smc_k(&msp, 0.0);
CU_ASSERT_EQUAL_FATAL(ret, 0);

ret = msp_initialise(&msp);
CU_ASSERT_EQUAL_FATAL(ret, 0);

/* Two CA events, hitting the max_events cap. Pre-fix the second
* event aborts via tsk_bug_assert; post-fix msp_run returns
* MSP_EXIT_MAX_EVENTS and we've coalesced 2 pairs (4 -> 2 ancestors). */
ret = msp_run(&msp, DBL_MAX, 2);
CU_ASSERT_EQUAL(ret, MSP_EXIT_MAX_EVENTS);
CU_ASSERT_EQUAL(msp_get_num_ancestors(&msp), 2);

msp_verify(&msp, 0);

ret = msp_free(&msp);
CU_ASSERT_EQUAL(ret, 0);
tsk_table_collection_free(&tables);
}

int
main(int argc, char **argv)
{
Expand Down Expand Up @@ -4771,6 +4933,8 @@ main(int argc, char **argv)
{ "test_mixed_model_smc_k_large", test_mixed_model_smc_k_large },
{ "test_fenwick_rebuild_smc_k", test_fenwick_rebuild_smc_k },
{ "test_smc_k_gc", test_smc_k_gc },
{ "test_smc_k_zero_random_mass", test_smc_k_zero_random_mass },
{ "test_smc_k_zero_random_mass_chained", test_smc_k_zero_random_mass_chained },
CU_TEST_INFO_NULL,
};

Expand Down
Loading