Skip to content

Commit b6dbe95

Browse files
committed
Fix multiple sequential erlang.call() infinite loop
When Python code made multiple sequential erlang.call() invocations in the same function, the replay mechanism cached only ONE result. On replay, the second call found no cache hit and suspended again, causing an infinite loop. The fix adds a force_blocking flag that is set when a call runs in replay context (tl_current_suspended != NULL) but no cache hit occurred. This ensures subsequent erlang.call() invocations use blocking pipe behavior instead of suspending again. In short: the first erlang.call() in a function suspends (freeing the dirty scheduler); subsequent calls during replay block on the pipe (the scheduler is already held).
1 parent 1509fcb commit b6dbe95

4 files changed

Lines changed: 81 additions & 3 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626

2727
### Fixed
2828

29+
- **Multiple sequential erlang.call()** - Fixed infinite loop when Python code makes
30+
multiple sequential `erlang.call()` invocations in the same function. The replay
31+
mechanism now falls back to blocking pipe behavior for subsequent calls after the
32+
first suspension, preventing the infinite replay loop.
2933
- **Memory safety in C NIF** - Fixed memory leaks and added NULL checks
3034
- `nif_async_worker_new`: msg_env now freed on pipe/thread creation failure
3135
- `multi_executor_stop`: shutdown requests now properly freed after join

c_src/py_callback.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,16 @@ static PyObject *erlang_call_impl(PyObject *self, PyObject *args) {
579579
}
580580
}
581581

582+
/*
583+
* FIX for multiple sequential erlang.call():
584+
* If we're in replay context (tl_current_suspended != NULL) but didn't get
585+
* a cache hit above, this is a SUBSEQUENT call (e.g., second erlang.call()
586+
* in the same Python function). We MUST NOT suspend again - that would
587+
* cause an infinite loop where replay always hits this second call.
588+
* Instead, fall through to blocking pipe behavior for subsequent calls.
589+
*/
590+
bool force_blocking = (tl_current_suspended != NULL);
591+
582592
/* Build args list (remaining args) */
583593
PyObject *call_args = PyTuple_GetSlice(args, 1, nargs);
584594
if (call_args == NULL) {
@@ -590,8 +600,10 @@ static PyObject *erlang_call_impl(PyObject *self, PyObject *args) {
590600
* Suspension is only safe when the result will be directly examined by the
591601
* executor (PY_REQ_CALL or PY_REQ_EVAL). For PY_REQ_EXEC or nested Python
592602
* code, we must block and wait for the result.
603+
*
604+
* Also block if force_blocking is set (replay context with no cache hit).
593605
*/
594-
if (!tl_allow_suspension) {
606+
if (!tl_allow_suspension || force_blocking) {
595607
/* Fall back to blocking behavior - send message and wait on pipe */
596608
ErlNifEnv *msg_env = enif_alloc_env();
597609
if (msg_env == NULL) {

examples/reentrant_demo.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,3 +133,27 @@ def nested_compute(n, depth):
133133

134134
# Call Erlang which will call nested_compute(n+1, depth-1)
135135
return erlang.call('nested_step', n, depth)
136+
137+
138+
def compute_chain(x):
139+
"""
140+
Compute a chain of operations using multiple sequential Erlang callbacks.
141+
142+
This function makes THREE sequential erlang.call() invocations:
143+
1. add_ten(x) -> x + 10
144+
2. multiply_by_two(step1) -> step1 * 2
145+
3. subtract_five(step2) -> step2 - 5
146+
147+
This exercises sequential callback handling - the first call may suspend
148+
Python and be replayed with its cached result; calls reached during the
149+
replay without a cached result block on the pipe instead of suspending.
150+
151+
Example: compute_chain(5) -> ((5 + 10) * 2) - 5 = 25
152+
"""
153+
import erlang
154+
155+
step1 = erlang.call('add_ten', x) # x + 10
156+
step2 = erlang.call('multiply_by_two', step1) # (x + 10) * 2
157+
step3 = erlang.call('subtract_five', step2) # ((x + 10) * 2) - 5
158+
159+
return step3

test/py_reentrant_SUITE.erl

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
test_nested_callbacks/1,
2020
test_callback_error_propagation/1,
2121
test_concurrent_reentrant/1,
22-
test_callback_with_complex_types/1
22+
test_callback_with_complex_types/1,
23+
test_multiple_sequential_callbacks/1
2324
]).
2425

2526
all() ->
@@ -28,7 +29,8 @@ all() ->
2829
test_nested_callbacks,
2930
test_callback_error_propagation,
3031
test_concurrent_reentrant,
31-
test_callback_with_complex_types
32+
test_callback_with_complex_types,
33+
test_multiple_sequential_callbacks
3234
].
3335

3436
init_per_suite(Config) ->
@@ -49,6 +51,9 @@ end_per_testcase(_TestCase, _Config) ->
4951
catch py:unregister_function(call_level),
5052
catch py:unregister_function(may_fail),
5153
catch py:unregister_function(transform),
54+
catch py:unregister_function(add_ten),
55+
catch py:unregister_function(multiply_by_two),
56+
catch py:unregister_function(subtract_five),
5257
ok.
5358

5459
%%% ============================================================================
@@ -192,3 +197,36 @@ test_callback_with_complex_types(_Config) ->
192197
} = Result,
193198

194199
ok.
200+
201+
%% @doc Test multiple sequential erlang.call() invocations in one Python function.
202+
%% This tests that callbacks made after the first one in a single evaluation
203+
%% complete via the blocking fallback instead of suspending again.
204+
test_multiple_sequential_callbacks(_Config) ->
205+
%% Register three Erlang functions that will be called sequentially
206+
py:register_function(add_ten, fun([X]) -> X + 10 end),
207+
py:register_function(multiply_by_two, fun([X]) -> X * 2 end),
208+
py:register_function(subtract_five, fun([X]) -> X - 5 end),
209+
210+
%% Use py:eval with a lambda that makes 3 sequential erlang.call() invocations.
211+
%% The first call suspends and is replayed; the second/third calls find no
212+
%% cached result during replay and rely on the blocking-pipe fallback.
213+
%%
214+
%% The lambda pattern: (lambda x: subtract_five(multiply_by_two(add_ten(x))))(input)
215+
%% This is a single expression that makes 3 sequential callbacks.
216+
217+
%% Test with x=5: ((5 + 10) * 2) - 5 = 25
218+
Code1 = <<"(lambda erl: erl.call('subtract_five', erl.call('multiply_by_two', erl.call('add_ten', 5))))(__import__('erlang'))">>,
219+
{ok, Result1} = py:eval(Code1),
220+
25 = Result1,
221+
222+
%% Test with x=10: ((10 + 10) * 2) - 5 = 35
223+
Code2 = <<"(lambda erl: erl.call('subtract_five', erl.call('multiply_by_two', erl.call('add_ten', 10))))(__import__('erlang'))">>,
224+
{ok, Result2} = py:eval(Code2),
225+
35 = Result2,
226+
227+
%% Test with x=0: ((0 + 10) * 2) - 5 = 15
228+
Code3 = <<"(lambda erl: erl.call('subtract_five', erl.call('multiply_by_two', erl.call('add_ten', 0))))(__import__('erlang'))">>,
229+
{ok, Result3} = py:eval(Code3),
230+
15 = Result3,
231+
232+
ok.

0 commit comments

Comments
 (0)