Skip to content

Commit 0809220

Browse files
committed
Add shared state API for cross-worker data sharing
Python workers have isolated namespaces, making it difficult to share data between calls. This adds an ETS-backed shared state API accessible from both Python and Erlang. Python API: from erlang import state_set, state_get, state_delete, state_keys from erlang import state_incr, state_decr state_set('key', {'data': [1, 2, 3]}) value = state_get('key') state_incr('counter') # atomic increment Erlang API: py:state_store(Key, Value). {ok, Value} = py:state_fetch(Key). py:state_incr(<<"counter">>). New files: - src/py_state.erl - ETS-backed state storage with atomic counters - examples/shared_state_example.erl - Usage demonstration Updated docs: - README.md, getting-started.md, scalability.md, ai-integration.md
1 parent a00fb90 commit 0809220

9 files changed

Lines changed: 638 additions & 6 deletions

File tree

README.md

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,54 @@ result = erlang.call('my_func', 10, 20)
155155
All three methods are equivalent. The import and attribute syntaxes provide
156156
a more natural Python experience.
157157

158+
## Shared State Between Workers
159+
160+
Python workers don't share namespace state, but you can share data via the
161+
built-in state API:
162+
163+
### From Python
164+
165+
```python
166+
from erlang import state_set, state_get, state_delete, state_keys
167+
from erlang import state_incr, state_decr
168+
169+
# Store data (survives across calls, shared between workers)
170+
state_set('my_key', {'data': [1, 2, 3], 'count': 42})
171+
172+
# Retrieve data
173+
value = state_get('my_key') # {'data': [1, 2, 3], 'count': 42}
174+
175+
# Atomic counters (thread-safe, great for metrics)
176+
state_incr('requests') # returns 1
177+
state_incr('requests', 10) # returns 11
178+
state_decr('requests') # returns 10
179+
180+
# List keys
181+
keys = state_keys() # ['my_key', 'requests', ...]
182+
183+
# Delete
184+
state_delete('my_key')
185+
```
186+
187+
### From Erlang/Elixir
188+
189+
```erlang
190+
%% Store and fetch
191+
py:state_store(<<"my_key">>, #{value => 42}).
192+
{ok, #{value := 42}} = py:state_fetch(<<"my_key">>).
193+
194+
%% Atomic counters
195+
1 = py:state_incr(<<"hits">>).
196+
11 = py:state_incr(<<"hits">>, 10).
197+
10 = py:state_decr(<<"hits">>).
198+
199+
%% List keys and clear
200+
Keys = py:state_keys().
201+
py:state_clear().
202+
```
203+
204+
This is backed by ETS with `{write_concurrency, true}`, so counters are atomic and fast.
205+
158206
## Async/Await Support
159207

160208
Call Python async functions without blocking:

docs/ai-integration.md

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,17 @@ Embeddings convert text into numerical vectors, enabling semantic search, cluste
4242
### Using sentence-transformers
4343

4444
```erlang
45-
%% Load a model (do this once at startup)
45+
%% Load a model - NOTE: This loads in one worker only.
46+
%% Each worker will lazy-load the model on first use.
4647
init_embedding_model() ->
4748
py:exec(<<"
4849
from sentence_transformers import SentenceTransformer
4950
model = SentenceTransformer('all-MiniLM-L6-v2')
5051
">>).
5152

53+
%% Better pattern: use a module that lazy-loads
54+
%% and cache embeddings in shared state
55+
5256
%% Generate embedding for a single text
5357
embed(Text) ->
5458
{ok, Embedding} = py:eval(
@@ -575,7 +579,44 @@ model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
575579
">>).
576580
```
577581

578-
### 4. Monitor Rate Limits
582+
### 4. Cache Embeddings in Shared State
583+
584+
Avoid recomputing embeddings for the same text:
585+
586+
```erlang
587+
%% Check cache before computing
588+
embed_cached(Text) ->
589+
Key = <<"emb:", (crypto:hash(md5, Text))/binary>>,
590+
case py:state_fetch(Key) of
591+
{ok, Embedding} ->
592+
{ok, Embedding};
593+
{error, not_found} ->
594+
{ok, Embedding} = py:eval(
595+
<<"model.encode(text).tolist()">>,
596+
#{text => Text}
597+
),
598+
py:state_store(Key, Embedding),
599+
{ok, Embedding}
600+
end.
601+
```
602+
603+
Or from Python:
604+
605+
```python
606+
from erlang import state_get, state_set
607+
import hashlib
608+
609+
def embed_cached(text):
610+
key = f"emb:{hashlib.md5(text.encode()).hexdigest()}"
611+
cached = state_get(key)
612+
if cached is not None:
613+
return cached
614+
embedding = model.encode(text).tolist()
615+
state_set(key, embedding)
616+
return embedding
617+
```
618+
619+
### 5. Monitor Rate Limits
579620

580621
```erlang
581622
%% Check current load before heavy operations

docs/getting-started.md

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ def roll_dice(sides=6):
6565
```
6666

6767
Note: Definitions made with `exec` are local to the worker that executes them.
68-
Subsequent calls may go to different workers.
68+
Subsequent calls may go to different workers. Use [Shared State](#shared-state) to
69+
share data between workers.
6970

7071
## Working with Timeouts
7172

@@ -105,6 +106,57 @@ Python generators can be streamed efficiently:
105106
{ok, Chunks} = py:stream(mymodule, generate_data, [arg1, arg2]).
106107
```
107108

109+
## Shared State
110+
111+
Python workers don't share namespace state, but you can share data via the
112+
built-in state API:
113+
114+
```erlang
115+
%% Store from Erlang
116+
py:state_store(<<"config">>, #{api_key => <<"secret">>, timeout => 5000}).
117+
118+
%% Read from Python
119+
ok = py:exec(<<"
120+
from erlang import state_get
121+
config = state_get('config')
122+
print(config['api_key'])
123+
">>).
124+
```
125+
126+
### From Python
127+
128+
```python
129+
from erlang import state_set, state_get, state_delete, state_keys
130+
from erlang import state_incr, state_decr
131+
132+
# Key-value storage
133+
state_set('my_key', {'data': [1, 2, 3]})
134+
value = state_get('my_key')
135+
136+
# Atomic counters (thread-safe)
137+
state_incr('requests') # +1, returns new value
138+
state_incr('requests', 10) # +10
139+
state_decr('requests') # -1
140+
141+
# Management
142+
keys = state_keys()
143+
state_delete('my_key')
144+
```
145+
146+
### From Erlang
147+
148+
```erlang
149+
py:state_store(Key, Value).
150+
{ok, Value} = py:state_fetch(Key).
151+
py:state_remove(Key).
152+
Keys = py:state_keys().
153+
154+
%% Atomic counters
155+
1 = py:state_incr(<<"hits">>).
156+
11 = py:state_incr(<<"hits">>, 10).
157+
10 = py:state_decr(<<"hits">>).
158+
```
159+
108160
## Type Conversions
109161

110162
Values are automatically converted between Erlang and Python:

docs/scalability.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ When running on a free-threaded Python build (compiled with `--disable-gil`), er
3232

3333
Uses Python's sub-interpreter feature with per-interpreter GIL. Each sub-interpreter has its own GIL, allowing true parallel execution across interpreters.
3434

35+
**Note:** Each sub-interpreter has isolated state. Use the [Shared State](#shared-state) API to share data between workers.
36+
3537
### Multi-Executor Mode (Python < 3.12)
3638

3739
Runs N executor threads that share the GIL. Requests are distributed round-robin across executors. Good for I/O-bound workloads where Python releases the GIL during I/O operations.
@@ -200,6 +202,34 @@ io:format("Mode: ~p, Executors: ~p~n", [Mode, Executors]).
200202
io:format("GC stats: ~p~n", [maps:get(gc_stats, Stats)]).
201203
```
202204

205+
## Shared State
206+
207+
Since workers (and sub-interpreters) have isolated namespaces, erlang_python provides
208+
ETS-backed shared state accessible from both Python and Erlang:
209+
210+
```python
211+
from erlang import state_set, state_get, state_incr, state_decr
212+
213+
# Share configuration across workers
214+
config = state_get('app_config')
215+
216+
# Thread-safe metrics
217+
state_incr('requests_total')
218+
state_incr('bytes_processed', len(data))
219+
```
220+
221+
```erlang
222+
%% Set config that all workers can read
223+
py:state_store(<<"app_config">>, #{model => <<"gpt-4">>, timeout => 30000}).
224+
225+
%% Read metrics
226+
{ok, Total} = py:state_fetch(<<"requests_total">>).
227+
```
228+
229+
The state is backed by ETS with `{write_concurrency, true}`, making atomic
230+
counter operations fast and lock-free. See [Getting Started](getting-started.md#shared-state)
231+
for the full API.
232+
203233
## See Also
204234

205235
- [Getting Started](getting-started.md) - Basic usage

examples/shared_state_example.erl

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
#!/usr/bin/env escript
2+
%%% @doc Shared state example - demonstrates sharing data between Python workers.
3+
%%%
4+
%%% Since each Python worker has its own namespace, functions and variables
5+
%%% defined in one call aren't visible in another. The shared state API
6+
%%% provides ETS-backed storage accessible from both Python and Erlang.
7+
%%%
8+
%%% Prerequisites: rebar3 compile
9+
%%% Run from project root: escript examples/shared_state_example.erl
10+
11+
-mode(compile).
12+
13+
main(_) ->
14+
%% Add the compiled beam files to the code path
15+
ScriptDir = filename:dirname(escript:script_name()),
16+
ProjectRoot = filename:dirname(ScriptDir),
17+
EbinDir = filename:join([ProjectRoot, "_build", "default", "lib", "erlang_python", "ebin"]),
18+
true = code:add_pathz(EbinDir),
19+
20+
{ok, _} = application:ensure_all_started(erlang_python),
21+
22+
io:format("~n=== Shared State Example ===~n~n"),
23+
24+
%% Clear any existing state
25+
py:state_clear(),
26+
27+
%% Example 1: Set from Erlang, read from Python
28+
io:format("1. Erlang -> Python:~n"),
29+
py:state_store(<<"config">>, #{
30+
api_key => <<"secret123">>,
31+
max_retries => 3,
32+
timeout => 5000
33+
}),
34+
%% Python reads using idiomatic import syntax
35+
ok = py:exec(<<"
36+
from erlang import state_get
37+
38+
config = state_get('config')
39+
assert config['api_key'] == 'secret123'
40+
assert config['max_retries'] == 3
41+
">>),
42+
io:format(" Python read config successfully~n"),
43+
44+
%% Example 2: Set from Python, read from Erlang
45+
io:format("~n2. Python -> Erlang:~n"),
46+
ok = py:exec(<<"
47+
from erlang import state_set
48+
49+
state_set('computation_result', {
50+
'status': 'complete',
51+
'values': [1, 2, 3, 4, 5],
52+
'metadata': {'source': 'python', 'timestamp': 1234567890}
53+
})
54+
">>),
55+
{ok, Result} = py:state_fetch(<<"computation_result">>),
56+
io:format(" Result in Erlang: ~p~n", [Result]),
57+
58+
%% Example 3: Atomic counters - thread-safe increment/decrement
59+
io:format("~n3. Atomic counters (thread-safe):~n"),
60+
61+
%% From Erlang
62+
Val1 = py:state_incr(<<"hits">>),
63+
io:format(" Erlang incr: ~p~n", [Val1]),
64+
Val2 = py:state_incr(<<"hits">>, 10),
65+
io:format(" Erlang incr by 10: ~p~n", [Val2]),
66+
67+
%% From Python
68+
ok = py:exec(<<"
69+
from erlang import state_incr, state_decr
70+
71+
val = state_incr('hits', 5)
72+
print(f' Python incr by 5: {val}')
73+
74+
val = state_decr('hits', 3)
75+
print(f' Python decr by 3: {val}')
76+
">>),
77+
78+
{ok, FinalCount} = py:state_fetch(<<"hits">>),
79+
io:format(" Final counter value: ~p~n", [FinalCount]),
80+
81+
%% Example 4: List all keys from Python
82+
io:format("~n4. List keys from Python:~n"),
83+
ok = py:exec(<<"
84+
from erlang import state_keys
85+
86+
keys = state_keys()
87+
assert 'hits' in keys
88+
assert 'config' in keys
89+
">>),
90+
Keys = py:state_keys(),
91+
io:format(" Keys: ~p~n", [Keys]),
92+
93+
%% Example 5: Delete from Python
94+
io:format("~n5. Delete from Python:~n"),
95+
ok = py:exec(<<"
96+
from erlang import state_delete
97+
98+
state_delete('computation_result')
99+
">>),
100+
case py:state_fetch(<<"computation_result">>) of
101+
{error, not_found} -> io:format(" Key deleted successfully~n");
102+
{ok, _} -> io:format(" ERROR: Key still exists~n")
103+
end,
104+
105+
%% Cleanup
106+
py:state_clear(),
107+
108+
io:format("~n=== Done ===~n~n"),
109+
ok = application:stop(erlang_python).

src/erlang_python_sup.erl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
%%% Manages the worker pools for Python execution:
1818
%%% <ul>
1919
%%% <li>py_callback - Callback registry for Python to Erlang calls</li>
20+
%%% <li>py_state - Shared state storage accessible from Python</li>
2021
%%% <li>py_pool - Main worker pool for synchronous Python calls</li>
2122
%%% <li>py_async_pool - Worker pool for asyncio coroutines</li>
2223
%%% <li>py_subinterp_pool - Worker pool for sub-interpreter parallelism</li>
@@ -42,6 +43,12 @@ init([]) ->
4243
%% Initialize callback registry ETS table (owned by supervisor for resilience)
4344
ok = py_callback:init_tab(),
4445

46+
%% Initialize shared state ETS table (owned by supervisor for resilience)
47+
ok = py_state:init_tab(),
48+
49+
%% Register state functions as callbacks for Python access
50+
ok = py_state:register_callbacks(),
51+
4552
%% Callback registry - must start before pool
4653
CallbackSpec = #{
4754
id => py_callback,

0 commit comments

Comments
 (0)