Skip to content

Commit c68a100

Browse files
committed
fix: enhance LaTeX variable extraction with nested subscript support and add corresponding tests
1 parent dd8a5fc commit c68a100

12 files changed

Lines changed: 583 additions & 85 deletions

File tree

src/pydasa/serialization/parser.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616
from sympy import Symbol, symbols
1717

1818
# import global variables
19-
from pydasa.validations.patterns import LATEX_RE
19+
# from pydasa.validations.patterns import LATEX_RE
20+
from pydasa.validations.patterns import LATEX_VAR_TOKEN_RE
2021
from pydasa.validations.patterns import PI_POW_RE
2122
from pydasa.validations.patterns import PI_COEF_RE
2223
from pydasa.validations.patterns import BASIC_OPS_RE
@@ -57,8 +58,8 @@ def extract_latex_vars(expr: str) -> Tuple[Dict[str, str], Dict[str, str]]:
5758
- The first dictionary maps LaTeX variable names to their Python equivalents.
5859
- The second dictionary maps Python variable names to their LaTeX equivalents.
5960
"""
60-
# Extract latex variable names with regex
61-
matches = re.findall(LATEX_RE, expr)
61+
# Extract latex variable names with regex (supports nested subscripts)
62+
matches = [m.group(0) for m in re.finditer(LATEX_VAR_TOKEN_RE, expr)]
6263

6364
# Filter out ignored LaTeX commands
6465
matches = [m for m in matches if m not in IGNORE_EXPR]
@@ -108,7 +109,9 @@ def create_latex_mapping(expr: str) -> Tuple[Dict[Symbol, Symbol], # symbol_map
108109
py_symbol_map = {} # For lambdify
109110

110111
# Create symbols for each variable found by regex
111-
for latex_var, py_var in latex_to_py.items():
112+
# Iterate over a snapshot to avoid mutating the dict mid-loop
113+
fallback_aliases: Dict[str, Any] = {}
114+
for latex_var, py_var in list(latex_to_py.items()):
112115
# Create SymPy symbol with Python-compatible name
113116
sym = symbols(py_var)
114117
py_symbol_map[py_var] = sym
@@ -118,10 +121,19 @@ def create_latex_mapping(expr: str) -> Tuple[Dict[Symbol, Symbol], # symbol_map
118121
try:
119122
latex_sym = parse_latex(latex_var)
120123
symbol_map[latex_sym] = sym
124+
# Safety net: register SymPy-rendered string in py_symbol_map
125+
# (_aliases) so _aliases lookups don't fail if .subs() doesn't
126+
# fully resolve the symbol (e.g. 'M_{a*(c*t_{A*S})}' -> sym).
127+
# Do NOT add to latex_to_py — that would pollute variable lookups.
128+
fallback_key = str(latex_sym)
129+
if fallback_key not in py_symbol_map:
130+
fallback_aliases[fallback_key] = sym
121131
except Exception:
122132
# If parsing fails, use the py_var as key
123133
symbol_map[symbols(py_var)] = sym
124134

135+
py_symbol_map.update(fallback_aliases)
136+
125137
return symbol_map, py_symbol_map, latex_to_py, py_to_latex
126138

127139

src/pydasa/validations/patterns.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,19 @@
2323
LaTeX regex pattern to match LaTeX symbols (e.g., '\\alpha') with optional subscripts (e.g., '\\mu_{1}', '\\lambda_{s}') in *PyDASA*.
2424
"""
2525

26+
# :attr: LATEX_VAR_TOKEN_RE
27+
LATEX_VAR_TOKEN_RE: str = (
28+
r"(\\[A-Za-z]+|[A-Za-z][A-Za-z0-9]*)"
29+
r"(?:_(?:[A-Za-z0-9]+|\{(?:[^{}]|\{[^{}]*\})+\}))?"
30+
)
31+
"""
32+
Regex pattern to match LaTeX-like variable tokens with optional subscripts,
33+
including one nested brace level inside subscripts.
34+
35+
Examples:
36+
'\\alpha', '\\mu_{1}', 'M_{buf_{AS}}'
37+
"""
38+
2639
# NOTE: OG REGEX!
2740
# DFLT_POW_RE: str = r"\-?\d+" # r'\^(-?\d+)'
2841
# :attr: DFLT_POW_RE

src/pydasa/workflows/influence.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,9 @@ def analyze_symbolic(self,
223223
# Get variable values
224224
values = {}
225225
for var_sym in analysis._latex_to_py.keys():
226-
# Ensure symbol is a string
226+
# Skip SymPy-mangled fallback keys — only look up real variables
227+
if var_sym not in self._variables:
228+
continue
227229
values[var_sym] = self._get_variable_value(var_sym, val_type)
228230
# Perform analysis
229231
result = analysis.analyze_symbolically(values)
@@ -260,6 +262,9 @@ def analyze_numeric(self,
260262
vals = []
261263
bounds = []
262264
for var_sym in analysis._latex_to_py.keys():
265+
# Skip SymPy-mangled fallback keys — only look up real variables
266+
if var_sym not in self._variables:
267+
continue
263268
var = self._variables[var_sym]
264269
min_val = var.std_min if var.std_min is not None else (var.min if var.min is not None else -0.1)
265270
max_val = var.std_max if var.std_max is not None else (var.max if var.max is not None else -10.0)

tests/notebooks/PyDASA-Online-Custom.ipynb

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
},
4343
{
4444
"cell_type": "code",
45-
"execution_count": 39,
45+
"execution_count": 1,
4646
"id": "e5706ebb",
4747
"metadata": {},
4848
"outputs": [
@@ -87,7 +87,7 @@
8787
},
8888
{
8989
"cell_type": "code",
90-
"execution_count": 40,
90+
"execution_count": 2,
9191
"id": "820e92cc",
9292
"metadata": {},
9393
"outputs": [
@@ -167,7 +167,7 @@
167167
},
168168
{
169169
"cell_type": "code",
170-
"execution_count": 41,
170+
"execution_count": 3,
171171
"id": "ccf545d2",
172172
"metadata": {},
173173
"outputs": [
@@ -316,7 +316,7 @@
316316
},
317317
{
318318
"cell_type": "code",
319-
"execution_count": 42,
319+
"execution_count": 4,
320320
"id": "54e5439c",
321321
"metadata": {},
322322
"outputs": [
@@ -379,7 +379,7 @@
379379
},
380380
{
381381
"cell_type": "code",
382-
"execution_count": 43,
382+
"execution_count": 5,
383383
"id": "d88d165e",
384384
"metadata": {},
385385
"outputs": [
@@ -455,7 +455,7 @@
455455
},
456456
{
457457
"cell_type": "code",
458-
"execution_count": 44,
458+
"execution_count": 6,
459459
"id": "5e1fc0fc",
460460
"metadata": {},
461461
"outputs": [
@@ -498,7 +498,7 @@
498498
}
499499
],
500500
"source": [
501-
"from queueing import Queue\n",
501+
"from src.queueing import Queue\n",
502502
"import itertools\n",
503503
"import pandas as pd\n",
504504
"\n",
@@ -562,7 +562,7 @@
562562
},
563563
{
564564
"cell_type": "code",
565-
"execution_count": 45,
565+
"execution_count": 7,
566566
"id": "f05c5e28",
567567
"metadata": {},
568568
"outputs": [
@@ -610,7 +610,7 @@
610610
},
611611
{
612612
"cell_type": "code",
613-
"execution_count": 46,
613+
"execution_count": 8,
614614
"id": "7a102b8b",
615615
"metadata": {},
616616
"outputs": [
@@ -665,7 +665,7 @@
665665
},
666666
{
667667
"cell_type": "code",
668-
"execution_count": 54,
668+
"execution_count": 9,
669669
"id": "f2ae194c",
670670
"metadata": {},
671671
"outputs": [

tests/notebooks/PyDASA-Online-Tutorial.ipynb

Lines changed: 5 additions & 5 deletions
Large diffs are not rendered by default.

tests/notebooks/PyDASA-Online-Underneath.ipynb

Lines changed: 98 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": null,
5+
"execution_count": 1,
66
"id": "97c90f36",
77
"metadata": {},
88
"outputs": [],
@@ -34,10 +34,18 @@
3434
},
3535
{
3636
"cell_type": "code",
37-
"execution_count": null,
37+
"execution_count": 2,
3838
"id": "2f602b47",
3939
"metadata": {},
40-
"outputs": [],
40+
"outputs": [
41+
{
42+
"name": "stdout",
43+
"output_type": "stream",
44+
"text": [
45+
"Schema(sym='', fwk='COMPUTATION', alias='', idx=-1, name='', description='', fdu_lt=[Dimension(_sym='T', _fwk='COMPUTATION', _alias='', _idx=0, _name='Time', description='Duration of an event or interval.', _unit='s'), Dimension(_sym='S', _fwk='COMPUTATION', _alias='', _idx=1, _name='Space', description='Physical extent in three dimensions.', _unit='bit'), Dimension(_sym='N', _fwk='COMPUTATION', _alias='', _idx=2, _name='Complexity', description='Measure of interconnectedness or intricacy in a system.', _unit='op')], fdu_map={'T': Dimension(_sym='T', _fwk='COMPUTATION', _alias='', _idx=0, _name='Time', description='Duration of an event or interval.', _unit='s'), 'S': Dimension(_sym='S', _fwk='COMPUTATION', _alias='', _idx=1, _name='Space', description='Physical extent in three dimensions.', _unit='bit'), 'N': Dimension(_sym='N', _fwk='COMPUTATION', _alias='', _idx=2, _name='Complexity', description='Measure of interconnectedness or intricacy in a system.', _unit='op')}, fdu_symbols=['T', 'S', 'N'], fdu_regex='^[TSN](\\\\^-?\\\\d+)?(\\\\*[TSN](?:\\\\^-?\\\\d+)?)*$', fdu_pow_regex='\\\\-?\\\\d+', fdu_no_pow_regex='[TSN](?!\\\\^)', fdu_sym_regex='[TSN]')\n"
46+
]
47+
}
48+
],
4149
"source": [
4250
"# example for variables\n",
4351
"from pydasa import Variable, Schema\n",
@@ -93,10 +101,18 @@
93101
},
94102
{
95103
"cell_type": "code",
96-
"execution_count": null,
104+
"execution_count": 3,
97105
"id": "43dc8935",
98106
"metadata": {},
99-
"outputs": [],
107+
"outputs": [
108+
{
109+
"name": "stdout",
110+
"output_type": "stream",
111+
"text": [
112+
"\\Pi_{0}: \\frac{h*g}{v^{2}}\n"
113+
]
114+
}
115+
],
100116
"source": [
101117
"# Example of dimensional model/matrix\n",
102118
"# Simple example: Free fall\n",
@@ -127,10 +143,22 @@
127143
},
128144
{
129145
"cell_type": "code",
130-
"execution_count": null,
146+
"execution_count": 4,
131147
"id": "54894724",
132148
"metadata": {},
133-
"outputs": [],
149+
"outputs": [
150+
{
151+
"name": "stdout",
152+
"output_type": "stream",
153+
"text": [
154+
"Formula: \\frac{R*g}{v^{2}}\n",
155+
"Variables: dict_keys(['v', 'R', 'g'])\n",
156+
"Exponents: {'v': -2, 'R': 1, 'g': 1}\n",
157+
"Monte Carlo mean: 19.63924, std: 11.344896062400338\n",
158+
"Monte Carlo mean: 1.54508618261774, std: 2.8186506157982407\n"
159+
]
160+
}
161+
],
134162
"source": [
135163
"# coefficient example\n",
136164
"import numpy as np\n",
@@ -205,10 +233,21 @@
205233
},
206234
{
207235
"cell_type": "code",
208-
"execution_count": null,
236+
"execution_count": 5,
209237
"id": "13227281",
210238
"metadata": {},
211-
"outputs": [],
239+
"outputs": [
240+
{
241+
"name": "stdout",
242+
"output_type": "stream",
243+
"text": [
244+
"Reynolds Formula: \\frac{\\rho*D*v}{\\mu}\n",
245+
"Variables: ['\\\\rho', 'D', '\\\\mu', 'v']\n",
246+
"Analysis exported to data/reynolds_da.json\n",
247+
"Restored coefficients: ['\\\\Pi_{0}']\n"
248+
]
249+
}
250+
],
212251
"source": [
213252
"# Dimensional Engine Example\n",
214253
"from pydasa import Variable, Schema, AnalysisEngine\n",
@@ -281,10 +320,34 @@
281320
},
282321
{
283322
"cell_type": "code",
284-
"execution_count": null,
323+
"execution_count": 6,
285324
"id": "b380e098",
286325
"metadata": {},
287-
"outputs": [],
326+
"outputs": [
327+
{
328+
"name": "stdout",
329+
"output_type": "stream",
330+
"text": [
331+
"Dimensional analysis complete: ['\\\\Pi_{0}']\n",
332+
"\n",
333+
"=== Symbolic Sensitivity Analysis Results ===\n",
334+
"\n",
335+
"Sensitivity for SEN_{\\Pi_{0}}:\n",
336+
"\t∂π/∂D (Diameter): +4.9900e+06\n",
337+
"\t∂π/∂\\mu (Viscosity): -2.4900e+08\n",
338+
"\t∂π/∂\\rho (Density): +2.4950e+02\n",
339+
"\t∂π/∂v (Velocity): +4.9900e+04\n",
340+
"\n",
341+
"=== Numerical Sensitivity Analysis Results (FAST) ===\n",
342+
"\n",
343+
"Sensitivity for SEN_{\\Pi_{0}}:\n",
344+
"\tS1: [\\rho] (Density): 0.0004\n",
345+
"\tS1: [v] (Velocity): 0.0731\n",
346+
"\tS1: [D] (Diameter): 0.8900\n",
347+
"\tS1: [\\mu] (Viscosity): 0.0174\n"
348+
]
349+
}
350+
],
288351
"source": [
289352
"# Sensitivity Analysis Example\n",
290353
"from pydasa import Variable, Schema, AnalysisEngine\n",
@@ -426,10 +489,32 @@
426489
},
427490
{
428491
"cell_type": "code",
429-
"execution_count": null,
492+
"execution_count": 7,
430493
"id": "bae26bce",
431494
"metadata": {},
432-
"outputs": [],
495+
"outputs": [
496+
{
497+
"name": "stdout",
498+
"output_type": "stream",
499+
"text": [
500+
"Dimensional analysis complete: ['\\\\Pi_{0}']\n",
501+
"✓ Monte Carlo simulation complete\n",
502+
" Experiments: 1000\n",
503+
" Simulations: ['\\\\Pi_{0}']\n",
504+
"\n",
505+
"Reynolds Number Statistics:\n",
506+
"\tMean: 1.75e+05\n",
507+
"\tMedian: 1.73e+05\n",
508+
"\tStd Dev: 7.50e+04\n",
509+
"\tMin: 4.58e+04\n",
510+
"\tMax: 3.54e+05\n",
511+
"\n",
512+
"Result array shape: (1000,)\n",
513+
"\tFirst 5 results: [ 51557.56468247 249338.40800126 70518.40469616 103721.16447116\n",
514+
" 104505.61895793]...\n"
515+
]
516+
}
517+
],
433518
"source": [
434519
"from pydasa import Variable, Schema, AnalysisEngine\n",
435520
"from pydasa.dimensional.fundamental import Dimension\n",

0 commit comments

Comments
 (0)