Skip to content

Commit 6b3f1e5

Browse files
committed
hyperbase.py now delegating to smaller modules with well-defined concerns: builders.py, correctness.py, transforms.py, patterns.checks.py and patterns.matcher.py
1 parent 961ad94 commit 6b3f1e5

15 files changed

Lines changed: 668 additions & 637 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
### Changed
99
- multiple pattern functions are now Hyperedge/Atom methods: is_wildcard, is_pattern, is_fun_pattern, is_variable, contains_variable, variable_name
10+
- hyperbase.py now delegates to smaller modules with well-defined concerns: builders.py, correctness.py, transforms.py, patterns.checks.py and patterns.matcher.py.
1011

1112
### Removed
1213

src/hyperbase/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from hyperbase.hyperedge import hedge
1+
from hyperbase.builders import hedge
22
from hyperbase.loaders import load_edges
33
from hyperbase.parsers import get_parser
44

src/hyperbase/builders.py

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
from __future__ import annotations
2+
3+
from collections.abc import Iterable
4+
from typing import TYPE_CHECKING, cast
5+
6+
from hyperbase.hyperedge import Atom, Hyperedge, UniqueAtom
7+
8+
if TYPE_CHECKING:
9+
from hyperbase.parsers.parse_result import ParseResult
10+
11+
12+
def str_to_atom(s: str) -> str:
    """Return ``s`` lowercased, with reserved characters percent-encoded.

    The characters ``% / space ( ) . * & @`` plus newline and carriage
    return are each replaced by their lowercase ``%xx`` escape.
    """
    # A single translate pass encodes every character independently, so a
    # "%" introduced by one escape is never encoded a second time.
    escapes = str.maketrans(
        {
            "%": "%25",
            "/": "%2f",
            " ": "%20",
            "(": "%28",
            ")": "%29",
            ".": "%2e",
            "*": "%2a",
            "&": "%26",
            "@": "%40",
            "\n": "%0a",
            "\r": "%0d",
        }
    )
    return s.lower().translate(escapes)
29+
30+
31+
def _edge_str_has_outer_parens(edge_str: str) -> bool:
32+
"""Check if string representation of edge is delimited by outer
33+
parenthesis.
34+
"""
35+
if len(edge_str) < 2:
36+
return False
37+
return edge_str[0] == "("
38+
39+
40+
def split_edge_str(edge_str: str) -> tuple[str, ...]:
    """Split the string form of an edge (given without its outer
    parentheses) into its top-level tokens.

    Tokens are separated by spaces; a parenthesised group is kept whole as
    a single token. Raises ValueError when parentheses are unbalanced.
    """
    pieces: list[str] = []
    token_start = 0
    nesting = 0
    in_token = False

    for idx, char in enumerate(edge_str):
        if char == " ":
            # Spaces only separate tokens at the top level.
            if in_token and nesting == 0:
                pieces.append(edge_str[token_start:idx])
                in_token = False
        elif char == "(":
            if nesting == 0:
                in_token = True
                token_start = idx
            nesting += 1
        elif char == ")":
            nesting -= 1
            if nesting < 0:
                raise ValueError(f"Unbalanced parenthesis in edge string: '{edge_str}'")
            if nesting == 0:
                # Closing the outermost group ends the token immediately.
                pieces.append(edge_str[token_start : idx + 1])
                in_token = False
        elif not in_token:
            in_token = True
            token_start = idx

    if in_token:
        if nesting > 0:
            raise ValueError(f"Unbalanced parenthesis in edge string: '{edge_str}'")
        pieces.append(edge_str[token_start:])

    return tuple(pieces)
79+
80+
81+
def _parsed_token(token: str) -> Hyperedge:
    """Convert one top-level token into an edge.

    A parenthesised token is parsed through hedge(); anything else becomes
    an Atom.
    """
    if not _edge_str_has_outer_parens(token):
        return Atom(token)
    return hedge(token)
86+
87+
88+
def _collect_positions(tok_pos: Hyperedge) -> list[int]:
    """Gather every non-negative token position found in a tok_pos tree.

    Atomic nodes are read as integers via their string form; negative
    values are dropped. Positions are returned in traversal order.
    """
    if not tok_pos.atom:
        return [
            position
            for child in tok_pos
            for position in _collect_positions(child)
        ]

    position = int(str(tok_pos))
    if position < 0:
        return []
    return [position]
98+
99+
100+
def _rebuild_with_text(
    edge: Hyperedge,
    tok_pos: Hyperedge,
    tokens: list[str],
) -> Hyperedge:
    """Rebuild ``edge`` recursively, attaching text taken from ``tokens``.

    An atom receives the token at its own position, or None when that
    position is negative. A non-atomic edge receives the space-joined
    tokens from its lowest to its highest valid position, or None when it
    has no valid position.
    """
    if not edge.atom:
        children = tuple(
            _rebuild_with_text(child, child_pos, tokens)
            for child, child_pos in zip(edge, tok_pos, strict=False)
        )
        covered = _collect_positions(tok_pos)
        span_text = (
            " ".join(tokens[min(covered) : max(covered) + 1]) if covered else None
        )
        return Hyperedge(children, text=span_text)

    leaf = cast(Atom, edge)
    index = int(str(tok_pos))
    leaf_text = None if index < 0 else tokens[index]
    return Atom(str(leaf), leaf.parens, text=leaf_text)
124+
125+
126+
def hedge(
    source: str | Hyperedge | list | tuple | ParseResult,
) -> Hyperedge:
    """Create a hyperedge from one of the supported source kinds.

    - An object exposing ``tok_pos``, ``tokens`` and ``edge`` is treated as
      a ParseResult: its edge is rebuilt with per-node text and the result's
      ``text`` attribute is set to the parse result's text.
    - A ``list`` or ``tuple`` is converted element by element.
    - A ``str`` is parsed; newlines count as spaces and surrounding
      whitespace is ignored.
    - A ``Hyperedge``, ``Atom`` or ``UniqueAtom`` is returned unchanged.

    Raises ValueError when a string contains no tokens, and TypeError for
    any other source type.
    """
    # ParseResult is recognised structurally (duck typing) rather than by
    # isinstance, to avoid a circular import at module load time.
    if all(hasattr(source, attr) for attr in ("tok_pos", "tokens", "edge")):
        from hyperbase.parsers import ParseResult

        parsed = cast(ParseResult, source)
        rebuilt = _rebuild_with_text(parsed.edge, parsed.tok_pos, parsed.tokens)
        object.__setattr__(rebuilt, "text", parsed.text)
        return rebuilt

    # Exact type checks are used throughout, as in the dispatch below.
    if type(source) in {tuple, list}:
        members = cast(Iterable, source)
        return Hyperedge(tuple(hedge(member) for member in members))

    if type(source) in {Hyperedge, Atom, UniqueAtom}:
        return source  # type: ignore

    if type(source) is str:
        edge_str = source.strip().replace("\n", " ")
        parens = _edge_str_has_outer_parens(edge_str)
        inner_str = edge_str[1:-1] if parens else edge_str

        tokens = split_edge_str(inner_str)
        if not tokens:
            raise ValueError(f"Edge string is empty: '{source}'")

        # tokens is non-empty here, so edges always has at least one item.
        edges = tuple(_parsed_token(token) for token in tokens)
        if len(edges) == 1 and isinstance(edges[0], Atom):
            return Atom(str(edges[0]), parens)
        return Hyperedge(edges)

    raise TypeError(
        f"Cannot create hyperedge from {type(source).__name__}: {source!r}"
    )
169+
170+
171+
def build_atom(text: str, *parts: str) -> Atom:
    """Build an atom from text and other parts.

    ``text`` is encoded with str_to_atom(); every non-empty part is then
    appended, separated by "/". With no (non-empty) parts the atom consists
    of the encoded text alone.
    """
    # Joining text and parts in one step also covers the no-parts case,
    # which previously hit an unassigned local and raised UnboundLocalError.
    segments = [str_to_atom(text)]
    segments.extend(part for part in parts if part)
    return Atom("/".join(segments))

src/hyperbase/cli/repl.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
from rich.text import Text
2020
from rich.tree import Tree
2121

22-
from hyperbase.hyperedge import Atom, Hyperedge, hedge
22+
from hyperbase.builders import hedge
23+
from hyperbase.hyperedge import Atom, Hyperedge
2324
from hyperbase.parsers import Parser, get_parser, list_parsers
2425
from hyperbase.parsers.correctness import badness_check
2526

src/hyperbase/constants.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,23 @@
55

66
# Pattern functions
77
PATTERN_FUNCTIONS: set[str] = {"var", "atoms", "lemma", "any"}
8+
9+
# Argument role ordering for normalisation
# Maps each argument-role character to an integer rank; "m" has the lowest
# rank (-1) and "?" the highest (10).
argrole_order: dict[str, int] = {
    "m": -1,
    "s": 0,
    "p": 1,
    "a": 2,
    "c": 3,
    "o": 4,
    "i": 5,
    "t": 6,
    "j": 7,
    "x": 8,
    "r": 9,
    "?": 10,
}

# Valid argument roles by connector type
# Roles accepted on connectors of type P (every key of argrole_order except "m").
valid_p_argroles: set[str] = {"s", "p", "a", "c", "o", "i", "t", "j", "x", "r", "?"}
# Roles accepted on connectors of type B.
valid_b_argroles: set[str] = {"m", "a"}

src/hyperbase/correctness.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
from __future__ import annotations
2+
3+
from collections import Counter
4+
from typing import TYPE_CHECKING
5+
6+
import hyperbase.constants as const
7+
8+
if TYPE_CHECKING:
9+
from hyperbase.hyperedge import Atom, Hyperedge
10+
11+
12+
def check_correctness(edge: Hyperedge) -> dict[Hyperedge, list[tuple[str, str]]]:
    """Check a hyperedge for correctness.

    Returns a dict mapping each offending (sub)edge to its list of
    ``(error_code, message)`` tuples. Atoms and non-atomic edges are
    handled by separate checkers.
    """
    checker = _check_atom if edge.atom else _check_edge
    return checker(edge)  # type: ignore[arg-type]
17+
18+
19+
def _check_atom(atom: Atom) -> dict[Hyperedge, list[tuple[str, str]]]:
    """Check that an atom's main type is one of C, P, M, B, T or J.

    Returns an empty dict for a valid atom, otherwise a dict mapping the
    atom to a single "bad-atom-type" error.
    """
    main_type = atom.mtype()
    if main_type in {"C", "P", "M", "B", "T", "J"}:
        return {}
    return {atom: [("bad-atom-type", f"{main_type} is not a valid atom type")]}
31+
32+
33+
# Argument roles that may be used at most once per connector, listed in the
# order in which their errors are reported.
_SINGLE_USE_ARGROLES: tuple[str, ...] = ("s", "o", "c", "i", "p", "a")


def _arg_type_errors(
    edge: Hyperedge, allowed: set[str], code: str, label: str
) -> list[tuple[str, str]]:
    """Report each argument of ``edge`` whose main type is not in ``allowed``."""
    errors: list[tuple[str, str]] = []
    for arg in edge[1:]:
        at = arg.mtype()
        if at not in allowed:
            errors.append(
                (code, f"{label} argument {arg!s} has incorrect type: {at}")
            )
    return errors


def _argrole_errors(edge: Hyperedge, ct: str) -> list[tuple[str, str]]:
    """Report argument-role errors for an edge whose connector type is P or B."""
    try:
        ars = edge.argroles()
    except RuntimeError:
        # malformed edges are detected elsewhere
        return []

    if len(ars) == 0:
        return [("no-argroles", "Connectors of type P or B must have argument roles")]

    if ct == "P":
        valid_roles, code = const.valid_p_argroles, "pred-bad-arg-role"
    else:
        valid_roles, code = const.valid_b_argroles, "build-bad-arg-role"

    # each role must be allowed for this connector type
    errors: list[tuple[str, str]] = [
        (code, f"{ar} is not a valid argument role for connector of type {ct}")
        for ar in ars
        if ar not in valid_roles
    ]

    # one role per argument (the connector itself is not an argument)
    if len(ars) != len(edge) - 1:
        errors.append(
            ("bad-num-argroles", "number of argroles must match number of arguments")
        )

    ars_counts = Counter(ars)
    for role in _SINGLE_USE_ARGROLES:
        if ars_counts[role] > 1:
            errors.append(
                (f"argrole-{role}-1-max", f"argrole {role} can only be used once")
            )

    return errors


def _check_edge(edge: Hyperedge) -> dict[Hyperedge, list[tuple[str, str]]]:
    """Check a non-atomic edge and, recursively, all of its subedges.

    Validates the connector type, the argument count and argument types
    required by that connector type, and (for connectors of type P or B)
    the argument roles. Returns a dict mapping each offending (sub)edge to
    its list of ``(error_code, message)`` tuples; edges without errors do
    not appear in the result.
    """
    errors: list[tuple[str, str]] = []

    ct = edge[0].mtype()
    # check if connector has valid type
    if ct not in {"P", "M", "B", "T", "J"}:
        errors.append(("conn-bad-type", f"connector has incorrect type: {ct}"))

    # check if modifier structure is correct
    if ct == "M":
        if len(edge) != 2:
            errors.append(("mod-1-arg", "modifiers can only have one argument"))
    # check if builder structure is correct
    elif ct == "B":
        if len(edge) != 3:
            errors.append(("build-2-args", "builders can only have two arguments"))
        errors.extend(_arg_type_errors(edge, {"C"}, "build-arg-bad-type", "builder"))
    # check if trigger structure is correct
    elif ct == "T":
        if len(edge) != 2:
            errors.append(("trig-1-arg", "triggers can only have one argument"))
        errors.extend(
            _arg_type_errors(edge, {"C", "R"}, "trig-bad-arg-type", "trigger")
        )
    # check if predicate structure is correct
    elif ct == "P":
        errors.extend(
            _arg_type_errors(edge, {"C", "R", "S"}, "pred-arg-bad-type", "predicate")
        )
    # check if conjunction structure is correct
    elif ct == "J" and len(edge) < 3:
        errors.append(
            ("conj-2-args-min", "conjunctions must have at least two arguments")
        )

    # check argroles
    if ct in {"P", "B"}:
        errors.extend(_argrole_errors(edge, ct))

    output: dict[Hyperedge, list[tuple[str, str]]] = {}
    if errors:
        output[edge] = errors

    for subedge in edge:
        output.update(check_correctness(subedge))

    return output

0 commit comments

Comments
 (0)