hyperquest-hq
diff --git a/‎CHANGELOG.md‎
Lines changed: 1 addition & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/hyperbase/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎src/hyperbase/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/hyperbase/builders.py‎
Lines changed: 177 additions & 0 deletions b/‎src/hyperbase/builders.py‎
Lines changed: 177 additions & 0 deletions
diff --git a/‎src/hyperbase/cli/repl.py‎
Lines changed: 2 additions & 1 deletion b/‎src/hyperbase/cli/repl.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎src/hyperbase/constants.py‎
Lines changed: 20 additions & 0 deletions b/‎src/hyperbase/constants.py‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎src/hyperbase/correctness.py‎
Lines changed: 152 additions & 0 deletions b/‎src/hyperbase/correctness.py‎
Lines changed: 152 additions & 0 deletions
@@ -7,6 +7,7 @@
 
 ### Changed
 - multiple patterns functions are now Hyperedge/Atom methods: is_wildcard, is_pattern, is_fun_pattern, is_variable, contains_variable, variable_name
+- hyperbase.py now delegates to smaller modules with well-defined concerns: builders.py, correctness.py, transforms.py, patterns.checks.py and patterns.matcher.py.
 
 ### Removed
 
 
@@ -1,4 +1,4 @@
-from hyperbase.hyperedge import hedge
+from hyperbase.builders import hedge
 from hyperbase.loaders import load_edges
 from hyperbase.parsers import get_parser
 
 
@@ -0,0 +1,177 @@
+from __future__ import annotations
+
+from collections.abc import Iterable
+from typing import TYPE_CHECKING, cast
+
+from hyperbase.hyperedge import Atom, Hyperedge, UniqueAtom
+
+if TYPE_CHECKING:
+    from hyperbase.parsers.parse_result import ParseResult
+
+
+def str_to_atom(s: str) -> str:
+    """Return a lowercased copy of ``s`` with reserved characters escaped.
+
+    Each reserved character is replaced by a percent code so that the
+    result is a valid atom.
+    """
+    # "%" has to be escaped first; otherwise the percent signs introduced by
+    # the later replacements would themselves be escaped.
+    escapes = (
+        ("%", "%25"),
+        ("/", "%2f"),
+        (" ", "%20"),
+        ("(", "%28"),
+        (")", "%29"),
+        (".", "%2e"),
+        ("*", "%2a"),
+        ("&", "%26"),
+        ("@", "%40"),
+        ("\n", "%0a"),
+        ("\r", "%0d"),
+    )
+
+    encoded = s.lower()
+    for char, code in escapes:
+        encoded = encoded.replace(char, code)
+    return encoded
+
+
+def _edge_str_has_outer_parens(edge_str: str) -> bool:
+    """Tell whether an edge string is treated as parenthesised.
+
+    Only the first character is inspected, and strings shorter than two
+    characters never qualify.
+    """
+    return len(edge_str) >= 2 and edge_str[0] == "("
+
+
+def split_edge_str(edge_str: str) -> tuple[str, ...]:
+    """Split an edge string (given without its outer parentheses) into its
+    top-level tokens.
+
+    Spaces separate tokens only at nesting depth zero; a parenthesised
+    group is returned as a single token, parentheses included.
+
+    Raises ValueError if a closing parenthesis has no match or a group is
+    still open at the end of the string.
+    """
+    tokens: list[str] = []
+    token_start = 0
+    nesting = 0
+    in_token = False
+
+    for idx, char in enumerate(edge_str):
+        if char == "(":
+            # A parenthesis at depth zero opens a new top-level token.
+            if nesting == 0:
+                in_token = True
+                token_start = idx
+            nesting += 1
+        elif char == ")":
+            nesting -= 1
+            if nesting < 0:
+                raise ValueError(f"Unbalanced parenthesis in edge string: '{edge_str}'")
+            if nesting == 0:
+                tokens.append(edge_str[token_start : idx + 1])
+                in_token = False
+        elif char == " ":
+            # Spaces inside a parenthesised group belong to that group.
+            if in_token and nesting == 0:
+                tokens.append(edge_str[token_start:idx])
+                in_token = False
+        elif not in_token:
+            in_token = True
+            token_start = idx
+
+    if not in_token:
+        return tuple(tokens)
+    if nesting > 0:
+        raise ValueError(f"Unbalanced parenthesis in edge string: '{edge_str}'")
+    # The last token runs to the end of the string.
+    tokens.append(edge_str[token_start:])
+    return tuple(tokens)
+
+
+def _parsed_token(token: str) -> Hyperedge:
+    """Convert one token into an edge: parse it with ``hedge`` when
+    ``_edge_str_has_outer_parens`` accepts it, otherwise wrap it in an
+    ``Atom``.
+    """
+    return hedge(token) if _edge_str_has_outer_parens(token) else Atom(token)
+
+
+def _collect_positions(tok_pos: Hyperedge) -> list[int]:
+    """Gather every non-negative token position found in a tok_pos tree."""
+    if not tok_pos.atom:
+        # Flatten the positions of all children, keeping their order.
+        return [pos for child in tok_pos for pos in _collect_positions(child)]
+    index = int(str(tok_pos))
+    return [] if index < 0 else [index]
+
+
+def _rebuild_with_text(
+    edge: Hyperedge,
+    tok_pos: Hyperedge,
+    tokens: list[str],
+) -> Hyperedge:
+    """Recursively rebuild an edge, assigning text from tokens and tok_pos.
+
+    ``tok_pos`` is walked in parallel with ``edge``. For an atom it is read
+    as an integer index into ``tokens`` (a negative value means "no text").
+    For a non-atomic edge its children are paired with the edge's children.
+
+    Returns a new Atom or Hyperedge carrying the derived ``text``.
+    """
+    if edge.atom:
+        atom = cast(Atom, edge)
+        pos = int(str(tok_pos))
+        # A negative position leaves the atom without text.
+        text = tokens[pos] if pos >= 0 else None
+        return Atom(str(atom), atom.parens, text=text)
+    else:
+        # strict=False: if edge and tok_pos differ in length, pairing stops
+        # silently at the shorter one.
+        new_children = tuple(
+            _rebuild_with_text(sub_edge, sub_tok_pos, tokens)
+            for sub_edge, sub_tok_pos in zip(edge, tok_pos, strict=False)
+        )
+        positions = _collect_positions(tok_pos)
+        if positions:
+            # The edge's text spans from its lowest to its highest token
+            # position, including any tokens in between.
+            min_pos = min(positions)
+            max_pos = max(positions)
+            text = " ".join(tokens[min_pos : max_pos + 1])
+        else:
+            text = None
+        return Hyperedge(new_children, text=text)
+
+
+def hedge(
+    source: str | Hyperedge | list | tuple | ParseResult,
+) -> Hyperedge:
+    """Create a hyperedge.
+
+    Accepted sources, checked in this order:
+
+    - an object exposing ``tok_pos``, ``tokens`` and ``edge`` attributes
+      (treated as a ParseResult): the edge is rebuilt with per-token text
+      and its ``text`` attribute is then set to the source's ``text``;
+    - a tuple or list: every item is converted recursively and the results
+      are wrapped in a Hyperedge;
+    - a string: parsed as edge notation;
+    - a Hyperedge, Atom or UniqueAtom: returned unchanged.
+
+    The type checks are exact, so subclasses of tuple, list or str are
+    not accepted.
+
+    Raises ValueError if a string source contains no tokens (or has
+    unbalanced parentheses, via ``split_edge_str``), and TypeError for
+    any other kind of source.
+    """
+    # Check for ParseResult via duck typing to avoid circular import
+    if (
+        hasattr(source, "tok_pos")
+        and hasattr(source, "tokens")
+        and hasattr(source, "edge")
+    ):
+        from hyperbase.parsers import ParseResult
+
+        _source = cast(ParseResult, source)
+        edge = _rebuild_with_text(_source.edge, _source.tok_pos, _source.tokens)
+        # NOTE(review): object.__setattr__ bypasses the class's own
+        # attribute assignment - presumably because edges are immutable;
+        # confirm against the Hyperedge definition.
+        object.__setattr__(edge, "text", _source.text)
+        return edge
+    if type(source) in {tuple, list}:
+        _source = cast(Iterable, source)
+        return Hyperedge(tuple(hedge(item) for item in _source))
+    elif type(source) is str:
+        # Newlines are treated as plain token separators.
+        edge_str = source.strip().replace("\n", " ")
+        edge_inner_str = edge_str
+
+        parens = _edge_str_has_outer_parens(edge_str)
+        if parens:
+            # Drops the first and last characters; the last one is not
+            # checked to actually be ")".
+            edge_inner_str = edge_str[1:-1]
+
+        tokens = split_edge_str(edge_inner_str)
+        if not tokens:
+            raise ValueError(f"Edge string is empty: '{source}'")
+        edges = tuple(_parsed_token(token) for token in tokens)
+        if len(edges) == 1 and isinstance(edges[0], Atom):
+            # A lone atom is re-created with the parens flag of the input.
+            return Atom(str(edges[0]), parens)
+        elif len(edges) > 0:
+            return Hyperedge(edges)
+        else:
+            # Defensive only: tokens is non-empty here, so edges is too.
+            raise ValueError(f"Edge string is empty: '{source}'")
+    elif type(source) in {Hyperedge, Atom, UniqueAtom}:
+        return source  # type: ignore
+    else:
+        raise TypeError(
+            f"Cannot create hyperedge from {type(source).__name__}: {source!r}"
+        )
+
+
+def build_atom(text: str, *parts: str) -> Atom:
+    """Build an atom from text and other parts.
+
+    ``text`` is escaped with ``str_to_atom``. The non-empty ``parts`` are
+    appended to it, each preceded by "/". When there are no non-empty
+    parts, the atom consists of the escaped text alone.
+    """
+    atom_str = str_to_atom(text)
+    parts_str = "/".join(part for part in parts if part)
+    # atom_str is bound before this check, so a call without (non-empty)
+    # parts returns a plain atom instead of raising UnboundLocalError.
+    if parts_str:
+        atom_str = f"{atom_str}/{parts_str}"
+    return Atom(atom_str)
@@ -19,7 +19,8 @@
 from rich.text import Text
 from rich.tree import Tree
 
-from hyperbase.hyperedge import Atom, Hyperedge, hedge
+from hyperbase.builders import hedge
+from hyperbase.hyperedge import Atom, Hyperedge
 from hyperbase.parsers import Parser, get_parser, list_parsers
 from hyperbase.parsers.correctness import badness_check
 
 
@@ -5,3 +5,23 @@
 
 # Pattern functions
 PATTERN_FUNCTIONS: set[str] = {"var", "atoms", "lemma", "any"}
+
+# Argument role ordering for normalisation
+argrole_order: dict[str, int] = {
+    "m": -1,
+    "s": 0,
+    "p": 1,
+    "a": 2,
+    "c": 3,
+    "o": 4,
+    "i": 5,
+    "t": 6,
+    "j": 7,
+    "x": 8,
+    "r": 9,
+    "?": 10,
+}
+
+# Valid argument roles by connector type
+valid_p_argroles: set[str] = {"s", "p", "a", "c", "o", "i", "t", "j", "x", "r", "?"}
+valid_b_argroles: set[str] = {"m", "a"}
@@ -0,0 +1,152 @@
+from __future__ import annotations
+
+from collections import Counter
+from typing import TYPE_CHECKING
+
+import hyperbase.constants as const
+
+if TYPE_CHECKING:
+    from hyperbase.hyperedge import Atom, Hyperedge
+
+
+def check_correctness(edge: Hyperedge) -> dict[Hyperedge, list[tuple[str, str]]]:
+    """Validate a hyperedge and return the errors found, keyed by the
+    (sub)edge they belong to.
+
+    Atoms are handled by ``_check_atom``, everything else by
+    ``_check_edge``.
+    """
+    checker = _check_atom if edge.atom else _check_edge
+    return checker(edge)  # type: ignore[arg-type]
+
+
+def _check_atom(atom: Atom) -> dict[Hyperedge, list[tuple[str, str]]]:
+    """Check that the value of ``atom.mtype()`` is one of the accepted types.
+
+    Returns a one-entry mapping from the atom to its error list, or an
+    empty mapping when the atom is valid.
+    """
+    main_type = atom.mtype()
+    if main_type in {"C", "P", "M", "B", "T", "J"}:
+        return {}
+    return {atom: [("bad-atom-type", f"{main_type} is not a valid atom type")]}
+
+
+def _check_edge(edge: Hyperedge) -> dict[Hyperedge, list[tuple[str, str]]]:
+    """Check a non-atomic edge and, recursively, all of its subedges.
+
+    The type of the connector (the edge's first element) selects which
+    structural rules apply. For connectors of type P or B the argument
+    roles are validated as well.
+
+    Returns a mapping from each offending (sub)edge to a list of
+    ``(error_code, message)`` tuples; edges without errors are absent.
+    """
+    output: dict[Hyperedge, list[tuple[str, str]]] = {}
+    errors: list[tuple[str, str]] = []
+
+    ct = edge[0].mtype()
+    # check if connector has valid type
+    if ct not in {"P", "M", "B", "T", "J"}:
+        errors.append(("conn-bad-type", f"connector has incorrect type: {ct}"))
+    # check if modifier structure is correct
+    if ct == "M":
+        if len(edge) != 2:
+            errors.append(("mod-1-arg", "modifiers can only have one argument"))
+    # check if builder structure is correct
+    elif ct == "B":
+        if len(edge) != 3:
+            errors.append(("build-2-args", "builders can only have two arguments"))
+        for arg in edge[1:]:
+            at = arg.mtype()
+            if at != "C":
+                e = f"builder argument {arg!s} has incorrect type: {at}"
+                errors.append(("build-arg-bad-type", e))
+    # check if trigger structure is correct
+    elif ct == "T":
+        if len(edge) != 2:
+            errors.append(("trig-1-arg", "triggers can only have one argument"))
+        for arg in edge[1:]:
+            at = arg.mtype()
+            if at not in {"C", "R"}:
+                e = f"trigger argument {arg!s} has incorrect type: {at}"
+                errors.append(("trig-bad-arg-type", e))
+    # check if predicate structure is correct
+    elif ct == "P":
+        for arg in edge[1:]:
+            at = arg.mtype()
+            if at not in {"C", "R", "S"}:
+                e = f"predicate argument {arg!s} has incorrect type: {at}"
+                errors.append(("pred-arg-bad-type", e))
+    # check if conjunction structure is correct
+    elif ct == "J" and len(edge) < 3:
+        errors.append(
+            ("conj-2-args-min", "conjunctions must have at least two arguments")
+        )
+
+    # check argument roles (validated for P and B connectors only)
+    if ct in {"P", "B"}:
+        try:
+            ars = edge.argroles()
+            if len(ars) > 0:
+                # pick the role set and error code matching the connector
+                if ct == "P":
+                    valid_roles = const.valid_p_argroles
+                    bad_role_code = "pred-bad-arg-role"
+                else:
+                    valid_roles = const.valid_b_argroles
+                    bad_role_code = "build-bad-arg-role"
+                for ar in ars:
+                    if ar not in valid_roles:
+                        errors.append(
+                            (
+                                bad_role_code,
+                                f"{ar} is not a valid argument role "
+                                f"for connector of type {ct}",
+                            )
+                        )
+
+                if len(ars) != len(edge) - 1:
+                    errors.append(
+                        (
+                            "bad-num-argroles",
+                            "number of argroles must match number of arguments",
+                        )
+                    )
+
+                # these roles may appear at most once; the tuple order fixes
+                # the order in which the errors are reported
+                ars_counts = Counter(ars)
+                for role in ("s", "o", "c", "i", "p", "a"):
+                    if ars_counts[role] > 1:
+                        errors.append(
+                            (
+                                f"argrole-{role}-1-max",
+                                f"argrole {role} can only be used once",
+                            )
+                        )
+            else:
+                errors.append(
+                    (
+                        "no-argroles",
+                        "Connectors of type P or B must have argument roles",
+                    )
+                )
+        except RuntimeError:
+            # malformed edges are detected elsewhere
+            pass
+
+    if len(errors) > 0:
+        output[edge] = errors
+
+    # recurse into every element, including the connector itself
+    for subedge in edge:
+        output.update(check_correctness(subedge))
+
+    return output
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-from hyperbase.hyperedge import hedge`
	`1`	`+from hyperbase.builders import hedge`
`2`	`2`	`from hyperbase.loaders import load_edges`
`3`	`3`	`from hyperbase.parsers import get_parser`
`4`	`4`