Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
d89cbc3
add initial rule generator v2 scaffolding
colinthebomb1 Apr 14, 2026
519df4c
add literals and tables in v2
colinthebomb1 Apr 14, 2026
a4f5f12
add variablize literal and table in v2
colinthebomb1 Apr 14, 2026
d3f66f3
remove regex and keep x y placeholders
colinthebomb1 Apr 14, 2026
5ed9a50
canonicalize x y placeholders
colinthebomb1 Apr 14, 2026
6c1352c
add variable list discovery in v2
colinthebomb1 Apr 14, 2026
a2e6978
add merge variable list in v2
colinthebomb1 Apr 14, 2026
a9067fb
add branches support in v2
colinthebomb1 Apr 14, 2026
408a3ee
add fingerprint support in v2
colinthebomb1 Apr 14, 2026
7afd25f
add unify variable names in v2
colinthebomb1 Apr 14, 2026
caff39d
add number of variables in v2
colinthebomb1 Apr 14, 2026
6d6d21a
add initial generate general rule in v2
colinthebomb1 Apr 14, 2026
4abae95
compound query support
colinthebomb1 Apr 21, 2026
99f3934
pass all existing tests
colinthebomb1 Apr 23, 2026
74800d4
fix tests
colinthebomb1 Apr 30, 2026
bd69c1d
remove any special rules from generalizations
colinthebomb1 Apr 30, 2026
e4996c5
migrate rule generator to v2 with full AST-based generalization
colinthebomb1 Apr 30, 2026
2aac818
remove dead code from rule_generator_v2
colinthebomb1 Apr 30, 2026
4956f66
add docstrings
colinthebomb1 Apr 30, 2026
fb7e59e
improve tests
colinthebomb1 May 5, 2026
aa65545
add v2 rule helper
colinthebomb1 May 5, 2026
159d0b5
cleanup
colinthebomb1 May 5, 2026
533f038
rule v2 fix
colinthebomb1 May 5, 2026
33e3428
Fix v2 rules
colinthebomb1 May 5, 2026
03b14cb
fix spreadsheet id 18
colinthebomb1 May 7, 2026
d11796d
address comments
colinthebomb1 May 7, 2026
73ae63d
update
colinthebomb1 May 14, 2026
ceac892
Merge branch 'main' into colin/rule-generator-v2
colinthebomb1 May 14, 2026
415aa47
fix after merge
colinthebomb1 May 14, 2026
cf2c12d
move placeholder handling to formatter
colinthebomb1 May 21, 2026
738067d
[Refactor] VariableLiteralNode, RuleV2 dataclass, and dead code clean…
colinthebomb1 Jun 12, 2026
8eb12db
fix rule actions
colinthebomb1 Jun 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions core/ast/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
LiteralNode,
ElementVariableNode,
SetVariableNode,
VariableLiteralNode,
OperatorNode,
FunctionNode,
SelectNode,
Expand All @@ -37,6 +38,7 @@
'LiteralNode',
'ElementVariableNode',
'SetVariableNode',
'VariableLiteralNode',
'OperatorNode',
'FunctionNode',
'SelectNode',
Expand Down
1 change: 1 addition & 0 deletions core/ast/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class NodeType(Enum):
# VarSQL specific
VAR = "var"
VARSET = "varset"
VAR_LITERAL = "var_literal"

# Operators
OPERATOR = "operator"
Expand Down
66 changes: 51 additions & 15 deletions core/ast/node.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from datetime import datetime
from typing import List, Set, Optional, Union
from typing import List, Set, Optional, Tuple, Union
from abc import ABC

from .enums import NodeType, JoinType, SortOrder
Expand Down Expand Up @@ -46,7 +46,7 @@ def __hash__(self):

class TableNode(Node):
"""Table reference node"""
def __init__(self, _name: str, _alias: Optional[str] = None, **kwargs):
def __init__(self, _name: str, _alias: Optional[Union[str, 'ElementVariableNode']] = None, **kwargs):
super().__init__(NodeType.TABLE, **kwargs)
self.name = _name
self.alias = _alias
Expand Down Expand Up @@ -80,7 +80,7 @@ def __hash__(self):

class ColumnNode(Node):
"""Column reference node"""
def __init__(self, _name: str, _alias: Optional[str] = None, _parent_alias: Optional[str] = None, _parent: Optional[TableNode|SubqueryNode] = None, **kwargs):
def __init__(self, _name: str, _alias: Optional[Union[str, 'ElementVariableNode']] = None, _parent_alias: Optional[Union[str, 'ElementVariableNode']] = None, _parent: Optional[TableNode|SubqueryNode] = None, **kwargs):
super().__init__(NodeType.COLUMN, **kwargs)
self.name = _name
self.alias = _alias
Expand All @@ -101,18 +101,20 @@ def __hash__(self):

class LiteralNode(Node):
    """Literal value node.

    Attributes:
        value: The literal's Python value (string, number, boolean,
            datetime, or ``None``).
        alias: Optional alias attached to the literal; ``None`` when the
            literal carries no alias.
    """

    def __init__(
        self,
        _value: str|int|float|bool|datetime|None,
        _alias: Optional[str] = None,
        **kwargs,
    ):
        """Store the literal value and its optional alias.

        Args:
            _value: The literal value.
            _alias: Optional alias for the literal.
            **kwargs: Forwarded unchanged to the base ``Node`` initializer.
        """
        super().__init__(NodeType.LITERAL, **kwargs)
        self.value = _value
        self.alias = _alias

    def __eq__(self, other):
        """Return whether ``other`` is a LiteralNode with equal base state, value and alias."""
        if not isinstance(other, LiteralNode):
            return False
        # NOTE(review): Python's ``==`` treats 1, 1.0 and True as equal, so
        # literals differing only in those types compare equal here — confirm
        # that this is the intended matching behaviour.
        return (super().__eq__(other) and
                self.value == other.value and
                self.alias == other.alias)

    def __hash__(self):
        """Hash over the same fields that ``__eq__`` compares."""
        return hash((super().__hash__(), self.value, self.alias))

class DataTypeNode(Node):
"""SQL data type node used in CAST expressions (e.g. TEXT, DATE, INTEGER)"""
Expand Down Expand Up @@ -170,17 +172,19 @@ def __hash__(self):

class ElementVariableNode(Node):
    """Rule element variable ``<name>`` (see ``VarType.ElementVariable`` in rule_parser_v2).

    Attributes:
        name: The variable's name.
        parent_alias: Optional qualifier stored alongside the variable; a
            plain string or another ``ElementVariableNode``.
        alias: Optional alias stored alongside the variable; a plain string
            or another ``ElementVariableNode``.
    """

    def __init__(
        self,
        _name: str,
        parent_alias: Optional[Union[str, 'ElementVariableNode']] = None,
        alias: Optional[Union[str, 'ElementVariableNode']] = None,
        **kwargs,
    ):
        """Store the variable name together with its optional qualifier and alias.

        Args:
            _name: Name of the element variable.
            parent_alias: Optional qualifier (string or element variable).
            alias: Optional alias (string or element variable).
            **kwargs: Forwarded unchanged to the base ``Node`` initializer.
        """
        super().__init__(NodeType.VAR, **kwargs)
        self.name = _name
        self.parent_alias = parent_alias
        self.alias = alias

    def __eq__(self, other):
        """Return whether ``other`` is an ElementVariableNode with equal base state, name, parent_alias and alias."""
        if not isinstance(other, ElementVariableNode):
            return False
        return (super().__eq__(other)
                and self.name == other.name
                and self.parent_alias == other.parent_alias
                and self.alias == other.alias)

    def __hash__(self):
        """Hash over the same fields that ``__eq__`` compares."""
        return hash((super().__hash__(), self.name, self.parent_alias, self.alias))


class SetVariableNode(Node):
Expand All @@ -198,6 +202,31 @@ def __hash__(self):
return hash((super().__hash__(), self.name))


class VariableLiteralNode(Node):
    """Placeholder standing in for a string literal, e.g. ``'%<x1>%'`` in a LIKE predicate.

    The wildcard characters around the placeholder are kept in ``prefix``
    and ``suffix``; for instance ``LIKE '%foo%'`` becomes
    ``VariableLiteralNode('x1', prefix='%', suffix='%')``.
    """

    def __init__(self, _name: str, prefix: str = "", suffix: str = "",
                 _alias: Optional[str] = None, **kwargs):
        """Record the placeholder name, surrounding text and optional alias.

        Args:
            _name: Name of the placeholder variable.
            prefix: Text preceding the placeholder inside the literal.
            suffix: Text following the placeholder inside the literal.
            _alias: Optional alias for the literal.
            **kwargs: Forwarded unchanged to the base ``Node`` initializer.
        """
        super().__init__(NodeType.VAR_LITERAL, **kwargs)
        self.alias = _alias
        self.suffix = suffix
        self.prefix = prefix
        self.name = _name

    def __eq__(self, other):
        """Return whether ``other`` is a VariableLiteralNode with equal base state and fields."""
        if not isinstance(other, VariableLiteralNode):
            return False
        base_result = super().__eq__(other)
        if not base_result:
            # Hand back the base-class result itself, as an ``and`` chain would.
            return base_result
        if not self.name == other.name:
            return False
        if not self.prefix == other.prefix:
            return False
        if not self.suffix == other.suffix:
            return False
        return self.alias == other.alias

    def __hash__(self):
        """Hash over the same fields that ``__eq__`` compares."""
        key = (super().__hash__(), self.name, self.prefix, self.suffix, self.alias)
        return hash(key)


class OperatorNode(Node):
"""Operator node"""
def __init__(self, _left: Node, _name: str, _right: Optional[Node] = None, **kwargs):
Expand Down Expand Up @@ -229,7 +258,7 @@ def __init__(self, _operand: Node, _name: str, **kwargs):

class FunctionNode(Node):
"""Function call node"""
def __init__(self, _name: str, _args: Optional[List[Node]] = None, _alias: Optional[str] = None, **kwargs):
def __init__(self, _name: str, _args: Optional[List[Node]] = None, _alias: Optional[Union[str, 'ElementVariableNode']] = None, **kwargs):
if _args is None:
_args = []
super().__init__(NodeType.FUNCTION, children=_args, **kwargs)
Expand All @@ -249,24 +278,31 @@ def __hash__(self):

class JoinNode(Node):
    """JOIN clause node.

    Attributes:
        left_table: Left operand of the join (table, nested join or subquery).
        right_table: Right operand of the join (table or subquery).
        join_type: The ``JoinType`` of this join.
        on_condition: Optional ON condition node.
        using: List of USING nodes, or ``None`` when no non-empty USING list
            was supplied.
    """

    def __init__(
        self,
        _left_table: Union['TableNode', 'JoinNode', 'SubqueryNode'],
        _right_table: Union['TableNode', 'SubqueryNode'],
        _join_type: JoinType = JoinType.INNER,
        _on_condition: Optional['Node'] = None,
        _using: Optional[List['Node']] = None,
        **kwargs,
    ):
        """Build the join and register its operands as child nodes.

        Children are ordered: left table, right table, the ON condition (if
        truthy), then every USING node.

        Args:
            _left_table: Left operand of the join.
            _right_table: Right operand of the join.
            _join_type: Kind of join; defaults to ``JoinType.INNER``.
            _on_condition: Optional ON condition.
            _using: Optional USING nodes; an empty value is stored as ``None``.
            **kwargs: Forwarded unchanged to the base ``Node`` initializer.
        """
        # Materialise once: the USING nodes are needed both as children and as
        # an attribute, and a one-shot iterable would otherwise be exhausted
        # after the first use.
        using_nodes = list(_using) if _using else []

        children = [_left_table, _right_table]
        if _on_condition:
            children.append(_on_condition)
        children.extend(using_nodes)
        super().__init__(NodeType.JOIN, children=children, **kwargs)

        self.left_table = _left_table
        self.right_table = _right_table
        self.join_type = _join_type
        self.on_condition = _on_condition
        self.using = using_nodes or None

    def __eq__(self, other):
        """Return whether ``other`` is a JoinNode with equal base state, join type and USING list."""
        if not isinstance(other, JoinNode):
            return False
        return (super().__eq__(other) and
                self.join_type == other.join_type and
                self.using == other.using)

    def __hash__(self):
        """Hash over the base hash, the join type and the USING nodes (empty tuple when absent)."""
        using_key = tuple(self.using) if self.using else ()
        return hash((super().__hash__(), self.join_type, using_key))

# ============================================================================
# Query Structure Nodes
Expand Down Expand Up @@ -463,4 +499,4 @@ def __eq__(self, other):
return super().__eq__(other) and self.whens == other.whens and self.else_val == other.else_val

def __hash__(self):
return hash((super().__hash__(), tuple(self.whens), self.else_val))
return hash((super().__hash__(), tuple(self.whens), self.else_val))
Loading
Loading