Skip to content

Commit 684cb47

Browse files
committed
This is not really AQA, I just put it here
This is an attempt at an interpreter from the book "Crafting Interpreters". I'm just a copy monkey that is stepping through the book like a chimp. Pretty cool though! I have a working scanner and expression parser! And it parses! .. holy shit!
1 parent 95443fe commit 684cb47

1 file changed

Lines changed: 353 additions & 0 deletions

File tree

  • teachprogramming/static/language_reference/languages/aqa
Lines changed: 353 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,353 @@
1+
import enum
2+
from typing import NamedTuple, Self, Callable
3+
from collections.abc import Sequence, MutableSequence, Mapping
4+
from functools import cached_property
5+
from pprint import pprint as pp
6+
import functools
7+
import logging
8+
9+
10+
log = logging.getLogger(__name__)
11+
12+
# https://filestore.aqa.org.uk/resources/computing/AQA-8525-NG-PC.PDF
13+
14+
# https://www.geeksforgeeks.org/compiler-design/recursive-descent-parser/
15+
# https://craftinginterpreters.com/parsing-expressions.html
16+
17+
18+
19+
20+
class TokenType(enum.StrEnum):
    """Every kind of token the scanner can emit.

    Because this is a StrEnum, each member compares equal to its value string:
    auto() members equal their lowercase name (TokenType.AND == 'and') and
    operator members equal their lexeme (TokenType.PLUS == '+').
    """
    # Tokens whose lexeme varies; values come from auto() (lowercase name).
    IDENTIFIER = enum.auto()
    STRING = enum.auto()
    NUMBER = enum.auto()
    EOF = enum.auto()

    # Two-character operators.
    BANG_EQUAL = '!='
    EQUAL_EQUAL = '=='
    LESS_EQUAL = '<='
    GREATER_EQUAL = '>='

    # Single-character punctuation and operators.
    LEFT_PAREN = '('
    RIGHT_PAREN = ')'
    LEFT_BRACE = '{'
    RIGHT_BRACE = '}'
    COMMA = ','
    DOT = '.'
    MINUS = '-'
    PLUS = '+'
    SEMICOLON = ';'
    STAR = '*'
    BANG = '!'
    GREATER = '>'
    LESS = '<'
    EQUAL = '='
    SLASH = '/'

    # Reserved words; auto() makes each value the lowercase keyword itself,
    # which is what makes the KEYWORDS membership test below work on raw text.
    AND = enum.auto()
    CLASS = enum.auto()
    ELSE = enum.auto()
    FALSE = enum.auto()
    FOR = enum.auto()
    IF = enum.auto()
    NIL = enum.auto()
    OR = enum.auto()
    PRINT = enum.auto()
    RETURN = enum.auto()
    SUPER = enum.auto()
    THIS = enum.auto()
    TRUE = enum.auto()
    VAR = enum.auto()
    WHILE = enum.auto()
# ------------------------------------------------------------------------------
64+
65+
class TextLocation(NamedTuple):
    """Immutable (line, col) position within the source text."""
    line: int
    col: int

    def __str__(self) -> str:
        return 'Line: {0} Col: {1}'.format(self.line, self.col)
class Token(NamedTuple):
    """One scanned lexeme plus its classification and source position."""
    # NOTE(review): Scanner.addToken captures the location *after* consuming
    # the lexeme, so this points just past the lexeme, not at its start.
    location: TextLocation
    lexeme: str
    type: TokenType
    # Processed value, e.g. string contents without quotes; '' when unused.
    literal: str
class TokenError(NamedTuple):
    """A scan error the Scanner records in `_errors` instead of raising."""
    location: TextLocation
    message: str
class MutableTextLocation():
    """Cursor position that the scanner mutates as it walks the source."""

    def __init__(self) -> None:
        self.line = 0
        self.col = 0

    @property
    def immutable(self) -> 'TextLocation':
        """Snapshot of the current position as an immutable TextLocation."""
        return TextLocation(self.line, self.col)

    def newLine(self) -> None:
        """Move to the start of the next line."""
        self.line += 1
        self.col = 0
# Contract for scanner handlers: truthy return means "I consumed input here";
# None/False means "not my token, let the next handler try".
type wasConsumed = bool | None
TokenHandler = Callable[['Scanner'], wasConsumed]
class Scanner():
    """Drives a sequence of TokenHandler callables over `source` to build tokens.

    Each handler either consumes some input (returning truthy) or declines;
    when no handler consumes, the offending character is recorded as a
    TokenError and skipped. Errors are collected in `_errors`, not raised.
    """

    def __init__(self, source: str, token_handlers: Sequence[TokenHandler]):
        assert source
        assert token_handlers
        self.source = source
        self.token_handlers = token_handlers
        self.location = MutableTextLocation()  # line/col, mutated while scanning
        self.index_start: int = 0    # start of the lexeme currently being scanned
        self.index_current: int = 0  # next character to consume
        self._tokens: MutableSequence[Token] = []
        self._errors: MutableSequence[TokenError] = []

    @property
    def isAtEnd(self) -> bool:
        return self.index_current >= len(self.source)

    def advance(self, inc=1) -> None:
        """Consume `inc` characters. Column tracking only; newlines are the
        new_line handler's job."""
        self.index_current += inc
        self.location.col += inc

    def match(self, expected: str) -> bool:
        """Consume `expected` if and only if it appears at the current position."""
        if expected != self.peek(len(expected)): return False
        self.advance(len(expected))
        return True

    def peek(self, offset: int = 1) -> str:
        """Return the next `offset` characters without consuming them
        (shorter, possibly empty, near end of source)."""
        return self.source[self.index_current : min(self.index_current+offset, len(self.source))]

    def addToken(self, type: TokenType, literal: str = '') -> None:
        """Record a token spanning index_start..index_current."""
        self._tokens.append(Token(
            location=self.location.immutable,  # captured post-lexeme: points just past it
            lexeme=self.source[self.index_start:self.index_current],
            type=type,
            literal=literal
        ))

    @cached_property
    def tokens(self) -> Sequence[Token]:
        """Scan the whole source (once; result cached) and return the tokens."""
        while not self.isAtEnd:
            self.index_start = self.index_current
            if not any(token_handler(self) for token_handler in self.token_handlers):
                self._errors.append(TokenError(self.location.immutable, f"Unexpected character {self.peek()}"))
                # BUG FIX: skip the unrecognized character. Without this the
                # loop never advances and spins forever appending errors.
                self.advance()
        self.addToken(TokenType.EOF, '')
        return self._tokens
def white_space(s: Scanner) -> wasConsumed:
    """Consume a single space, tab, or carriage return if present."""
    for candidate in (' ', '\t', '\r'):
        if s.match(candidate):
            return True
    return False
def new_line(s: Scanner) -> wasConsumed:
    """Consume a single newline and bump the scanner's line counter."""
    if not s.match('\n'):
        return None
    s.location.newLine()
    return True
def comment(s: Scanner) -> wasConsumed:
    """Consume a '#' comment through to (but not including) the end of line."""
    if s.match('#'):
        # Leave the '\n' for the new_line handler so line counting stays right.
        while (s.peek() not in ('\n', '')):
            s.advance()
        # BUG FIX: report the consumption. Returning None made Scanner.tokens
        # treat this position as unhandled and log a spurious error.
        return True
def string(s: Scanner) -> wasConsumed:
    """Consume a double-quoted string literal; the STRING token's literal is
    the contents without the surrounding quotes."""
    if not s.match('"'):
        return None
    while s.peek() != '"' and not s.isAtEnd:
        # BUG FIX: count the newline *before* consuming it; the original
        # advanced first and so missed a '\n' immediately after the quote.
        if s.peek() == '\n': s.location.newLine()
        s.advance()
    if s.isAtEnd:
        # BUG FIX: only report "unterminated" when the loop ran out of source.
        # The original advanced past the closing quote first, so a string that
        # ended exactly at EOF was wrongly flagged; it also emitted a token
        # for genuinely unterminated strings.
        s._errors.append(TokenError(s.location.immutable, 'Unterminated string'))
        return True
    s.advance()  # consume the closing '"'
    s.addToken(TokenType.STRING, s.source[s.index_start+1:s.index_current-1])
    # BUG FIX: signal consumption so the scanner does not log a spurious error.
    return True
def number(s: Scanner) -> wasConsumed:
    """Consume an integer or decimal number; literal is the raw source slice."""
    if s.peek().isdigit():
        while s.peek().isdigit(): s.advance()
        # A '.' only belongs to the number when a digit follows it.
        lookahead = s.peek(2)
        if len(lookahead) == 2 and lookahead[0] == '.' and lookahead[1].isdigit():
            s.advance()  # consume the '.'
            while s.peek().isdigit(): s.advance()
        s.addToken(TokenType.NUMBER, s.source[s.index_start:s.index_current])
        # BUG FIX: signal consumption so the scanner does not log a spurious error.
        return True
# Reserved words. Because TokenType is a StrEnum, members equal their value
# strings, so `some_str in KEYWORDS` works with plain scanned text.
KEYWORDS = frozenset((
    TokenType.AND,
    TokenType.CLASS,
    TokenType.ELSE,
    TokenType.FALSE,
    TokenType.FOR,
    TokenType.IF,
    TokenType.NIL,
    TokenType.OR,
    TokenType.PRINT,
    TokenType.RETURN,
    TokenType.SUPER,
    TokenType.THIS,
    TokenType.TRUE,
    TokenType.VAR,
    TokenType.WHILE,
))
def identifier(s: Scanner) -> wasConsumed:
    """Consume an alphanumeric word; emit a keyword token or IDENTIFIER."""
    if s.peek().isalpha():
        while s.peek().isalnum(): s.advance()
        text = s.source[s.index_start:s.index_current]
        # StrEnum members equal their value, so a plain-str membership test works.
        s.addToken(TokenType(text) if text in KEYWORDS else TokenType.IDENTIFIER, text)
        # BUG FIX: signal consumption so the scanner does not log a spurious error.
        return True
    # NOTE(review): a leading '_' is not accepted as an identifier — confirm intended.
def createDefaultTokenHandlerFor(t: str) -> TokenHandler:
    """Build a handler that recognizes the fixed lexeme `t` and emits the
    TokenType whose value is `t`."""
    def handler(s: Scanner) -> wasConsumed:
        if not s.match(t):
            return None
        s.addToken(TokenType(t), t)
        return True
    return handler
# Handler order matters twice over: handlers are tried left to right, and the
# two-character operators ('!=', '==', '<=', '>=') are listed before their
# single-character prefixes ('!', '=', '<', '>') so the longer match wins.
DEFAULT_TOKEN_HANDLERS: Sequence[TokenHandler] = (
    white_space,
    new_line,
    comment,
    string,
    number,
    identifier,
    *map(createDefaultTokenHandlerFor, ('!=', '==', '<=', '>=', '(', ')', '{', '}', ',', '.', '-', '+', ';', '*', '!', '>', '<', '=', '/'))
)
def test_scanner():
    """The scanner tokenizes a small expression and drops the trailing comment."""
    scanned = Scanner('thing = ("test" + 1.23) # This is a comment', DEFAULT_TOKEN_HANDLERS).tokens
    expected = (
        TokenType.IDENTIFIER,
        TokenType.EQUAL,
        TokenType.LEFT_PAREN,
        TokenType.STRING,
        TokenType.PLUS,
        TokenType.NUMBER,
        TokenType.RIGHT_PAREN,
        TokenType.EOF,
    )
    assert tuple(token.type for token in scanned) == expected
# ------------------------------------------------------------------------------
223+
224+
import abc
225+
226+
class Expr(abc.ABC):
    """Abstract base for all expression AST nodes produced by the Parser."""
    pass
class Literal(Expr):
    """Leaf node holding a literal value (string, number, bool, or nil)."""

    def __init__(self, literal: str|bool|None|int|float):
        self.literal = literal

    def __str__(self) -> str:
        return f'{self.literal}'
class Unary(Expr):
    """Prefix operator applied to a single sub-expression."""

    def __init__(self, operator: Token, expression: Expr):
        self.operator = operator
        self.expression = expression

    def __str__(self) -> str:
        return ''.join((self.operator.type.value, str(self.expression)))
class Binary(Expr):
    """Infix operator node with left and right sub-expressions."""

    def __init__(self, expression1: Expr, operator: Token, expression2: Expr):
        self.expression1 = expression1
        self.operator = operator
        self.expression2 = expression2

    def __str__(self) -> str:
        return f'{self.expression1}{self.operator.type.value}{self.expression2}'
class Grouping(Expr):
    """Parenthesised sub-expression; __str__ restores the parentheses."""

    def __init__(self, expression: Expr):
        self.expression = expression

    def __str__(self) -> str:
        return '(' + str(self.expression) + ')'
class Parser():
254+
class ParseError(BaseException): ...
255+
256+
def __init__(self, tokens: Sequence[Token]):
257+
self.tokens = tokens
258+
self.index_current: int = 0
259+
260+
@property
261+
def parse(self) -> Expr | None:
262+
try:
263+
return self.expression()
264+
except self.ParseError as pe:
265+
return None
266+
267+
@property
268+
def peek(self) -> Token:
269+
return self.tokens[self.index_current]
270+
@property
271+
def previous(self) -> Token:
272+
return self.tokens[self.index_current - 1]
273+
@property
274+
def advance(self) -> Token:
275+
if not self.isAtEnd: self.index_current += 1
276+
return self.previous
277+
@property
278+
def isAtEnd(self) -> bool:
279+
return self.peek.type == TokenType.EOF
280+
def check(self, type: TokenType) -> bool:
281+
if self.isAtEnd: return False
282+
return self.peek.type == type
283+
def match(self, *types: TokenType) -> Token | None:
284+
for t in types:
285+
if self.check(t):
286+
return self.advance
287+
def consume(self, type: TokenType, error_message: str) -> Token:
288+
if self.check(type): return self.advance
289+
raise self.error(self.peek, error_message)
290+
291+
def error(self, token: Token, error_message: str) -> 'Parser.ParseError':
292+
log.error(f'{token.location} - {token.lexeme} - {error_message}')
293+
return self.ParseError()
294+
def synchronize(self) -> None:
295+
# humm ... sure this can be python-ed and simplified
296+
self.advance
297+
while not self.isAtEnd:
298+
if self.previous.type == TokenType.SEMICOLON: return
299+
match self.peek.type:
300+
case (TokenType.CLASS, TokenType.VAR, TokenType.FOR, TokenType.IF, TokenType.WHILE, TokenType.PRINT, TokenType.RETURN):
301+
return
302+
self.advance
303+
"""
304+
expression → equality ;
305+
equality → comparison ( ( "!=" | "==" ) comparison )* ;
306+
comparison → term ( ( ">" | ">=" | "<" | "<=" ) term )* ;
307+
term → factor ( ( "-" | "+" ) factor )* ;
308+
factor → unary ( ( "/" | "*" ) unary )* ;
309+
unary → ( "!" | "-" ) unary | primary ;
310+
primary → NUMBER | STRING | "true" | "false" | "nil" | "(" expression ")" ;
311+
"""
312+
def expression(self) -> Expr:
313+
return self.equality()
314+
def equality(self) -> Expr:
315+
expr = self.comparison()
316+
while operator := self.match(TokenType.BANG_EQUAL, TokenType.EQUAL_EQUAL):
317+
expr = Binary(expr, operator, self.comparison())
318+
return expr
319+
def comparison(self) -> Expr:
320+
expr = self.term()
321+
while operator := self.match(TokenType.GREATER, TokenType.GREATER_EQUAL, TokenType.LESS, TokenType.LESS_EQUAL):
322+
expr = Binary(expr, operator, self.term())
323+
return expr
324+
def term(self) -> Expr:
325+
expr = self.factor()
326+
while operator := self.match(TokenType.MINUS, TokenType.PLUS):
327+
expr = Binary(expr, operator, self.factor())
328+
return expr
329+
def factor(self) -> Expr:
330+
expr = self.unary()
331+
while operator := self.match(TokenType.SLASH, TokenType.STAR):
332+
expr = Binary(expr, operator, self.unary())
333+
return expr
334+
def unary(self) -> Expr:
335+
if operator := self.match(TokenType.BANG, TokenType.MINUS):
336+
return Unary(operator, self.unary())
337+
return self.primary()
338+
def primary(self) -> Expr:
339+
if self.match(TokenType.FALSE): return Literal(False)
340+
if self.match(TokenType.TRUE): return Literal(True)
341+
if self.match(TokenType.NIL): return Literal(None)
342+
if token := self.match(TokenType.NUMBER, TokenType.STRING): return Literal(token.literal)
343+
if self.match(TokenType.LEFT_PAREN):
344+
expr = self.expression()
345+
self.consume(TokenType.RIGHT_PAREN, "Expect ')' after expression.")
346+
return Grouping(expr)
347+
raise self.error(self.peek, "Expect expression.")
348+
349+
350+
def test_parser():
    """The parser builds an expression tree with the expected precedence."""
    tokens = Scanner('12.3 * (45 - "test") >= !10', DEFAULT_TOKEN_HANDLERS).tokens
    expr = Parser(tokens).parse
    # BUG FIX: replace the `assert False` placeholder with real assertions.
    assert expr is not None
    # '*' binds tighter than '>=', and the parenthesised group is preserved.
    assert str(expr) == '12.3*(45-test)>=!10'

0 commit comments

Comments
 (0)