Skip to content

Commit 8a78a91

Browse files
authored
Merge pull request #1 from abstra-app/fix/parser-precedence-and-lexer-keywords
Fix operator precedence and lexer keyword matching
2 parents ae7e6bf + cb37f60 commit 8a78a91

3 files changed

Lines changed: 386 additions & 156 deletions

File tree

abstra_json_sql/eval_test.py

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1034,3 +1034,219 @@ def test_delete_all(self):
10341034
result = eval_sql(code=code, tables=tables, ctx=ctx)
10351035
self.assertIsNone(result)
10361036
self.assertEqual(tables.get_table("foo").data, [])
1037+
1038+
def test_reserved_keyword_as_column_name_select(self):
    """Quoted reserved words can be projected as ordinary columns in a SELECT."""
    reserved_names = ("select", "update", "where")
    store = InMemoryTables(
        tables=[
            Table(
                name="data",
                columns=[
                    Column(name=column_name, schema=ColumnType.string)
                    for column_name in reserved_names
                ],
                data=[
                    {"select": "val1", "update": "val2", "where": "val3"},
                    {"select": "val4", "update": "val5", "where": "val6"},
                ],
            )
        ],
    )

    rows = eval_sql(
        code='select "select", "update", "where" from data',
        tables=store,
        ctx={},
    )

    # Every stored row should come back unchanged, in insertion order.
    expected_rows = [
        {"select": "val1", "update": "val2", "where": "val3"},
        {"select": "val4", "update": "val5", "where": "val6"},
    ]
    self.assertEqual(rows, expected_rows)
1066+
1067+
def test_reserved_keyword_as_column_name_insert(self):
    """INSERT accepts quoted reserved words in its column list."""
    reserved_names = ("select", "update", "where")
    target = Table(
        name="data",
        columns=[
            Column(name=column_name, schema=ColumnType.string)
            for column_name in reserved_names
        ],
        data=[],
    )
    store = InMemoryTables(tables=[target])
    statement = "insert into data (\"select\", \"update\", \"where\") values ('a', 'b', 'c')"

    outcome = eval_sql(code=statement, tables=store, ctx={})

    # The statement yields no result set; its effect is the newly stored row.
    self.assertIsNone(outcome)
    stored_rows = store.get_table("data").data
    self.assertEqual(stored_rows, [{"select": "a", "update": "b", "where": "c"}])
1090+
1091+
def test_reserved_keyword_as_column_name_update(self):
    """UPDATE can both assign to and filter on quoted reserved-word columns."""
    target = Table(
        name="data",
        columns=[
            Column(name=column_name, schema=ColumnType.string)
            for column_name in ("select", "update")
        ],
        data=[
            {"select": "a", "update": "old"},
            {"select": "b", "update": "keep"},
        ],
    )
    store = InMemoryTables(tables=[target])
    statement = "update data set \"update\" = 'new_value' where \"select\" = 'a'"

    outcome = eval_sql(code=statement, tables=store, ctx={})

    self.assertIsNone(outcome)
    # Only the row whose "select" column equals 'a' is rewritten.
    rows_after = store.get_table("data").data
    self.assertEqual(
        rows_after,
        [
            {"select": "a", "update": "new_value"},
            {"select": "b", "update": "keep"},
        ],
    )
1119+
1120+
def test_reserved_keyword_as_column_name_where(self):
    """A quoted reserved-word column can be used as a WHERE predicate operand."""
    target = Table(
        name="data",
        columns=[
            Column(name="select", schema=ColumnType.string),
            Column(name="id", schema="int"),
        ],
        data=[
            {"id": 1, "select": "test"},
            {"id": 2, "select": "other"},
            {"id": 3, "select": "test"},
        ],
    )
    store = InMemoryTables(tables=[target])

    matches = eval_sql(
        code="select * from data where \"select\" = 'test'",
        tables=store,
        ctx={},
    )

    # Row 2 holds "other" and must be filtered out.
    expected_rows = [
        {"id": 1, "select": "test"},
        {"id": 3, "select": "test"},
    ]
    self.assertEqual(matches, expected_rows)
1148+
1149+
def test_reserved_keyword_without_quotes_should_fail(self):
    """Unquoted reserved words in a column list are rejected while parsing."""
    reserved_names = ("select", "update", "where")
    store = InMemoryTables(
        tables=[
            Table(
                name="data",
                columns=[
                    Column(name=column_name, schema=ColumnType.string)
                    for column_name in reserved_names
                ],
                data=[],
            )
        ],
    )
    statement = "insert into data (select, update, where) values ('a', 'b', 'c')"

    # The parser expects a column name but receives a keyword token, which
    # surfaces as an AssertionError.
    self.assertRaises(
        AssertionError,
        eval_sql,
        code=statement,
        tables=store,
        ctx={},
    )
1169+
1170+
def test_where_clause_with_column_reference(self):
    """A WHERE predicate resolves a bare column name against the table's rows."""
    users = Table(
        name="users",
        columns=[
            Column(name=column_name, schema=ColumnType.string)
            for column_name in ("id", "name")
        ],
        data=[
            {"id": "user_123", "name": "Alice"},
            {"id": "user_456", "name": "Bob"},
            {"id": "user_789", "name": "Charlie"},
        ],
    )
    store = InMemoryTables(tables=[users])

    matches = eval_sql(
        code="select * from users where id = 'user_123'",
        tables=store,
        ctx={},
    )

    # Exactly one user carries the requested id.
    self.assertEqual(matches, [{"id": "user_123", "name": "Alice"}])
1192+
1193+
def test_where_clause_with_multiple_conditions(self):
    """Two column comparisons joined by AND must both hold for a row to match."""
    products = Table(
        name="products",
        columns=[
            Column(name=column_name, schema="int")
            for column_name in ("id", "price", "stock")
        ],
        data=[
            {"id": 1, "price": 5, "stock": 10},
            {"id": 2, "price": 15, "stock": 5},
            {"id": 3, "price": 20, "stock": 0},
            {"id": 4, "price": 12, "stock": 3},
        ],
    )
    store = InMemoryTables(tables=[products])

    matches = eval_sql(
        code="select * from products where price > 10 and stock > 0",
        tables=store,
        ctx={},
    )

    # Row 1 fails the price test and row 3 fails the stock test.
    expected_rows = [
        {"id": 2, "price": 15, "stock": 5},
        {"id": 4, "price": 12, "stock": 3},
    ]
    self.assertEqual(matches, expected_rows)
1223+
1224+
def test_where_clause_column_comparison(self):
    """A WHERE predicate may compare one column against another column."""
    inventory = Table(
        name="inventory",
        columns=[
            Column(name="item", schema=ColumnType.string),
            Column(name="quantity", schema="int"),
            Column(name="min_quantity", schema="int"),
        ],
        data=[
            {"item": "A", "quantity": 5, "min_quantity": 10},
            {"item": "B", "quantity": 20, "min_quantity": 15},
            {"item": "C", "quantity": 3, "min_quantity": 5},
        ],
    )
    store = InMemoryTables(tables=[inventory])

    matches = eval_sql(
        code="select * from inventory where quantity < min_quantity",
        tables=store,
        ctx={},
    )

    # Item B is above its minimum, so only A and C are returned.
    expected_rows = [
        {"item": "A", "quantity": 5, "min_quantity": 10},
        {"item": "C", "quantity": 3, "min_quantity": 5},
    ]
    self.assertEqual(matches, expected_rows)

abstra_json_sql/lexer.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,27 @@ def extract_name(code: str):
5151
def _keyword_match_length(code: str):
    """Return the length of the keyword that ``code`` starts with, or ``None``.

    Keywords are tried in the order they appear in ``keywords`` and compared
    case-insensitively. A keyword only counts as a match when it ends at a
    token boundary: either it consumes all of ``code`` or the character right
    after it is not a letter, a digit, ``_`` or ``.``. This keeps a name such
    as ``INventory`` from being lexed as the keyword ``IN``.
    """
    for keyword in keywords:
        size = len(keyword)
        # Compare only the leading slice: uppercasing all of ``code`` for
        # every keyword is wasted work, and case conversion can change the
        # length of some non-ASCII text, which would misalign ``code[size]``.
        prefix = code[:size]
        if len(prefix) < size or prefix.upper() != keyword.upper():
            continue
        if len(code) == size:
            return size
        following = code[size]
        # NOTE(review): "." is treated as a name continuation here,
        # presumably so dotted references are lexed as names - confirm.
        if not (following.isalnum() or following == "_" or following == "."):
            return size
    return None


def start_with_keyword(code: str):
    """Return ``True`` when ``code`` begins with a keyword at a token boundary."""
    return _keyword_match_length(code) is not None


def extract_keyword(code: str):
    """Split a leading keyword off ``code``.

    Returns a ``(Token("keyword", text), rest)`` pair, where ``text`` keeps
    the casing found in ``code`` and ``rest`` is the remaining input. Returns
    ``None`` when ``code`` does not start with a keyword.
    """
    size = _keyword_match_length(code)
    if size is None:
        return None
    return Token("keyword", code[:size]), code[size:]
6275

6376

6477
def start_with_quoted_name(code: str):

0 commit comments

Comments
 (0)