Skip to content

Commit c14813e

Browse files
authored
Add SQLite multi-query parser (#48)
1 parent 97d4ae0 commit c14813e

4 files changed

Lines changed: 570 additions & 1 deletion

File tree

readme.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ A streaming PHP parser for splitting multi-query SQL files into individual state
1212
- **MySQL** -- backtick identifiers, `DELIMITER` command, `#` comments
1313
- **PostgreSQL** -- dollar-quoted strings (`$BODY$...$BODY$`), `E'...'` escape strings
1414
- **SQL Server** -- `[bracketed]` identifiers, `BEGIN...END` blocks
15+
- **SQLite** -- all three identifier styles (`"double"`, `` `backtick` ``, `[bracket]`), `BEGIN...END` blocks for triggers
1516

1617
All parsers handle standard SQL comments (`--`, `/* */`), quoted strings, and semicolon delimiters.
1718

@@ -57,7 +58,7 @@ foreach ($parser->parseFileStream($stream) as $query) {
5758
}
5859
```
5960

60-
Available parsers: `MySqlMultiQueryParser`, `PostgreSqlMultiQueryParser`, `SqlServerMultiQueryParser`.
61+
Available parsers: `MySqlMultiQueryParser`, `PostgreSqlMultiQueryParser`, `SqlServerMultiQueryParser`, `SqliteMultiQueryParser`.
6162

6263
### License
6364

src/SqliteMultiQueryParser.php

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
<?php declare(strict_types = 1);
2+
3+
namespace Nextras\MultiQueryParser;
4+
5+
use Iterator;
6+
7+
8+
/**
 * Multi-query parser for the SQLite dialect.
 *
 * Splits SQL text into individual statements on `;` while treating the following
 * as opaque, so that semicolons inside them are not taken for delimiters:
 *
 * - single-quoted strings (`''` escapes a quote),
 * - "double-quoted", `backtick` and [bracket] identifiers (doubled closers escape),
 * - block comments and `--` line comments,
 * - BEGIN ... END bodies (e.g. in CREATE TRIGGER), which contain their own
 *   semicolon-terminated statements.
 *
 * `BEGIN` followed by TRANSACTION / DEFERRED / IMMEDIATE / EXCLUSIVE, by `;`, or by
 * the end of the input is a plain transaction statement, not a compound block.
 */
class SqliteMultiQueryParser extends BaseMultiQueryParser
{
	/**
	 * Lazily yields every non-empty statement matched in the given stream.
	 *
	 * The stream is handed to PatternIterator together with the SQLite query
	 * pattern; each produced match is expected to expose the statement text
	 * (without its terminating semicolon) in the `query` named group.
	 * NOTE(review): chunk buffering / re-matching is PatternIterator's job and is
	 * not visible from this class.
	 *
	 * @param Iterator $stream source of SQL text, presumably string chunks
	 * @return Iterator generator of statement strings
	 */
	public function parseStringStream(Iterator $stream): Iterator
	{
		$matches = new PatternIterator($stream, $this->getQueryPattern());

		foreach ($matches as $match) {
			// Matches made only of separators/comments carry no (or an empty) query.
			$query = $match['query'] ?? '';
			if ($query !== '') {
				yield $query;
			}
		}
	}


	/**
	 * Builds the PCRE pattern that matches one statement at the current offset.
	 *
	 * Named subroutines (DEFINE block):
	 * - sqI / dqI / btI / bkI: the remainder of a '...', "...", `...` or [...] token
	 *   after its opening character, including the closing character,
	 * - bcI: the remainder of a block comment after its opener,
	 * - lc: a `--` line comment up to (not including) the newline,
	 * - skip: any run of whitespace and comments,
	 * - stmt: one semicolon-terminated statement inside a BEGIN ... END body.
	 *
	 * Flags: x (free-spacing), s (dot matches newline), A (anchored), S (study).
	 */
	private function getQueryPattern(): string
	{
		// (*PRUNE) must appear inline (not inside DEFINE subroutines) because PCRE confines
		// backtracking verbs to the subroutine scope. The inner bodies are defined once in
		// DEFINE and referenced after the inline (*PRUNE) to avoid pattern duplication.
		//
		// BEGIN is wrapped in \b ... \b so that only the standalone keyword opens a
		// BEGIN ... END body. Without the boundaries an identifier such as `begin_date`
		// or `beginning` entered block mode and swallowed all following statements.
		return /** @lang PhpRegExp */ '~
			(?(DEFINE)
				(?<sqI> (?: \'\' | [^\'] )*+ \' )
				(?<dqI> (?: "" | [^"] )*+ " )
				(?<btI> (?: `` | [^`] )*+ ` )
				(?<bkI> [^\]]*+ (?: \]\] [^\]]*+ )* \] )
				(?<bcI> (?: [^*]++ | \*(?!/) )*+ \*/ )
				(?<lc> -- [^\n]*+ )
				(?<skip>
					(?:
						\s
						| /\* (*PRUNE) (?&bcI)
						| (?&lc)
					)*+
				)
				(?<stmt>
					(?&skip)
					(?:
						[^;\'"`[/-]++
						| \' (*PRUNE) (?&sqI)
						| " (*PRUNE) (?&dqI)
						| ` (*PRUNE) (?&btI)
						| \[ (*PRUNE) (?&bkI)
						| /\* (*PRUNE) (?&bcI)
						| (?&lc)
						| (?!;) .
					)++
					;
				)
			)

			(?&skip)

			(?:
				(?:
					(?<query>
						(?:
							[^bB;\'"`[/-]++
							| \' (*PRUNE) (?&sqI)
							| " (*PRUNE) (?&dqI)
							| ` (*PRUNE) (?&btI)
							| \[ (*PRUNE) (?&bkI)
							| /\* (*PRUNE) (?&bcI)
							| \b (?i:BEGIN) \b (?!\s*(?:(?i:TRANSACTION|DEFERRED|IMMEDIATE|EXCLUSIVE)\b|;|\z)) (*PRUNE) (?: (?i:\s*END)\s* | (?&stmt) )*
							| (?&lc)
							| (?!;) .
						)*+
					)
					(?: ; | \z )
				)
				|
				(?:
					\z
				)
			)
		~xsAS';
	}
}
Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
<?php declare(strict_types = 1);
2+
3+
/**
4+
* @testCase
5+
*/
6+
7+
namespace Nextras\MultiQueryParser;
8+
9+
use Tester\Assert;
10+
11+
12+
require_once __DIR__ . '/../bootstrap.php';
13+
require_once __DIR__ . '/../inc/MultiQueryParserTestCase.php';
14+
15+
16+
/**
 * Test case for SqliteMultiQueryParser.
 *
 * Supplies the SQLite parser, its fixture file and the dialect-specific data sets;
 * the providers are presumably consumed by the shared MultiQueryParserTestCase.
 */
class SqliteMultiQueryParserTest extends MultiQueryParserTestCase
{
	/**
	 * Parser under test.
	 */
	protected function createParser(): IMultiQueryParser
	{
		return new SqliteMultiQueryParser();
	}


	/**
	 * Location of the SQLite fixture file.
	 */
	protected function getDataFilePath(): string
	{
		return __DIR__ . '/../data/sqlite.sql';
	}


	/**
	 * Number of statements the fixture file is expected to split into.
	 */
	protected function getExpectedFileQueryCount(): int
	{
		return 62;
	}


	/**
	 * Parses the whole fixture file, checks the statement count and verifies that
	 * the trigger at index 19 is returned as one statement including its body.
	 */
	public function testFile(): void
	{
		$expectedTrigger = "CREATE TRIGGER trigger_book_collections_update
AFTER UPDATE ON book_collections
FOR EACH ROW
BEGIN
UPDATE book_collections SET updated_at = datetime('now') WHERE id = NEW.id;
END";

		$queries = iterator_to_array(
			$this->createParser()->parseFile($this->getDataFilePath())
		);

		Assert::count($this->getExpectedFileQueryCount(), $queries);
		Assert::same($expectedTrigger, $queries[19]);
	}


	/**
	 * Inputs with redundant semicolons, paired with the statements expected from them.
	 *
	 * @return list<array{string, list<string>}>
	 */
	protected function provideSuperfluousSemicolonsData(): array
	{
		$cases = [
			['SELECT 1 AS semicolon_madness;;;', ['SELECT 1 AS semicolon_madness']],
			[';;', []],
			[';;;', []],
			[';SELECT 1;', ['SELECT 1']],
			['SELECT 1;;SELECT 2;', ['SELECT 1', 'SELECT 2']],
			['SELECT 1; ; SELECT 2;', ['SELECT 1', 'SELECT 2']],
		];

		return $cases;
	}


	/**
	 * Dialect edge cases: each entry is the input SQL and the expected statements.
	 *
	 * @return list<array{string, list<string>}>
	 */
	protected function provideEdgeCasesData(): array
	{
		$cases = [
			// Nothing but whitespace (or nothing at all) produces no statements
			['', []],
			[" \n\t\n ", []],

			// A semicolon inside a single-quoted string is not a delimiter
			["SELECT 'a;b';", ["SELECT 'a;b'"]],
			["SELECT ';;;';", ["SELECT ';;;'"]],
			["SELECT '';", ["SELECT ''"]],

			// '' is an escaped quote inside a string
			["SELECT 'it''s';", ["SELECT 'it''s'"]],

			// A semicolon inside a double-quoted identifier is not a delimiter
			['SELECT "col;name" FROM t;', ['SELECT "col;name" FROM t']],

			// "" is an escaped quote inside a double-quoted identifier
			['SELECT "col""name" FROM t;', ['SELECT "col""name" FROM t']],

			// A semicolon inside a backtick identifier is not a delimiter
			['SELECT `col;name` FROM t;', ['SELECT `col;name` FROM t']],

			// `` is an escaped backtick inside a backtick identifier
			['SELECT `col``name` FROM t;', ['SELECT `col``name` FROM t']],

			// A semicolon inside a bracket identifier is not a delimiter
			['SELECT [col;name] FROM t;', ['SELECT [col;name] FROM t']],
			['SELECT [a;b], [c;d] FROM t;', ['SELECT [a;b], [c;d] FROM t']],

			// ]] is an escaped closing bracket inside a bracket identifier
			['SELECT [col]]name] FROM t;', ['SELECT [col]]name] FROM t']],

			// Semicolons in comments do not split statements
			["SELECT /* ; */ 1;", ["SELECT /* ; */ 1"]],
			["SELECT 1; -- has ; in comment\nSELECT 2;", ["SELECT 1", "SELECT 2"]],

			// A line comment in the middle of a statement stays part of it
			["SELECT 1 -- comment with ;\nSELECT 2;", ["SELECT 1 -- comment with ;\nSELECT 2"]],

			// The final statement does not need a terminating semicolon
			["SELECT 1", ["SELECT 1"]],
			["SELECT 1; SELECT 2", ["SELECT 1", "SELECT 2"]],

			// A lone slash or dash is an operator, not a comment opener
			["SELECT 5/3;", ["SELECT 5/3"]],
			["SELECT 5-3;", ["SELECT 5-3"]],

			// Input consisting solely of a comment
			["/* only a comment */", []],
			["-- only a comment", []],

			// Comments before and between statements are dropped
			["/* prefix */ SELECT 1;", ["SELECT 1"]],
			["-- prefix\nSELECT 1;", ["SELECT 1"]],
			["SELECT 1; /* between */ SELECT 2;", ["SELECT 1", "SELECT 2"]],

			// A star inside a block comment does not end it
			["SELECT /* contains * star */ 1;", ["SELECT /* contains * star */ 1"]],

			// Block comments do not nest in SQLite: the first */ closes the comment
			["SELECT /* outer /* inner */ 1;", ["SELECT /* outer /* inner */ 1"]],

			// Windows line endings
			["SELECT 1;\r\nSELECT 2;\r\n", ["SELECT 1", "SELECT 2"]],

			// A BEGIN...END body keeps its inner semicolons and is one statement
			[
				"BEGIN\n\tSELECT 1;\n\tSELECT 2;\nEND;",
				["BEGIN\n\tSELECT 1;\n\tSELECT 2;\nEND"],
			],

			// Empty BEGIN...END body
			["BEGIN END;", ["BEGIN END"]],
			["BEGIN\nEND;", ["BEGIN\nEND"]],

			// The word BEGIN inside a quoted token does not open a block
			["SELECT 'BEGIN';", ["SELECT 'BEGIN'"]],
			['SELECT "BEGIN";', ['SELECT "BEGIN"']],

			// Consecutive BEGIN...END blocks are returned separately
			[
				"BEGIN\n\tSELECT 1;\nEND;\nBEGIN\n\tSELECT 2;\nEND;",
				["BEGIN\n\tSELECT 1;\nEND", "BEGIN\n\tSELECT 2;\nEND"],
			],

			// Quoted semicolon inside a BEGIN...END body
			[
				"BEGIN\n\tSELECT 'a;b';\nEND;",
				["BEGIN\n\tSELECT 'a;b';\nEND"],
			],

			// BEGIN...END following a statement header (CREATE TRIGGER)
			[
				"CREATE TRIGGER t AFTER INSERT ON x FOR EACH ROW\nBEGIN\n\tSELECT 1;\nEND;",
				["CREATE TRIGGER t AFTER INSERT ON x FOR EACH ROW\nBEGIN\n\tSELECT 1;\nEND"],
			],

			// Transaction forms of BEGIN are ordinary statements, not blocks
			["BEGIN TRANSACTION; SELECT 1;", ["BEGIN TRANSACTION", "SELECT 1"]],
			["BEGIN DEFERRED; SELECT 1;", ["BEGIN DEFERRED", "SELECT 1"]],
			["BEGIN IMMEDIATE; SELECT 1;", ["BEGIN IMMEDIATE", "SELECT 1"]],
			["BEGIN EXCLUSIVE; SELECT 1;", ["BEGIN EXCLUSIVE", "SELECT 1"]],

			// BEGIN directly followed by a semicolon starts a transaction
			["BEGIN; SELECT 1;", ["BEGIN", "SELECT 1"]],

			// All three identifier quoting styles together
			['SELECT [a], "b", `c` FROM t;', ['SELECT [a], "b", `c` FROM t']],
		];

		return $cases;
	}


	/**
	 * Inputs pre-split into chunks so that a quoted token, comment or block straddles
	 * the chunk boundary, paired with the statements expected from them.
	 *
	 * @return list<array{list<string>, list<string>}>
	 */
	protected function provideChunkBoundaryData(): array
	{
		$cases = [
			// Boundary inside a single-quoted string
			[
				["SELECT 'a;b", "c';"],
				["SELECT 'a;bc'"],
			],
			// Boundary inside a double-quoted identifier
			[
				['SELECT "a;b', 'c";'],
				['SELECT "a;bc"'],
			],
			// Boundary inside a backtick identifier
			[
				['SELECT `a;b', 'c`;'],
				['SELECT `a;bc`'],
			],
			// Boundary inside a bracket identifier
			[
				["SELECT [col;na", "me] FROM t;"],
				["SELECT [col;name] FROM t"],
			],
			// Boundary inside a block comment within a statement
			[
				["SELECT /* a;b", "c */ 1;"],
				["SELECT /* a;bc */ 1"],
			],
			// Boundary inside a block comment that precedes the statement
			[
				["/* x;y", "z */ SELECT 1;"],
				["SELECT 1"],
			],
			// Boundary inside a string within a BEGIN...END body
			[
				["BEGIN\n\tSELECT 'a;b", "c';\nEND;"],
				["BEGIN\n\tSELECT 'a;bc';\nEND"],
			],
		];

		return $cases;
	}
}
245+
246+
247+
// Instantiate the test case and execute it when this file is run.
(new SqliteMultiQueryParserTest())->run();

0 commit comments

Comments
 (0)