Skip to content

Commit 4aac4f2

Browse files
committed
Replace \z fallbacks with (*PRUNE) for chunk boundary handling
Instead of adding \z to every closing delimiter to make incomplete constructs extend to end-of-buffer, use PCRE's (*PRUNE) verb after each opening delimiter. Once the opening delimiter matches (e.g. the ' that starts a string literal), (*PRUNE) commits the regex engine to that construct — if the closing delimiter is absent (because it is in a later chunk), the match attempt at the current starting position fails rather than falling back to the single-character alternative. Because the pattern is applied anchored at the current offset, this means the overall match fails (an unanchored pattern would instead bump along to the next starting position and would need (*COMMIT)). PatternIterator then loads the next chunk and retries with more data. This is semantically cleaner: "once you enter a construct, you must finish it." It also avoids cross-construct interactions — the \z approach required a PostgreSQL-specific workaround where adding \z to standard string closings broke E-string parsing. Behavioral change: malformed SQL with genuinely unclosed constructs at end-of-input now produces a parse error instead of being silently absorbed into a query.
1 parent dd02d07 commit 4aac4f2

4 files changed

Lines changed: 22 additions & 21 deletions

File tree

src/MySqlMultiQueryParser.php

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ private function getQueryPattern(string $delimiter): string
3232
~
3333
(?:
3434
\\s
35-
| /\\* (?: [^*]++ | \\*(?!/) )*+ (?:\\*/|\\z)
35+
| /\\* (*PRUNE) (?: [^*]++ | \\*(?!/) )*+ \\*/
3636
| --[^\\n]*+(?:\\n|\\z)
3737
)*+
3838
@@ -47,9 +47,9 @@ private function getQueryPattern(string $delimiter): string
4747
(?<query>
4848
(?:
4949
[^$delimiterFirstBytePattern'\"/$-]++
50-
| ' (?: \\\\. | [^'] )*+ (?:'|\\z)
51-
| \" (?: \\\\. | [^\"] )*+ (?:\"|\\z)
52-
| /\\* (?: [^*]++ | \\*(?!/) )*+ (?:\\*/|\\z)
50+
| ' (*PRUNE) (?: \\\\. | [^'] )*+ '
51+
| \" (*PRUNE) (?: \\\\. | [^\"] )*+ \"
52+
| /\\* (*PRUNE) (?: [^*]++ | \\*(?!/) )*+ \\*/
5353
| --[^\\n]*+(?:\\n|\\z)
5454
| (?!$delimiterPattern) .
5555
)*+

src/PatternIterator.php

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,15 @@
2020
*
2121
* Pattern design constraint: patterns with opening/closing delimiter constructs (such as
2222
* string literals `'...'`, block comments `/*...* /`, or dollar-quoted strings `$$...$$`)
23-
* must include `\z` as a fallback for the closing delimiter, e.g. `' [^']* (?: ' | \z )`.
23+
* must include `(*PRUNE)` after the opening delimiter, e.g. `' (*PRUNE) [^']* '`.
2424
* Without this, when a chunk boundary falls inside such a construct, the closing delimiter
2525
* is absent from the buffer, the construct fails to match, and the regex falls back to a
2626
* generic single-character alternative (e.g. `(?!;) .`). This exposes characters inside the
2727
* construct (like semicolons inside a string) as false delimiters, producing an incorrect
2828
* match that terminates in the middle of the buffer — where the safety mechanism cannot
29-
* detect the problem. The `\z` fallback ensures incomplete constructs extend to the end of
30-
* the buffer, triggering the safety mechanism to wait for more data.
29+
* detect the problem. The `(*PRUNE)` verb ensures that once the opening delimiter matches,
30+
* the regex engine commits to the construct — if the closing delimiter is missing (because
31+
* it is in a later chunk), the match attempt at that start position fails (i.e. the overall match, assuming the pattern is anchored), causing the iterator to load more data.
3132
*
3233
* @implements IteratorAggregate<int, array<mixed>>
3334
*/

src/PostgreSqlMultiQueryParser.php

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ private function getQueryPattern(): string
2727
return /** @lang PhpRegExp */ '~
2828
(?:
2929
\\s
30-
| /\\* (?: [^*]++ | \\*(?!/) )*+ (?:\\*/|\\z)
30+
| /\\* (*PRUNE) (?: [^*]++ | \\*(?!/) )*+ \\*/
3131
| -- [^\\n]*+
3232
)*+
3333
@@ -36,11 +36,11 @@ private function getQueryPattern(): string
3636
(?<query>
3737
(?:
3838
(?:[^;\'"/$eE-]|[eE](?!\'))++
39-
| \' (?: [^\'] )*+ (?:\'|\\z)
40-
| [eE]\' (?: \\\\. | [^\'] )*+ (?:\'|\\z)
41-
| " (?: [^"] )*+ (?:"|\\z)
42-
| /\\* (?: [^*]++ | \\*(?!/) )*+ (?:\\*/|\\z)
43-
| (\\$(?:[a-zA-Z_\\x80-\\xFF][\\w\\x80-\\xFF]*+)?\\$) (?: [^$]++ | (?!\\g{-1})\\$ )*+ (?: \\g{-1} | \\z )
39+
| \' (*PRUNE) (?: [^\'] )*+ \'
40+
| [eE]\' (*PRUNE) (?: \\\\. | [^\'] )*+ \'
41+
| " (*PRUNE) (?: [^"] )*+ "
42+
| /\\* (*PRUNE) (?: [^*]++ | \\*(?!/) )*+ \\*/
43+
| (\\$(?:[a-zA-Z_\\x80-\\xFF][\\w\\x80-\\xFF]*+)?\\$) (*PRUNE) (?: [^$]++ | (?!\\g{-1})\\$ )*+ \\g{-1}
4444
| -- [^\\n]*+
4545
| (?!;) .
4646
)*+

src/SqlServerMultiQueryParser.php

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,15 @@ private function getQueryPattern(): string
2424
$simpleQuery = /** @lang PhpRegExp */ '~
2525
(?:
2626
\\s
27-
| /\\* (?: [^*]++ | \\*(?!/) )*+ (?:\\*/|\\z)
27+
| /\\* (*PRUNE) (?: [^*]++ | \\*(?!/) )*+ \\*/
2828
| -- [^\\n]*+
2929
)*+
3030
(?<simplequery>
3131
(?:
3232
[^;\'"[/-]++
33-
| \' (?: [^\'] )*+ (?:\'|\\z)
34-
| " (?: [^"] )*+ (?:"|\\z)
35-
| /\\* (?: [^*]++ | \\*(?!/) )*+ (?:\\*/|\\z)
33+
| \' (*PRUNE) (?: [^\'] )*+ \'
34+
| " (*PRUNE) (?: [^"] )*+ "
35+
| /\\* (*PRUNE) (?: [^*]++ | \\*(?!/) )*+ \\*/
3636
| -- [^\\n]*+
3737
| (?!;) .
3838
)++
@@ -42,7 +42,7 @@ private function getQueryPattern(): string
4242
return /** @lang PhpRegExp */ '~
4343
(?:
4444
\\s
45-
| /\\* (?: [^*]++ | \\*(?!/) )*+ (?:\\*/|\\z)
45+
| /\\* (*PRUNE) (?: [^*]++ | \\*(?!/) )*+ \\*/
4646
| -- [^\\n]*+
4747
)*+
4848
@@ -51,9 +51,9 @@ private function getQueryPattern(): string
5151
(?<query>
5252
(?:
5353
[^B;\'"[/-]++
54-
| \' (?: [^\'] )*+ (?:\'|\\z)
55-
| " (?: [^"] )*+ (?:"|\\z)
56-
| /\\* (?: [^*]++ | \\*(?!/) )*+ (?:\\*/|\\z)
54+
| \' (*PRUNE) (?: [^\'] )*+ \'
55+
| " (*PRUNE) (?: [^"] )*+ "
56+
| /\\* (*PRUNE) (?: [^*]++ | \\*(?!/) )*+ \\*/
5757
| BEGIN (?: \s*END\s*| ' . substr($simpleQuery, 1, -2) . ')*
5858
| -- [^\\n]*+
5959
| (?!;) .

0 commit comments

Comments
 (0)