Skip to content

Commit fc2cf19

Browse files
authored
Use chunked file reading to avoid loading entire files into memory (#37)
1 parent 7d95e66 commit fc2cf19

13 files changed

Lines changed: 781 additions & 81 deletions

.phpstan.neon

Lines changed: 0 additions & 4 deletions
This file was deleted.

composer.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@
3434
}
3535
},
3636
"scripts": {
37-
"phpstan": "phpstan analyze -c .phpstan.neon",
37+
"phpstan": "phpstan analyze",
3838
"tests": "tester -C ./tests/cases"
3939
},
4040
"config": {

phpstan.neon

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
parameters:
2+
level: 8
3+
paths:
4+
- src
5+
- tests
6+
fileExtensions:
7+
- php
8+
- phpt

src/BaseMultiQueryParser.php

Lines changed: 75 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,75 @@
1+
<?php declare(strict_types = 1);
2+
3+
namespace Nextras\MultiQueryParser;
4+
5+
use ArrayIterator;
6+
use Iterator;
7+
use Nextras\MultiQueryParser\Exception\RuntimeException;
8+
use function feof;
9+
use function fopen;
10+
use function fread;
11+
12+
13+
/**
 * Base class that implements the file, file-stream and string entry points of
 * IMultiQueryParser by converting each input into an Iterator<string> and
 * delegating to the abstract parseStringStream(), which subclasses provide.
 */
abstract class BaseMultiQueryParser implements IMultiQueryParser
14+
{
15+
/**
* Opens the file at $path for binary reading and delegates to parseFileStream().
*
* NOTE(review): the opened handle is not explicitly closed anywhere in this class.
*
16+
* @param positive-int $chunkSize
17+
* @return Iterator<string>
* @throws RuntimeException when the file cannot be opened
18+
*/
19+
public function parseFile(string $path, int $chunkSize = self::DEFAULT_CHUNK_SIZE): Iterator
20+
{
21+
// The fopen() warning is suppressed with @; a failed open is reported via the exception below.
$handle = @fopen($path, 'rb');
22+
23+
if ($handle === false) {
24+
throw new RuntimeException("Cannot open file '$path'.");
25+
}
26+
27+
return $this->parseFileStream($handle, $chunkSize);
28+
}
29+
30+
31+
/**
* Wraps the stream resource in a chunk generator (toStringStream()) and
* passes that generator to parseStringStream().
*
32+
* @param resource $fileStream
33+
* @param positive-int $chunkSize
34+
* @return Iterator<string>
35+
*/
36+
public function parseFileStream($fileStream, int $chunkSize = self::DEFAULT_CHUNK_SIZE): Iterator
37+
{
38+
return $this->parseStringStream($this->toStringStream($fileStream, $chunkSize));
39+
}
40+
41+
42+
/**
* Parses an in-memory string by presenting it to parseStringStream() as a
* stream that consists of exactly one chunk.
*
43+
* @return Iterator<string>
44+
*/
45+
public function parseString(string $s): Iterator
46+
{
47+
return $this->parseStringStream(new ArrayIterator([$s]));
48+
}
49+
50+
51+
/**
* Dialect-specific parsing step; every other public method of this class
* ends up calling it.
*
52+
* @param Iterator<string> $stream
53+
* @return Iterator<string>
54+
*/
55+
abstract public function parseStringStream(Iterator $stream): Iterator;
56+
57+
58+
/**
* Generator that yields the result of successive fread() calls (each
* requesting $chunkSize bytes) until feof() reports end of stream.
* Because the method body contains yield, nothing is read until the
* returned iterator is advanced.
*
59+
* @param resource $fileStream
60+
* @param positive-int $chunkSize
61+
* @return Iterator<string>
* @throws RuntimeException when fread() returns false
62+
*/
63+
private function toStringStream($fileStream, int $chunkSize): Iterator
64+
{
65+
while (!feof($fileStream)) {
66+
$chunk = fread($fileStream, $chunkSize);
67+
68+
if ($chunk === false) {
69+
throw new RuntimeException('Error reading file stream.');
70+
}
71+
72+
yield $chunk;
73+
}
74+
}
75+
}

src/IMultiQueryParser.php

Lines changed: 27 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,33 @@
77

88
interface IMultiQueryParser
99
{
10+
// Default value of the $chunkSize parameter of parseFile() and parseFileStream(): 64 * 1024.
public const DEFAULT_CHUNK_SIZE = 64 * 1024;
11+
12+
13+
/**
* Parses the file at $path and returns an iterator of strings.
*
14+
* @param positive-int $chunkSize
15+
* @return Iterator<string>
16+
*/
17+
public function parseFile(string $path, int $chunkSize = self::DEFAULT_CHUNK_SIZE): Iterator;
18+
19+
20+
/**
* Same as parseFile(), but takes an already opened stream resource instead of a path.
*
21+
* @param resource $fileStream
22+
* @param positive-int $chunkSize
23+
* @return Iterator<string>
24+
*/
25+
public function parseFileStream($fileStream, int $chunkSize = self::DEFAULT_CHUNK_SIZE): Iterator;
26+
27+
28+
/**
* Parses input that is already held in memory as a single string.
*
29+
* @return Iterator<string>
30+
*/
31+
public function parseString(string $s): Iterator;
32+
33+
1034
/**
* Parses input supplied as an iterator of string chunks.
*
11-
* @return Iterator<int, string>
35+
* @param Iterator<string> $stream
36+
* @return Iterator<string>
1237
*/
13-
public function parseFile(string $path): Iterator;
38+
public function parseStringStream(Iterator $stream): Iterator;
1439
}

src/MySqlMultiQueryParser.php

Lines changed: 6 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -3,40 +3,23 @@
33
namespace Nextras\MultiQueryParser;
44

55
use Iterator;
6-
use Nextras\MultiQueryParser\Exception\RuntimeException;
7-
use function file_get_contents;
8-
use function preg_match;
96
use function preg_quote;
10-
use function strlen;
117

128

13-
class MySqlMultiQueryParser implements IMultiQueryParser
9+
class MySqlMultiQueryParser extends BaseMultiQueryParser
1410
{
15-
public function parseFile(string $path): Iterator
11+
public function parseStringStream(Iterator $stream): Iterator
1612
{
17-
$content = @file_get_contents($path);
18-
if ($content === false) {
19-
throw new RuntimeException("Cannot open file '$path'.");
20-
}
21-
22-
$offset = 0;
23-
$pattern = $this->getQueryPattern(';');
24-
25-
while (preg_match($pattern, $content, $match, 0, $offset) === 1) {
26-
$offset += strlen($match[0]);
13+
$patternIterator = new PatternIterator($stream, $this->getQueryPattern(';'));
2714

15+
foreach ($patternIterator as $match) {
2816
if (isset($match['delimiter']) && $match['delimiter'] !== '') {
29-
$pattern = $this->getQueryPattern($match['delimiter']);
17+
$patternIterator->setPattern($this->getQueryPattern($match['delimiter']));
18+
3019
} elseif (isset($match['query']) && $match['query'] !== '') {
3120
yield $match['query'];
32-
} else {
33-
break;
3421
}
3522
}
36-
37-
if ($offset !== strlen($content)) {
38-
throw new RuntimeException("Failed to parse file '$path', please report an issue.");
39-
}
4023
}
4124

4225

src/PatternIterator.php

Lines changed: 79 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,79 @@
1+
<?php declare(strict_types = 1);
2+
3+
namespace Nextras\MultiQueryParser;
4+
5+
use Iterator;
6+
use IteratorAggregate;
7+
use Nextras\MultiQueryParser\Exception\RuntimeException;
8+
use function preg_last_error_msg;
9+
use function preg_match;
10+
use function strlen;
11+
use function substr;
12+
13+
14+
/**
* Applies a PCRE pattern repeatedly to a stream of string chunks and yields
* the preg_match() result array of every non-empty match. Only the not yet
* consumed tail of the input is kept in the buffer between chunks.
*
15+
* @implements IteratorAggregate<int, array<mixed>>
16+
*/
17+
class PatternIterator implements IteratorAggregate
18+
{
19+
/**
20+
* @param Iterator<string> $stream chunks of the input, consumed in order by getIterator()
* @param string $pattern pattern passed to preg_match(); replaceable via setPattern()
21+
*/
22+
public function __construct(
23+
private Iterator $stream,
24+
private string $pattern,
25+
) {
26+
}
27+
28+
29+
/** Returns the pattern that is currently in use. */
public function getPattern(): string
30+
{
31+
return $this->pattern;
32+
}
33+
34+
35+
/**
* Replaces the pattern. getIterator() reads $this->pattern on every
* preg_match() call, so a change made while iterating applies from the
* next match attempt onwards.
*/
public function setPattern(string $pattern): void
36+
{
37+
$this->pattern = $pattern;
38+
}
39+
40+
41+
/**
* Generator that yields one $matches array per successful, non-empty match.
*
* NOTE(review): $offset is advanced by strlen($matches[0]) only, which
* presumes every match starts exactly at $offset - confirm that the
* patterns passed in are anchored accordingly.
*
* @return Iterator<int, array<mixed>>
* @throws RuntimeException when preg_match() fails, or when the loop ends
*                          with unconsumed input left in the buffer
*/
public function getIterator(): Iterator
42+
{
43+
// $s is the working buffer; $offset is how much of it has been consumed by yielded matches.
$s = '';
44+
$offset = 0;
45+
46+
while ($this->stream->valid()) {
47+
// Drop the consumed prefix, append the next chunk and restart matching at the buffer start.
$s = substr($s, $offset) . $this->stream->current();
48+
$this->stream->next();
49+
$offset = 0;
50+
51+
while (true) {
52+
$result = preg_match($this->pattern, $s, $matches, 0, $offset);
53+
54+
if ($result === false) {
55+
throw new RuntimeException(preg_last_error_msg());
56+
}
57+
58+
// No match in the current buffer: leave the inner loop so the outer loop can append another chunk.
if ($result !== 1) {
59+
break;
60+
}
61+
62+
// An empty match stops iteration altogether (both loops); the final length check below still runs.
if (strlen($matches[0]) === 0) {
63+
break 2;
64+
}
65+
66+
// The match reaches the end of the buffer and more chunks are available: do not yield it yet,
// it is matched again after the next chunk has been appended.
if (strlen($matches[0]) + $offset === strlen($s) && $this->stream->valid()) {
67+
break;
68+
}
69+
70+
yield $matches;
71+
$offset += strlen($matches[0]);
72+
}
73+
}
74+
75+
// Anything left in the buffer past $offset was never covered by a yielded match.
if ($offset !== strlen($s)) {
76+
throw new RuntimeException("Failed to parse stream, please report an issue.");
77+
}
78+
}
79+
}

src/PostgreSqlMultiQueryParser.php

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,37 +3,19 @@
33
namespace Nextras\MultiQueryParser;
44

55
use Iterator;
6-
use Nextras\MultiQueryParser\Exception\RuntimeException;
7-
use function file_get_contents;
8-
use function preg_match;
9-
use function strlen;
106

117

12-
class PostgreSqlMultiQueryParser implements IMultiQueryParser
8+
class PostgreSqlMultiQueryParser extends BaseMultiQueryParser
139
{
14-
public function parseFile(string $path): Iterator
10+
public function parseStringStream(Iterator $stream): Iterator
1511
{
16-
$content = @file_get_contents($path);
17-
if ($content === false) {
18-
throw new RuntimeException("Cannot open file '$path'.");
19-
}
20-
21-
$offset = 0;
22-
$pattern = $this->getQueryPattern();
23-
24-
while (preg_match($pattern, $content, $match, 0, $offset)) {
25-
$offset += strlen($match[0]);
12+
$patternIterator = new PatternIterator($stream, $this->getQueryPattern());
2613

14+
foreach ($patternIterator as $match) {
2715
if (isset($match['query']) && $match['query'] !== '') {
2816
yield $match['query'];
29-
} else {
30-
break;
3117
}
3218
}
33-
34-
if ($offset !== strlen($content)) {
35-
throw new RuntimeException("Failed to parse file '$path', please report an issue.");
36-
}
3719
}
3820

3921

src/SqlServerMultiQueryParser.php

Lines changed: 4 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -3,37 +3,19 @@
33
namespace Nextras\MultiQueryParser;
44

55
use Iterator;
6-
use Nextras\MultiQueryParser\Exception\RuntimeException;
7-
use function file_get_contents;
8-
use function preg_match;
9-
use function strlen;
106

117

12-
class SqlServerMultiQueryParser implements IMultiQueryParser
8+
class SqlServerMultiQueryParser extends BaseMultiQueryParser
139
{
14-
public function parseFile(string $path): Iterator
10+
public function parseStringStream(Iterator $stream): Iterator
1511
{
16-
$content = @file_get_contents($path);
17-
if ($content === false) {
18-
throw new RuntimeException("Cannot open file '$path'.");
19-
}
20-
21-
$offset = 0;
22-
$pattern = $this->getQueryPattern();
23-
24-
while (preg_match($pattern, $content, $match, 0, $offset)) {
25-
$offset += strlen($match[0]);
12+
$patternIterator = new PatternIterator($stream, $this->getQueryPattern());
2613

14+
foreach ($patternIterator as $match) {
2715
if (isset($match['query']) && $match['query'] !== '') {
2816
yield $match['query'];
29-
} else {
30-
break;
3117
}
3218
}
33-
34-
if ($offset !== strlen($content)) {
35-
throw new RuntimeException("Failed to parse file '$path', please report an issue.");
36-
}
3719
}
3820

3921

0 commit comments

Comments
 (0)