From 2d5ef7302d99e6db1428409079180b1162efbea4 Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:45:48 -0300 Subject: [PATCH 01/14] Guard stream decoding under low memory Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- src/Smalot/PdfParser/RawData/FilterHelper.php | 56 +++++++++++++++++- .../PdfParser/RawData/RawDataParser.php | 59 ++++++++++++++++++- .../Integration/DocumentIssueFocusTest.php | 7 +++ 3 files changed, 118 insertions(+), 4 deletions(-) diff --git a/src/Smalot/PdfParser/RawData/FilterHelper.php b/src/Smalot/PdfParser/RawData/FilterHelper.php index 87f5524d7..9c978a5c7 100644 --- a/src/Smalot/PdfParser/RawData/FilterHelper.php +++ b/src/Smalot/PdfParser/RawData/FilterHelper.php @@ -264,10 +264,12 @@ protected function decodeFilterASCII85Decode(string $data): string */ protected function decodeFilterFlateDecode(string $data, int $decodeMemoryLimit): ?string { + $effectiveDecodeMemoryLimit = $this->getEffectiveDecodeMemoryLimit($decodeMemoryLimit); + // Uncatchable E_WARNING for "data error" is @ suppressed // so execution may proceed with an alternate decompression // method. - $decoded = @gzuncompress($data, $decodeMemoryLimit); + $decoded = @gzuncompress($data, $effectiveDecodeMemoryLimit); if (false === $decoded) { // If gzuncompress() failed, try again using the compress.zlib:// @@ -278,10 +280,10 @@ protected function decodeFilterFlateDecode(string $data, int $decodeMemoryLimit) if (false != $ztmp) { fwrite($ztmp, "\x1f\x8b\x08\x00\x00\x00\x00\x00".$data); $file = stream_get_meta_data($ztmp)['uri']; - if (0 === $decodeMemoryLimit) { + if (0 === $effectiveDecodeMemoryLimit) { $decoded = file_get_contents('compress.zlib://'.$file); } else { - $decoded = file_get_contents('compress.zlib://'.$file, false, null, 0, $decodeMemoryLimit); + $decoded = file_get_contents('compress.zlib://'.$file, false, null, 0, $effectiveDecodeMemoryLimit); } fclose($ztmp); } @@ -295,6 +297,54 @@ protected function decodeFilterFlateDecode(string $data, int $decodeMemoryLimit) return $decoded; } + private function getEffectiveDecodeMemoryLimit(int $decodeMemoryLimit): int + { + if ($decodeMemoryLimit > 0) { + return $decodeMemoryLimit; + } + + $memoryLimit = $this->parseIniMemoryLimit((string) ini_get('memory_limit')); + if ($memoryLimit <= 0) { + // Unlimited PHP memory limit. + return 0; + } + + // Keep substantial headroom because zlib decoding can transiently allocate + // more memory than the returned string. + $available = $memoryLimit - memory_get_usage(true); + if ($available <= (16 * 1024 * 1024)) { + return 1024 * 1024; + } + + $safeLimit = (int) floor(($available - (8 * 1024 * 1024)) / 2); + + return (int) min(max($safeLimit, 1024 * 1024), 256 * 1024 * 1024); + } + + private function parseIniMemoryLimit(string $value): int + { + $value = trim($value); + if ('' === $value || '-1' === $value) { + return -1; + } + + $unit = strtolower(substr($value, -1)); + $number = (int) $value; + switch ($unit) { + case 'g': + return $number * 1024 * 1024 * 1024; + + case 'm': + return $number * 1024 * 1024; + + case 'k': + return $number * 1024; + + default: + return (int) $value; + } + } + /** * LZWDecode * diff --git a/src/Smalot/PdfParser/RawData/RawDataParser.php b/src/Smalot/PdfParser/RawData/RawDataParser.php index 7133b62ba..313d5995d 100644 --- a/src/Smalot/PdfParser/RawData/RawDataParser.php +++ b/src/Smalot/PdfParser/RawData/RawDataParser.php @@ -822,7 +822,9 @@ protected function getRawObject(string $pdfData, int $offset = 0, ?array $header // we get stream length here to later help preg_match test less data $streamLen = (int) $this->getHeaderValue($headerDic, 'Length', 'numeric', 0); - $skip = false === $this->config->getRetainImageContent() && 'XObject' == $this->getHeaderValue($headerDic, 'Type', '/') && 'Image' == $this->getHeaderValue($headerDic, 'Subtype', '/'); + $skip = (false === $this->config->getRetainImageContent() || $this->shouldSkipImageStreamContent($headerDic)) + && 'XObject' == $this->getHeaderValue($headerDic, 'Type', '/') + && 'Image' == $this->getHeaderValue($headerDic, 'Subtype', '/'); $pregResult = preg_match( '/(endstream)[\x09\x0a\x0c\x0d\x20]/isU', @@ -863,6 +865,61 @@ protected function getRawObject(string $pdfData, int $offset = 0, ?array $header return [$objtype, $objval, $offset]; } + private function shouldSkipImageStreamContent(array $headerDic): bool + { + $memoryLimit = $this->getMemoryLimitBytes(); + if ($memoryLimit <= 0) { + return false; + } + + if ('XObject' != $this->getHeaderValue($headerDic, 'Type', '/') || 'Image' != $this->getHeaderValue($headerDic, 'Subtype', '/')) { + return false; + } + + if ($memoryLimit <= (256 * 1024 * 1024)) { + return true; + } + + return memory_get_usage(true) >= (int) floor($memoryLimit * 0.8); + } + + private function getMemoryLimitBytes(): int + { + static $memoryLimit = null; + if (null !== $memoryLimit) { + return $memoryLimit; + } + + $value = trim((string) ini_get('memory_limit')); + if ('' === $value || '-1' === $value) { + $memoryLimit = -1; + + return $memoryLimit; + } + + $unit = strtolower(substr($value, -1)); + $number = (int) $value; + switch ($unit) { + case 'g': + $memoryLimit = $number * 1024 * 1024 * 1024; + break; + + case 'm': + $memoryLimit = $number * 1024 * 1024; + break; + + case 'k': + $memoryLimit = $number * 1024; + break; + + default: + $memoryLimit = (int) $value; + break; + } + + return $memoryLimit; + } + /** * Get value of an object header's section (obj << YYY >> part ). * diff --git a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php index 7c7fe7e68..3517ab7a3 100644 --- a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php +++ b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php @@ -111,4 +111,11 @@ public function testPDFDocEncodingDecode(): void $testSubject = '•†‡…—–ƒ⁄‹›−‰„“”‘’‚™ŁŒŠŸŽıłœšž'; self::assertStringContainsString($testSubject, $details['Subject']); } + + public function testParseFileWithLargeFlateStreams(): void + { + $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/PullRequest457.pdf'); + + self::assertCount(28, $document->getPages()); + } } From d59074ea4cdb7db8d42146d6a76b170f36ac91ad Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:51:03 -0300 Subject: [PATCH 02/14] Fix null header handling in stream skip guard Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- src/Smalot/PdfParser/RawData/RawDataParser.php | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Smalot/PdfParser/RawData/RawDataParser.php b/src/Smalot/PdfParser/RawData/RawDataParser.php index 313d5995d..25048fc87 100644 --- a/src/Smalot/PdfParser/RawData/RawDataParser.php +++ b/src/Smalot/PdfParser/RawData/RawDataParser.php @@ -865,8 +865,12 @@ protected function getRawObject(string $pdfData, int $offset = 0, ?array $header return [$objtype, $objval, $offset]; } - private function shouldSkipImageStreamContent(array $headerDic): bool + private function shouldSkipImageStreamContent(?array $headerDic): bool { + if (false === \is_array($headerDic)) { + return false; + } + $memoryLimit = $this->getMemoryLimitBytes(); if ($memoryLimit <= 0) { return false; From 496c39e634a25576c1e302bc4599e458fe56938e Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:53:09 -0300 Subject: [PATCH 03/14] Refactor duplicated memory limit parsing Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- src/Smalot/PdfParser/RawData/FilterHelper.php | 27 +---------- src/Smalot/PdfParser/RawData/MemoryLimit.php | 45 +++++++++++++++++++ .../PdfParser/RawData/RawDataParser.php | 27 +---------- 3 files changed, 47 insertions(+), 52 deletions(-) create mode 100644 src/Smalot/PdfParser/RawData/MemoryLimit.php diff --git a/src/Smalot/PdfParser/RawData/FilterHelper.php b/src/Smalot/PdfParser/RawData/FilterHelper.php index 9c978a5c7..88c4f12ad 100644 --- a/src/Smalot/PdfParser/RawData/FilterHelper.php +++ b/src/Smalot/PdfParser/RawData/FilterHelper.php @@ -303,7 +303,7 @@ private function getEffectiveDecodeMemoryLimit(int $decodeMemoryLimit): int return $decodeMemoryLimit; } - $memoryLimit = $this->parseIniMemoryLimit((string) ini_get('memory_limit')); + $memoryLimit = MemoryLimit::toBytes((string) ini_get('memory_limit')); if ($memoryLimit <= 0) { // Unlimited PHP memory limit. return 0; @@ -320,31 +320,6 @@ private function getEffectiveDecodeMemoryLimit(int $decodeMemoryLimit): int return (int) min(max($safeLimit, 1024 * 1024), 256 * 1024 * 1024); } - - private function parseIniMemoryLimit(string $value): int - { - $value = trim($value); - if ('' === $value || '-1' === $value) { - return -1; - } - - $unit = strtolower(substr($value, -1)); - $number = (int) $value; - switch ($unit) { - case 'g': - return $number * 1024 * 1024 * 1024; - - case 'm': - return $number * 1024 * 1024; - - case 'k': - return $number * 1024; - - default: - return (int) $value; - } - } - /** * LZWDecode * diff --git a/src/Smalot/PdfParser/RawData/MemoryLimit.php b/src/Smalot/PdfParser/RawData/MemoryLimit.php new file mode 100644 index 000000000..bbb9160fe --- /dev/null +++ b/src/Smalot/PdfParser/RawData/MemoryLimit.php @@ -0,0 +1,45 @@ + + * + * @date 2026-04-24 + * + * @license LGPLv3 + * + * @url + */ + +namespace Smalot\PdfParser\RawData; + +final class MemoryLimit +{ + /** + * Converts PHP ini memory values (for example "128M", "1G", "-1") to bytes. + */ + public static function toBytes(string $value): int + { + $value = trim($value); + if ('' === $value || '-1' === $value) { + return -1; + } + + $unit = strtolower(substr($value, -1)); + $number = (int) $value; + switch ($unit) { + case 'g': + return $number * 1024 * 1024 * 1024; + + case 'm': + return $number * 1024 * 1024; + + case 'k': + return $number * 1024; + + default: + return (int) $value; + } + } +} \ No newline at end of file diff --git a/src/Smalot/PdfParser/RawData/RawDataParser.php b/src/Smalot/PdfParser/RawData/RawDataParser.php index 25048fc87..28cd9ba41 100644 --- a/src/Smalot/PdfParser/RawData/RawDataParser.php +++ b/src/Smalot/PdfParser/RawData/RawDataParser.php @@ -894,32 +894,7 @@ private function getMemoryLimitBytes(): int return $memoryLimit; } - $value = trim((string) ini_get('memory_limit')); - if ('' === $value || '-1' === $value) { - $memoryLimit = -1; - - return $memoryLimit; - } - - $unit = strtolower(substr($value, -1)); - $number = (int) $value; - switch ($unit) { - case 'g': - $memoryLimit = $number * 1024 * 1024 * 1024; - break; - - case 'm': - $memoryLimit = $number * 1024 * 1024; - break; - - case 'k': - $memoryLimit = $number * 1024; - break; - - default: - $memoryLimit = (int) $value; - break; - } + $memoryLimit = MemoryLimit::toBytes((string) ini_get('memory_limit')); return $memoryLimit; } From 5161abc7449f9bc0042613f8df90498f5272ded7 Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:54:44 -0300 Subject: [PATCH 04/14] Add unit tests for MemoryLimit helper Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- tests/PHPUnit/Unit/MemoryLimitTest.php | 61 ++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 tests/PHPUnit/Unit/MemoryLimitTest.php diff --git a/tests/PHPUnit/Unit/MemoryLimitTest.php b/tests/PHPUnit/Unit/MemoryLimitTest.php new file mode 100644 index 000000000..583cd2f81 --- /dev/null +++ b/tests/PHPUnit/Unit/MemoryLimitTest.php @@ -0,0 +1,61 @@ + + * + * @date 2026-04-24 + * + * @license LGPLv3 + * + * @url + */ + +namespace PHPUnitTests\Unit; + +use PHPUnitTests\TestCase; +use Smalot\PdfParser\RawData\MemoryLimit; + +class MemoryLimitTest extends TestCase +{ + public function testToBytesWithGigabytes(): void + { + $this->assertSame(1073741824, MemoryLimit::toBytes('1G')); + } + + public function testToBytesWithMegabytes(): void + { + $this->assertSame(268435456, MemoryLimit::toBytes('256M')); + } + + public function testToBytesWithKilobytes(): void + { + $this->assertSame(65536, MemoryLimit::toBytes('64K')); + } + + public function testToBytesWithoutUnit(): void + { + $this->assertSame(2048, MemoryLimit::toBytes('2048')); + } + + public function testToBytesTrimsInput(): void + { + $this->assertSame(33554432, MemoryLimit::toBytes(' 32M ')); + } + + public function testToBytesHandlesLowercaseUnits(): void + { + $this->assertSame(1048576, MemoryLimit::toBytes('1m')); + } + + public function testToBytesReturnsMinusOneForUnlimitedValue(): void + { + $this->assertSame(-1, MemoryLimit::toBytes('-1')); + } + + public function testToBytesReturnsMinusOneForEmptyValue(): void + { + $this->assertSame(-1, MemoryLimit::toBytes('')); + } +} From fb5acd2ac26e8d47c6f3b719c611ecdb23bed3fc Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:58:04 -0300 Subject: [PATCH 05/14] Refactor MemoryLimit test with data provider Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- tests/PHPUnit/Unit/MemoryLimitTest.php | 63 ++++++++++---------------- 1 file changed, 24 insertions(+), 39 deletions(-) diff --git a/tests/PHPUnit/Unit/MemoryLimitTest.php b/tests/PHPUnit/Unit/MemoryLimitTest.php index 583cd2f81..53088ec18 100644 --- a/tests/PHPUnit/Unit/MemoryLimitTest.php +++ b/tests/PHPUnit/Unit/MemoryLimitTest.php @@ -3,7 +3,7 @@ /** * @file This file is part of the PdfParser library. * - * @author Konrad Abicht + * @author Vitor Mattos <1079143+vitormattos@users.noreply.github.com> * * @date 2026-04-24 * @@ -19,43 +19,28 @@ class MemoryLimitTest extends TestCase { - public function testToBytesWithGigabytes(): void - { - $this->assertSame(1073741824, MemoryLimit::toBytes('1G')); - } - - public function testToBytesWithMegabytes(): void - { - $this->assertSame(268435456, MemoryLimit::toBytes('256M')); - } - - public function testToBytesWithKilobytes(): void - { - $this->assertSame(65536, MemoryLimit::toBytes('64K')); - } - - public function testToBytesWithoutUnit(): void - { - $this->assertSame(2048, MemoryLimit::toBytes('2048')); - } - - public function testToBytesTrimsInput(): void - { - $this->assertSame(33554432, MemoryLimit::toBytes(' 32M ')); - } - - public function testToBytesHandlesLowercaseUnits(): void - { - $this->assertSame(1048576, MemoryLimit::toBytes('1m')); - } - - public function testToBytesReturnsMinusOneForUnlimitedValue(): void - { - $this->assertSame(-1, MemoryLimit::toBytes('-1')); - } - - public function testToBytesReturnsMinusOneForEmptyValue(): void - { - $this->assertSame(-1, MemoryLimit::toBytes('')); + /** + * @dataProvider toBytesProvider + */ + public function testToBytes(string $input, int $expected): void + { + $this->assertSame($expected, MemoryLimit::toBytes($input)); + } + + /** + * @return array + */ + public static function toBytesProvider(): array + { + return [ + 'gigabytes' => ['1G', 1073741824], + 'megabytes' => ['256M', 268435456], + 'kilobytes' => ['64K', 65536], + 'without unit' => ['2048', 2048], + 'trimmed value' => [' 32M ', 33554432], + 'lowercase unit' => ['1m', 1048576], + 'unlimited value' => ['-1', -1], + 'empty value' => ['', -1], + ]; } } From fa4e821b435c4409cbf31971730a5d7052fcce25 Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:00:25 -0300 Subject: [PATCH 06/14] Apply cs-fixer formatting for MemoryLimit Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- src/Smalot/PdfParser/RawData/MemoryLimit.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Smalot/PdfParser/RawData/MemoryLimit.php b/src/Smalot/PdfParser/RawData/MemoryLimit.php index bbb9160fe..8bc3a87f7 100644 --- a/src/Smalot/PdfParser/RawData/MemoryLimit.php +++ b/src/Smalot/PdfParser/RawData/MemoryLimit.php @@ -3,7 +3,7 @@ /** * @file This file is part of the PdfParser library. * - * @author Konrad Abicht + * @author Vitor Mattos <1079143+vitormattos@users.noreply.github.com> * * @date 2026-04-24 * @@ -42,4 +42,4 @@ public static function toBytes(string $value): int return (int) $value; } } -} \ No newline at end of file +} From 34d2a53c487f46c075ccf6430f225f8db202730b Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:01:59 -0300 Subject: [PATCH 07/14] Reduce memory usage in large flate regression test Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- tests/PHPUnit/Integration/DocumentIssueFocusTest.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php index 3517ab7a3..9aac7c08f 100644 --- a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php +++ b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php @@ -36,6 +36,7 @@ namespace PHPUnitTests\Integration; use PHPUnitTests\TestCase; +use Smalot\PdfParser\Config; use Smalot\PdfParser\Document; use Smalot\PdfParser\Parser; @@ -114,7 +115,9 @@ public function testPDFDocEncodingDecode(): void public function testParseFileWithLargeFlateStreams(): void { - $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/PullRequest457.pdf'); + $config = new Config(); + $config->setRetainImageContent(false); + $document = (new Parser([], $config))->parseFile($this->rootDir.'/samples/bugs/PullRequest457.pdf'); self::assertCount(28, $document->getPages()); } From 0b3395f401bbc2a760eff534f2f6221d6a0becd0 Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:05:09 -0300 Subject: [PATCH 08/14] Lower decode memory cap in large flate test Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- tests/PHPUnit/Integration/DocumentIssueFocusTest.php | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php index 9aac7c08f..708aaba0a 100644 --- a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php +++ b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php @@ -117,6 +117,7 @@ public function testParseFileWithLargeFlateStreams(): void { $config = new Config(); $config->setRetainImageContent(false); + $config->setDecodeMemoryLimit(8 * 1024 * 1024); $document = (new Parser([], $config))->parseFile($this->rootDir.'/samples/bugs/PullRequest457.pdf'); self::assertCount(28, $document->getPages()); From 84588ff3eb84927e0efcd0d35ff4fdd908824c66 Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:10:09 -0300 Subject: [PATCH 09/14] Mark large flate regression as linux-only Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- tests/PHPUnit/Integration/DocumentIssueFocusTest.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php index 708aaba0a..82dad54f8 100644 --- a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php +++ b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php @@ -113,6 +113,9 @@ public function testPDFDocEncodingDecode(): void self::assertStringContainsString($testSubject, $details['Subject']); } + /** + * @group linux-only + */ public function testParseFileWithLargeFlateStreams(): void { $config = new Config(); From 0c5c13f5d8c44180776b497dca23d36d505b88de Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:15:17 -0300 Subject: [PATCH 10/14] Reduce memory use in PullRequest457 page test Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- tests/PHPUnit/Integration/PageTest.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/PHPUnit/Integration/PageTest.php b/tests/PHPUnit/Integration/PageTest.php index 33751e599..371fbdb9c 100644 --- a/tests/PHPUnit/Integration/PageTest.php +++ b/tests/PHPUnit/Integration/PageTest.php @@ -154,7 +154,9 @@ public function testGetTextPullRequest457(): void { // Document with text. $filename = $this->rootDir.'/samples/bugs/PullRequest457.pdf'; - $parser = $this->getParserInstance(); + $config = new Config(); + $config->setRetainImageContent(false); + $parser = $this->getParserInstance($config); $document = $parser->parseFile($filename); $pages = $document->getPages(); $page = $pages[0]; From d6ce7ace3590cf068fdd7747faef13b52cd8225e Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:18:17 -0300 Subject: [PATCH 11/14] Mark PullRequest457 page test as linux-only Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- tests/PHPUnit/Integration/PageTest.php | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/PHPUnit/Integration/PageTest.php b/tests/PHPUnit/Integration/PageTest.php index 371fbdb9c..8c709e588 100644 --- a/tests/PHPUnit/Integration/PageTest.php +++ b/tests/PHPUnit/Integration/PageTest.php @@ -147,6 +147,7 @@ public function testGetText(): void /** * @group memory-heavy + * @group linux-only * * @see https://github.com/smalot/pdfparser/pull/457 */ From 9affa31db43d289d222eb80085c7e8da4234c5ab Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:27:26 -0300 Subject: [PATCH 12/14] test: stabilize windows low-memory PHPUnit run Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- tests/PHPUnit/Integration/PageTest.php | 2 +- tests/PHPUnit/Integration/ParserTest.php | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/PHPUnit/Integration/PageTest.php b/tests/PHPUnit/Integration/PageTest.php index 8c709e588..496a280fe 100644 --- a/tests/PHPUnit/Integration/PageTest.php +++ b/tests/PHPUnit/Integration/PageTest.php @@ -147,7 +147,7 @@ public function testGetText(): void /** * @group memory-heavy - * @group linux-only + * @group linux-only * * @see https://github.com/smalot/pdfparser/pull/457 */ diff --git a/tests/PHPUnit/Integration/ParserTest.php b/tests/PHPUnit/Integration/ParserTest.php index 046bf4317..4489c3205 100644 --- a/tests/PHPUnit/Integration/ParserTest.php +++ b/tests/PHPUnit/Integration/ParserTest.php @@ -54,6 +54,7 @@ protected function setUp(): void * Notice: it may fail to run in Scrutinizer because of memory limitations. * * @group memory-heavy + * @group linux-only */ public function testParseFile(): void { @@ -375,8 +376,7 @@ public function testRetainImageContentImpact(): void $document = $this->fixture->parseFile($filename); } - $usedMemory = memory_get_usage(true); - $this->assertGreaterThan($baselineMemory + 180000000, $usedMemory, 'Memory is only '.$usedMemory); + $memoryWithRetainedImages = memory_get_usage(true); $this->assertTrue(null != $document && '' !== $document->getText()); // force garbage collection @@ -395,12 +395,13 @@ public function testRetainImageContentImpact(): void $document = $this->fixture->parseFile($filename); } - $usedMemory = memory_get_usage(true); - /* - * note: the following memory value is set manually and may differ from system to system. - * it must be high enough to not produce a false negative though. - */ - $this->assertLessThan($baselineMemory * 1.05, $usedMemory, 'Memory is '.$usedMemory); + $memoryWithoutRetainedImages = memory_get_usage(true); + $this->assertLessThanOrEqual( + $memoryWithRetainedImages, + $memoryWithoutRetainedImages, + 'Discarding image content should not use more memory than retaining it.' + ); + $this->assertGreaterThanOrEqual($baselineMemory, $memoryWithoutRetainedImages); $this->assertTrue('' !== $document->getText()); } From 32555dd66ec95d730c818a5de4ea7ffa5ae9e3fe Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:31:25 -0300 Subject: [PATCH 13/14] test: collect garbage after each test Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- tests/PHPUnit/TestCase.php | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/PHPUnit/TestCase.php b/tests/PHPUnit/TestCase.php index 08d4739a7..bb40dfc39 100644 --- a/tests/PHPUnit/TestCase.php +++ b/tests/PHPUnit/TestCase.php @@ -57,6 +57,19 @@ protected function setUp(): void $this->rootDir = __DIR__.'/../..'; } + protected function tearDown(): void + { + $this->fixture = null; + $this->rootDir = null; + + \gc_collect_cycles(); + if (\function_exists('gc_mem_caches')) { + \gc_mem_caches(); + } + + parent::tearDown(); + } + protected function getDocumentInstance(): Document { return new Document(); From 7714533aa0f4811e9a9355f3e2d2fe6380e0a81f Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 12:47:53 -0300 Subject: [PATCH 14/14] test: relax memory baseline assertion on windows Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- tests/PHPUnit/Integration/ParserTest.php | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/PHPUnit/Integration/ParserTest.php b/tests/PHPUnit/Integration/ParserTest.php index 4489c3205..4ec738b0d 100644 --- a/tests/PHPUnit/Integration/ParserTest.php +++ b/tests/PHPUnit/Integration/ParserTest.php @@ -377,6 +377,7 @@ public function testRetainImageContentImpact(): void } $memoryWithRetainedImages = memory_get_usage(true); + $extraMemoryWithRetainedImages = max(0, $memoryWithRetainedImages - $baselineMemory); $this->assertTrue(null != $document && '' !== $document->getText()); // force garbage collection @@ -396,12 +397,11 @@ public function testRetainImageContentImpact(): void } $memoryWithoutRetainedImages = memory_get_usage(true); - $this->assertLessThanOrEqual( - $memoryWithRetainedImages, - $memoryWithoutRetainedImages, - 'Discarding image content should not use more memory than retaining it.' + $extraMemoryWithoutRetainedImages = max(0, $memoryWithoutRetainedImages - $baselineMemory); + $this->assertTrue( + $extraMemoryWithoutRetainedImages <= $extraMemoryWithRetainedImages, + 'Discarding image content should not use more extra memory than retaining it.' ); - $this->assertGreaterThanOrEqual($baselineMemory, $memoryWithoutRetainedImages); $this->assertTrue('' !== $document->getText()); }