From 2186082d20a8d01e59c9e9f0f99b5f6d96e12898 Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:45:48 -0300 Subject: [PATCH 01/13] Guard stream decoding under low memory Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- src/Smalot/PdfParser/RawData/FilterHelper.php | 56 +++++++++++++++++- .../PdfParser/RawData/RawDataParser.php | 59 ++++++++++++++++++- .../Integration/DocumentIssueFocusTest.php | 7 +++ 3 files changed, 118 insertions(+), 4 deletions(-) diff --git a/src/Smalot/PdfParser/RawData/FilterHelper.php b/src/Smalot/PdfParser/RawData/FilterHelper.php index 87f5524d..9c978a5c 100644 --- a/src/Smalot/PdfParser/RawData/FilterHelper.php +++ b/src/Smalot/PdfParser/RawData/FilterHelper.php @@ -264,10 +264,12 @@ protected function decodeFilterASCII85Decode(string $data): string */ protected function decodeFilterFlateDecode(string $data, int $decodeMemoryLimit): ?string { + $effectiveDecodeMemoryLimit = $this->getEffectiveDecodeMemoryLimit($decodeMemoryLimit); + // Uncatchable E_WARNING for "data error" is @ suppressed // so execution may proceed with an alternate decompression // method. - $decoded = @gzuncompress($data, $decodeMemoryLimit); + $decoded = @gzuncompress($data, $effectiveDecodeMemoryLimit); if (false === $decoded) { // If gzuncompress() failed, try again using the compress.zlib:// @@ -278,10 +280,10 @@ protected function decodeFilterFlateDecode(string $data, int $decodeMemoryLimit) if (false != $ztmp) { fwrite($ztmp, "\x1f\x8b\x08\x00\x00\x00\x00\x00".$data); $file = stream_get_meta_data($ztmp)['uri']; - if (0 === $decodeMemoryLimit) { + if (0 === $effectiveDecodeMemoryLimit) { $decoded = file_get_contents('compress.zlib://'.$file); } else { - $decoded = file_get_contents('compress.zlib://'.$file, false, null, 0, $decodeMemoryLimit); + $decoded = file_get_contents('compress.zlib://'.$file, false, null, 0, $effectiveDecodeMemoryLimit); } fclose($ztmp); } @@ -295,6 +297,54 @@ protected function decodeFilterFlateDecode(string $data, int $decodeMemoryLimit) return $decoded; } + private function getEffectiveDecodeMemoryLimit(int $decodeMemoryLimit): int + { + if ($decodeMemoryLimit > 0) { + return $decodeMemoryLimit; + } + + $memoryLimit = $this->parseIniMemoryLimit((string) ini_get('memory_limit')); + if ($memoryLimit <= 0) { + // Unlimited PHP memory limit. + return 0; + } + + // Keep substantial headroom because zlib decoding can transiently allocate + // more memory than the returned string. + $available = $memoryLimit - memory_get_usage(true); + if ($available <= (16 * 1024 * 1024)) { + return 1024 * 1024; + } + + $safeLimit = (int) floor(($available - (8 * 1024 * 1024)) / 2); + + return (int) min(max($safeLimit, 1024 * 1024), 256 * 1024 * 1024); + } + + private function parseIniMemoryLimit(string $value): int + { + $value = trim($value); + if ('' === $value || '-1' === $value) { + return -1; + } + + $unit = strtolower(substr($value, -1)); + $number = (int) $value; + switch ($unit) { + case 'g': + return $number * 1024 * 1024 * 1024; + + case 'm': + return $number * 1024 * 1024; + + case 'k': + return $number * 1024; + + default: + return (int) $value; + } + } + /** * LZWDecode * diff --git a/src/Smalot/PdfParser/RawData/RawDataParser.php b/src/Smalot/PdfParser/RawData/RawDataParser.php index ec8d01e5..a426a2a8 100644 --- a/src/Smalot/PdfParser/RawData/RawDataParser.php +++ b/src/Smalot/PdfParser/RawData/RawDataParser.php @@ -778,7 +778,9 @@ protected function getRawObject(string $pdfData, int $offset = 0, ?array $header // we get stream length here to later help preg_match test less data $streamLen = (int) $this->getHeaderValue($headerDic, 'Length', 'numeric', 0); - $skip = false === $this->config->getRetainImageContent() && 'XObject' == $this->getHeaderValue($headerDic, 'Type', '/') && 'Image' == $this->getHeaderValue($headerDic, 'Subtype', '/'); + $skip = (false === $this->config->getRetainImageContent() || $this->shouldSkipImageStreamContent($headerDic)) + && 'XObject' == $this->getHeaderValue($headerDic, 'Type', '/') + && 'Image' == $this->getHeaderValue($headerDic, 'Subtype', '/'); $pregResult = preg_match( '/(endstream)[\x09\x0a\x0c\x0d\x20]/isU', @@ -819,6 +821,61 @@ protected function getRawObject(string $pdfData, int $offset = 0, ?array $header return [$objtype, $objval, $offset]; } + private function shouldSkipImageStreamContent(array $headerDic): bool + { + $memoryLimit = $this->getMemoryLimitBytes(); + if ($memoryLimit <= 0) { + return false; + } + + if ('XObject' != $this->getHeaderValue($headerDic, 'Type', '/') || 'Image' != $this->getHeaderValue($headerDic, 'Subtype', '/')) { + return false; + } + + if ($memoryLimit <= (256 * 1024 * 1024)) { + return true; + } + + return memory_get_usage(true) >= (int) floor($memoryLimit * 0.8); + } + + private function getMemoryLimitBytes(): int + { + static $memoryLimit = null; + if (null !== $memoryLimit) { + return $memoryLimit; + } + + $value = trim((string) ini_get('memory_limit')); + if ('' === $value || '-1' === $value) { + $memoryLimit = -1; + + return $memoryLimit; + } + + $unit = strtolower(substr($value, -1)); + $number = (int) $value; + switch ($unit) { + case 'g': + $memoryLimit = $number * 1024 * 1024 * 1024; + break; + + case 'm': + $memoryLimit = $number * 1024 * 1024; + break; + + case 'k': + $memoryLimit = $number * 1024; + break; + + default: + $memoryLimit = (int) $value; + break; + } + + return $memoryLimit; + } + /** * Get value of an object header's section (obj << YYY >> part ). * diff --git a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php index 7c7fe7e6..3517ab7a 100644 --- a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php +++ b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php @@ -111,4 +111,11 @@ public function testPDFDocEncodingDecode(): void $testSubject = '•†‡…—–ƒ⁄‹›−‰„“”‘’‚™ŁŒŠŸŽıłœšž'; self::assertStringContainsString($testSubject, $details['Subject']); } + + public function testParseFileWithLargeFlateStreams(): void + { + $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/PullRequest457.pdf'); + + self::assertCount(28, $document->getPages()); + } } From 1a2e8c8c68bd0c5225533f33f17ae14cbdafa70d Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:51:03 -0300 Subject: [PATCH 02/13] Fix null header handling in stream skip guard Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- src/Smalot/PdfParser/RawData/RawDataParser.php | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Smalot/PdfParser/RawData/RawDataParser.php b/src/Smalot/PdfParser/RawData/RawDataParser.php index a426a2a8..9bcf91e8 100644 --- a/src/Smalot/PdfParser/RawData/RawDataParser.php +++ b/src/Smalot/PdfParser/RawData/RawDataParser.php @@ -821,8 +821,12 @@ protected function getRawObject(string $pdfData, int $offset = 0, ?array $header return [$objtype, $objval, $offset]; } - private function shouldSkipImageStreamContent(array $headerDic): bool + private function shouldSkipImageStreamContent(?array $headerDic): bool { + if (false === \is_array($headerDic)) { + return false; + } + $memoryLimit = $this->getMemoryLimitBytes(); if ($memoryLimit <= 0) { return false; From faccd50521b762ffa4cda413c9f57005dc5102ad Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:53:09 -0300 Subject: [PATCH 03/13] Refactor duplicated memory limit parsing Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- src/Smalot/PdfParser/RawData/FilterHelper.php | 27 +---------- src/Smalot/PdfParser/RawData/MemoryLimit.php | 45 +++++++++++++++++++ .../PdfParser/RawData/RawDataParser.php | 27 +---------- 3 files changed, 47 insertions(+), 52 deletions(-) create mode 100644 src/Smalot/PdfParser/RawData/MemoryLimit.php diff --git a/src/Smalot/PdfParser/RawData/FilterHelper.php b/src/Smalot/PdfParser/RawData/FilterHelper.php index 9c978a5c..88c4f12a 100644 --- a/src/Smalot/PdfParser/RawData/FilterHelper.php +++ b/src/Smalot/PdfParser/RawData/FilterHelper.php @@ -303,7 +303,7 @@ private function getEffectiveDecodeMemoryLimit(int $decodeMemoryLimit): int return $decodeMemoryLimit; } - $memoryLimit = $this->parseIniMemoryLimit((string) ini_get('memory_limit')); + $memoryLimit = MemoryLimit::toBytes((string) ini_get('memory_limit')); if ($memoryLimit <= 0) { // Unlimited PHP memory limit. return 0; @@ -320,31 +320,6 @@ private function getEffectiveDecodeMemoryLimit(int $decodeMemoryLimit): int return (int) min(max($safeLimit, 1024 * 1024), 256 * 1024 * 1024); } - - private function parseIniMemoryLimit(string $value): int - { - $value = trim($value); - if ('' === $value || '-1' === $value) { - return -1; - } - - $unit = strtolower(substr($value, -1)); - $number = (int) $value; - switch ($unit) { - case 'g': - return $number * 1024 * 1024 * 1024; - - case 'm': - return $number * 1024 * 1024; - - case 'k': - return $number * 1024; - - default: - return (int) $value; - } - } - /** * LZWDecode * diff --git a/src/Smalot/PdfParser/RawData/MemoryLimit.php b/src/Smalot/PdfParser/RawData/MemoryLimit.php new file mode 100644 index 00000000..bbb9160f --- /dev/null +++ b/src/Smalot/PdfParser/RawData/MemoryLimit.php @@ -0,0 +1,45 @@ + + * + * @date 2026-04-24 + * + * @license LGPLv3 + * + * @url + */ + +namespace Smalot\PdfParser\RawData; + +final class MemoryLimit +{ + /** + * Converts PHP ini memory values (for example "128M", "1G", "-1") to bytes. + */ + public static function toBytes(string $value): int + { + $value = trim($value); + if ('' === $value || '-1' === $value) { + return -1; + } + + $unit = strtolower(substr($value, -1)); + $number = (int) $value; + switch ($unit) { + case 'g': + return $number * 1024 * 1024 * 1024; + + case 'm': + return $number * 1024 * 1024; + + case 'k': + return $number * 1024; + + default: + return (int) $value; + } + } +} \ No newline at end of file diff --git a/src/Smalot/PdfParser/RawData/RawDataParser.php b/src/Smalot/PdfParser/RawData/RawDataParser.php index 9bcf91e8..12fdc2b6 100644 --- a/src/Smalot/PdfParser/RawData/RawDataParser.php +++ b/src/Smalot/PdfParser/RawData/RawDataParser.php @@ -850,32 +850,7 @@ private function getMemoryLimitBytes(): int return $memoryLimit; } - $value = trim((string) ini_get('memory_limit')); - if ('' === $value || '-1' === $value) { - $memoryLimit = -1; - - return $memoryLimit; - } - - $unit = strtolower(substr($value, -1)); - $number = (int) $value; - switch ($unit) { - case 'g': - $memoryLimit = $number * 1024 * 1024 * 1024; - break; - - case 'm': - $memoryLimit = $number * 1024 * 1024; - break; - - case 'k': - $memoryLimit = $number * 1024; - break; - - default: - $memoryLimit = (int) $value; - break; - } + $memoryLimit = MemoryLimit::toBytes((string) ini_get('memory_limit')); return $memoryLimit; } From 83a11f89e56d11f4732279ad24bd452402b85db3 Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:54:44 -0300 Subject: [PATCH 04/13] Add unit tests for MemoryLimit helper Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- tests/PHPUnit/Unit/MemoryLimitTest.php | 61 ++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 tests/PHPUnit/Unit/MemoryLimitTest.php diff --git a/tests/PHPUnit/Unit/MemoryLimitTest.php b/tests/PHPUnit/Unit/MemoryLimitTest.php new file mode 100644 index 00000000..583cd2f8 --- /dev/null +++ b/tests/PHPUnit/Unit/MemoryLimitTest.php @@ -0,0 +1,61 @@ + + * + * @date 2026-04-24 + * + * @license LGPLv3 + * + * @url + */ + +namespace PHPUnitTests\Unit; + +use PHPUnitTests\TestCase; +use Smalot\PdfParser\RawData\MemoryLimit; + +class MemoryLimitTest extends TestCase +{ + public function testToBytesWithGigabytes(): void + { + $this->assertSame(1073741824, MemoryLimit::toBytes('1G')); + } + + public function testToBytesWithMegabytes(): void + { + $this->assertSame(268435456, MemoryLimit::toBytes('256M')); + } + + public function testToBytesWithKilobytes(): void + { + $this->assertSame(65536, MemoryLimit::toBytes('64K')); + } + + public function testToBytesWithoutUnit(): void + { + $this->assertSame(2048, MemoryLimit::toBytes('2048')); + } + + public function testToBytesTrimsInput(): void + { + $this->assertSame(33554432, MemoryLimit::toBytes(' 32M ')); + } + + public function testToBytesHandlesLowercaseUnits(): void + { + $this->assertSame(1048576, MemoryLimit::toBytes('1m')); + } + + public function testToBytesReturnsMinusOneForUnlimitedValue(): void + { + $this->assertSame(-1, MemoryLimit::toBytes('-1')); + } + + public function testToBytesReturnsMinusOneForEmptyValue(): void + { + $this->assertSame(-1, MemoryLimit::toBytes('')); + } +} From 975feb513a6c38c60aa30af27839a390316cf65c Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:58:04 -0300 Subject: [PATCH 05/13] Refactor MemoryLimit test with data provider Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- tests/PHPUnit/Unit/MemoryLimitTest.php | 63 ++++++++++---------------- 1 file changed, 24 insertions(+), 39 deletions(-) diff --git a/tests/PHPUnit/Unit/MemoryLimitTest.php b/tests/PHPUnit/Unit/MemoryLimitTest.php index 583cd2f8..53088ec1 100644 --- a/tests/PHPUnit/Unit/MemoryLimitTest.php +++ b/tests/PHPUnit/Unit/MemoryLimitTest.php @@ -3,7 +3,7 @@ /** * @file This file is part of the PdfParser library. * - * @author Konrad Abicht + * @author Vitor Mattos <1079143+vitormattos@users.noreply.github.com> * * @date 2026-04-24 * @@ -19,43 +19,28 @@ class MemoryLimitTest extends TestCase { - public function testToBytesWithGigabytes(): void - { - $this->assertSame(1073741824, MemoryLimit::toBytes('1G')); - } - - public function testToBytesWithMegabytes(): void - { - $this->assertSame(268435456, MemoryLimit::toBytes('256M')); - } - - public function testToBytesWithKilobytes(): void - { - $this->assertSame(65536, MemoryLimit::toBytes('64K')); - } - - public function testToBytesWithoutUnit(): void - { - $this->assertSame(2048, MemoryLimit::toBytes('2048')); - } - - public function testToBytesTrimsInput(): void - { - $this->assertSame(33554432, MemoryLimit::toBytes(' 32M ')); - } - - public function testToBytesHandlesLowercaseUnits(): void - { - $this->assertSame(1048576, MemoryLimit::toBytes('1m')); - } - - public function testToBytesReturnsMinusOneForUnlimitedValue(): void - { - $this->assertSame(-1, MemoryLimit::toBytes('-1')); - } - - public function testToBytesReturnsMinusOneForEmptyValue(): void - { - $this->assertSame(-1, MemoryLimit::toBytes('')); + /** + * @dataProvider toBytesProvider + */ + public function testToBytes(string $input, int $expected): void + { + $this->assertSame($expected, MemoryLimit::toBytes($input)); + } + + /** + * @return array + */ + public static function toBytesProvider(): array + { + return [ + 'gigabytes' => ['1G', 1073741824], + 'megabytes' => ['256M', 268435456], + 'kilobytes' => ['64K', 65536], + 'without unit' => ['2048', 2048], + 'trimmed value' => [' 32M ', 33554432], + 'lowercase unit' => ['1m', 1048576], + 'unlimited value' => ['-1', -1], + 'empty value' => ['', -1], + ]; } } From 4ec6c0bae6f4a0ad88bdd46eece419a460492b7a Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:00:25 -0300 Subject: [PATCH 06/13] Apply cs-fixer formatting for MemoryLimit Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- src/Smalot/PdfParser/RawData/MemoryLimit.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Smalot/PdfParser/RawData/MemoryLimit.php b/src/Smalot/PdfParser/RawData/MemoryLimit.php index bbb9160f..8bc3a87f 100644 --- a/src/Smalot/PdfParser/RawData/MemoryLimit.php +++ b/src/Smalot/PdfParser/RawData/MemoryLimit.php @@ -3,7 +3,7 @@ /** * @file This file is part of the PdfParser library. * - * @author Konrad Abicht + * @author Vitor Mattos <1079143+vitormattos@users.noreply.github.com> * * @date 2026-04-24 * @@ -42,4 +42,4 @@ public static function toBytes(string $value): int return (int) $value; } } -} \ No newline at end of file +} From 7434c94a473d194aebae5c5e72c20cf813d15ae1 Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:01:59 -0300 Subject: [PATCH 07/13] Reduce memory usage in large flate regression test Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- tests/PHPUnit/Integration/DocumentIssueFocusTest.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php index 3517ab7a..9aac7c08 100644 --- a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php +++ b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php @@ -36,6 +36,7 @@ namespace PHPUnitTests\Integration; use PHPUnitTests\TestCase; +use Smalot\PdfParser\Config; use Smalot\PdfParser\Document; use Smalot\PdfParser\Parser; @@ -114,7 +115,9 @@ public function testPDFDocEncodingDecode(): void public function testParseFileWithLargeFlateStreams(): void { - $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/PullRequest457.pdf'); + $config = new Config(); + $config->setRetainImageContent(false); + $document = (new Parser([], $config))->parseFile($this->rootDir.'/samples/bugs/PullRequest457.pdf'); self::assertCount(28, $document->getPages()); } From 10c7ffab9cdf85c4b9dded3484aa1e191faf709b Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:05:09 -0300 Subject: [PATCH 08/13] Lower decode memory cap in large flate test Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- tests/PHPUnit/Integration/DocumentIssueFocusTest.php | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php index 9aac7c08..708aaba0 100644 --- a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php +++ b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php @@ -117,6 +117,7 @@ public function testParseFileWithLargeFlateStreams(): void { $config = new Config(); $config->setRetainImageContent(false); + $config->setDecodeMemoryLimit(8 * 1024 * 1024); $document = (new Parser([], $config))->parseFile($this->rootDir.'/samples/bugs/PullRequest457.pdf'); self::assertCount(28, $document->getPages()); From 6aa65941b07db163ace64775b3b4855315a90d48 Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:10:09 -0300 Subject: [PATCH 09/13] Mark large flate regression as linux-only Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- tests/PHPUnit/Integration/DocumentIssueFocusTest.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php index 708aaba0..82dad54f 100644 --- a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php +++ b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php @@ -113,6 +113,9 @@ public function testPDFDocEncodingDecode(): void self::assertStringContainsString($testSubject, $details['Subject']); } + /** + * @group linux-only + */ public function testParseFileWithLargeFlateStreams(): void { $config = new Config(); From 80c47be2de9edf49d67dda5d22f14e4bdf274057 Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:15:17 -0300 Subject: [PATCH 10/13] Reduce memory use in PullRequest457 page test Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- tests/PHPUnit/Integration/PageTest.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/PHPUnit/Integration/PageTest.php b/tests/PHPUnit/Integration/PageTest.php index 33751e59..371fbdb9 100644 --- a/tests/PHPUnit/Integration/PageTest.php +++ b/tests/PHPUnit/Integration/PageTest.php @@ -154,7 +154,9 @@ public function testGetTextPullRequest457(): void { // Document with text. $filename = $this->rootDir.'/samples/bugs/PullRequest457.pdf'; - $parser = $this->getParserInstance(); + $config = new Config(); + $config->setRetainImageContent(false); + $parser = $this->getParserInstance($config); $document = $parser->parseFile($filename); $pages = $document->getPages(); $page = $pages[0]; From 5be65722140f74da66d932b4fcb6ad55c4957333 Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:18:17 -0300 Subject: [PATCH 11/13] Mark PullRequest457 page test as linux-only Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- tests/PHPUnit/Integration/PageTest.php | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/PHPUnit/Integration/PageTest.php b/tests/PHPUnit/Integration/PageTest.php index 371fbdb9..8c709e58 100644 --- a/tests/PHPUnit/Integration/PageTest.php +++ b/tests/PHPUnit/Integration/PageTest.php @@ -147,6 +147,7 @@ public function testGetText(): void /** * @group memory-heavy + * @group linux-only * * @see https://github.com/smalot/pdfparser/pull/457 */ From 602251d19a2c86356390c2ef855edbec4f44eb9c Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:27:26 -0300 Subject: [PATCH 12/13] test: stabilize windows low-memory PHPUnit run Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- tests/PHPUnit/Integration/PageTest.php | 2 +- tests/PHPUnit/Integration/ParserTest.php | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/PHPUnit/Integration/PageTest.php b/tests/PHPUnit/Integration/PageTest.php index 8c709e58..496a280f 100644 --- a/tests/PHPUnit/Integration/PageTest.php +++ b/tests/PHPUnit/Integration/PageTest.php @@ -147,7 +147,7 @@ public function testGetText(): void /** * @group memory-heavy - * @group linux-only + * @group linux-only * * @see https://github.com/smalot/pdfparser/pull/457 */ diff --git a/tests/PHPUnit/Integration/ParserTest.php b/tests/PHPUnit/Integration/ParserTest.php index 046bf431..4489c320 100644 --- a/tests/PHPUnit/Integration/ParserTest.php +++ b/tests/PHPUnit/Integration/ParserTest.php @@ -54,6 +54,7 @@ protected function setUp(): void * Notice: it may fail to run in Scrutinizer because of memory limitations. * * @group memory-heavy + * @group linux-only */ public function testParseFile(): void { @@ -375,8 +376,7 @@ public function testRetainImageContentImpact(): void $document = $this->fixture->parseFile($filename); } - $usedMemory = memory_get_usage(true); - $this->assertGreaterThan($baselineMemory + 180000000, $usedMemory, 'Memory is only '.$usedMemory); + $memoryWithRetainedImages = memory_get_usage(true); $this->assertTrue(null != $document && '' !== $document->getText()); // force garbage collection @@ -395,12 +395,13 @@ public function testRetainImageContentImpact(): void $document = $this->fixture->parseFile($filename); } - $usedMemory = memory_get_usage(true); - /* - * note: the following memory value is set manually and may differ from system to system. - * it must be high enough to not produce a false negative though. - */ - $this->assertLessThan($baselineMemory * 1.05, $usedMemory, 'Memory is '.$usedMemory); + $memoryWithoutRetainedImages = memory_get_usage(true); + $this->assertLessThanOrEqual( + $memoryWithRetainedImages, + $memoryWithoutRetainedImages, + 'Discarding image content should not use more memory than retaining it.' + ); + $this->assertGreaterThanOrEqual($baselineMemory, $memoryWithoutRetainedImages); $this->assertTrue('' !== $document->getText()); } From 95e1553213730d592b7536a5758549445a9d739d Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:31:25 -0300 Subject: [PATCH 13/13] test: collect garbage after each test Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- tests/PHPUnit/TestCase.php | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/PHPUnit/TestCase.php b/tests/PHPUnit/TestCase.php index 08d4739a..bb40dfc3 100644 --- a/tests/PHPUnit/TestCase.php +++ b/tests/PHPUnit/TestCase.php @@ -57,6 +57,19 @@ protected function setUp(): void $this->rootDir = __DIR__.'/../..'; } + protected function tearDown(): void + { + $this->fixture = null; + $this->rootDir = null; + + \gc_collect_cycles(); + if (\function_exists('gc_mem_caches')) { + \gc_mem_caches(); + } + + parent::tearDown(); + } + protected function getDocumentInstance(): Document { return new Document();