diff --git a/samples/bugs/PullRequestXrefSubsectionMultipleSpaces.pdf b/samples/bugs/PullRequestXrefSubsectionMultipleSpaces.pdf new file mode 100644 index 00000000..508c1974 Binary files /dev/null and b/samples/bugs/PullRequestXrefSubsectionMultipleSpaces.pdf differ diff --git a/src/Smalot/PdfParser/RawData/RawDataParser.php b/src/Smalot/PdfParser/RawData/RawDataParser.php index ec8d01e5..0df21551 100644 --- a/src/Smalot/PdfParser/RawData/RawDataParser.php +++ b/src/Smalot/PdfParser/RawData/RawDataParser.php @@ -169,7 +169,7 @@ protected function decodeXref(string $pdfData, int $startxref, array $xref = [], // initialize object number $obj_num = 0; // search for cross-reference entries or subsection - while (preg_match('/([0-9]+)[\x20]([0-9]+)[\x20]?([nf]?)(\r\n|[\x20]?[\r\n])/', $pdfData, $matches, \PREG_OFFSET_CAPTURE, $offset) > 0) { + while (preg_match('/([0-9]+)[\x20]+([0-9]+)[\x20]*([nf]?)(\r\n|[\x20]*[\r\n])/', $pdfData, $matches, \PREG_OFFSET_CAPTURE, $offset) > 0) { if ($matches[0][1] != $offset) { // we are on another section break; diff --git a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php index 7c7fe7e6..3d5d3de5 100644 --- a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php +++ b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php @@ -111,4 +111,11 @@ public function testPDFDocEncodingDecode(): void $testSubject = '•†‡…—–ƒ⁄‹›−‰„“”‘’‚™ŁŒŠŸŽıłœšž'; self::assertStringContainsString($testSubject, $details['Subject']); } + + public function testParseFileWithXrefSubsectionHavingMultipleSpaces(): void + { + $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/PullRequestXrefSubsectionMultipleSpaces.pdf'); + + self::assertCount(1, $document->getPages()); + } }