From be240ba9a38fa66d8b528ac977d13a8b7452c3b6 Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Thu, 23 Apr 2026 23:59:46 -0300 Subject: [PATCH 1/4] fix: allow startxref offset to include leading whitespace Some PDFs set startxref to the whitespace immediately before the xref keyword instead of the first letter of xref. The parser required an exact match and incorrectly switched to xref stream decoding, which then failed with Invalid object reference. Changes: - Skip PDF whitespace before checking startxref position - Use adjusted offset when decoding classic xref - Apply same whitespace tolerance for Unix line-ending detection - Tighten trailer key regexes to match /Size /Root /Encrypt /Info /Prev - Add regression fixture and integration test Regression fixture: - samples/bugs/PullRequestXrefWhitespaceStart.pdf Test: - DocumentIssueFocusTest::testParseFileWhenStartxrefPointsToLeadingWhitespace Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- .../bugs/PullRequestXrefWhitespaceStart.pdf | Bin 0 -> 6499 bytes .../PdfParser/RawData/RawDataParser.php | 20 +++++++++++------- .../Integration/DocumentIssueFocusTest.php | 7 ++++++ 3 files changed, 19 insertions(+), 8 deletions(-) create mode 100644 samples/bugs/PullRequestXrefWhitespaceStart.pdf diff --git a/samples/bugs/PullRequestXrefWhitespaceStart.pdf b/samples/bugs/PullRequestXrefWhitespaceStart.pdf new file mode 100644 index 0000000000000000000000000000000000000000..718557609b254ab33923a92168582a720f6a4cf2 GIT binary patch literal 6499 zcmeHLU5p!772YI($|!Ebk4g|A-EK)YA+hiL*j|t8b-T9Xu<7n*<8?~VX3Ngb-1Sa6 zo|(*yv)-TxQ2P)B50MILB^sfY3Y8Z`rM^@}>LPyBR@oF);-TV!JRlw*5=Q5-wnkgzIc5W72CuYZ6 zN1{+Dg1GBBMCoR@$|HSSQJ*9pS|uIboSOLi>u*dTU7MQtT49c#v+HDC|LP@&EMA&# ztCyZsWo_b-YNpaJ_q%o%;)42J!*t92sc{8DLdx(=oAG!xgHXlM*2=9$GZ+MKQ{(HN zXO}VF*x1NzNIA>tU_q8;%!^nQvoIp-ZknFb&zkOoOh=#vmQZzco341EO;aOfcF!@G zDow@2AYGbbSjY)@9MKslt5(mO8V}TgLQO5J;J#-$be8^y&2^Dn7_aL{GPAL!#DoJ+ZEa0C-iM9jg7=ZrbOjY% z?=r{f5Nx`bgu}pfKQgpuQC%lpWp_!Jjd?IG2FYp@$J^Xp&<;i=L}@YDU5sXCLZqn^ zL)%>jcA23;T5w^|AVi79Df?cy@ex57Yf2~C8ikiFxve-Z@n_v4uGMo?lCTpb$zU}D zQ`LIU^{g)P4Oj5+fY@Y*m6%GGxUd1qnbu4-wl!3yHK$r#8r80%uG3nIMVzpZklxjf z?T|ISUsc_Ne9(FuNk0kFi${Hl_RNrmsMpZ{i{*{2PndgU>SMJ|U;U{kJ4PA~vLYIJ zQ89Wfw$+B>k(8Q2=N;)%K|{xKHmh`}Ii_Ny)J6Swq)i)CwW@1%35qqO6vui-g#vsI zOVS9-ejhDPL!G`Eze1GlU@xbKjn?hArZ$$ZFDh5VJJOeml|0|rP$3KsmaZMu1k=s2 zLxydOzG6d(=Tjvfik@Ar`?bAoS;o|OSi9SB2BTU}H#FkVA{PuJG$p3SMP4Xoc`?fi zZ9ysva#<9P@#9c4jmf0Ho`VYm7yrYxhak#TGilPwE)yOUBy&k$!fl3Kb&yL11i7~EP$5? zSO8d?n%JPQ0iJx^5dt_0&m#1qdjO0Mhw<`gyxgAdkuoY~i&-%%!N11~SzB@4ta^kM z3l94@Pv9vq3gJ>A0D1z&r_j2fH~;M_rCP+u~$x?Jbyvlx9=x=u3l?B z(Hi?~=lQ+b_r6S?dhh6?w_d!8I_R7CUEO~6UggJoUU>%HdKg{&)bBs@=m%f8`Y(_F z(RcS4_dcE7yZY=)kAM3Xv2Wm)kG(zn&f(L%$ny*K>|3W!p1J4e$8s~<*~kBJ{jnSP zD{tbH$7bgC$lR^!mDl%u@$&mmEnoZ6<(c>S2gbN>-SamWmCk+sw>Qvl4xraweR==e znkGJU{ls&p-`(C?S-!Eg^~-$^+&{6s^@~3|{hjH*K2)3i_`&-=d}06orN4}gU3+i7 znW3>wMwx=vaGmQ=VQN+%Ezy7#c=k_efMcnP0792A#aXxw)s>M07Po*M0Q&ODgAh;( zdR!yMAUGTz+AgY8@Yz$V=ZWgEM;-}`YgFMwsf4C0BkYHu83}1*E(enrgnThfTb6C1 zM{PfERYU2yWRR)=`-8L#ILJKsgaqH0K@PGMO{pS%8)i>51lYg@N>oPeb%FwB1&sk4 z(%?v^1k1+?D6HKNv(Yxl4Td_zHUMO}$OA=8j&T=ctwD?KMzTYbUU0}oB&2y43qUyO zfLBO-YmEJ$)$%gD0-U zj3R@R!xlP%jpFDxh_Jr!7X%F|RHVP~!rtMF_18u(M}iOf3FcY0t|DI8Ax1#~^s~%Q zxmcDZXriw#$})$zJ;N&_T37w5iqg63r$Hi-{n8pUF`}01rt8te0;xMpI@sC%q*Gaz z;n-eGCPlbUV_6X=w?a`KklX)_+j4ipZ2?8GQ~v+qws*PTfS}&xes{Sa!*h(z0~lcU z4j(P|qgRg|;-qNRUHV6{{cY&q4&3si;(rK5taOMw#qaNe8!gpM(3u^tO2Vrwb-*C$ zomp}`FdSVqh}r9|5(mSLVVwF_RJY8o9;&4Zj-CgTWsEz;WCwhRi}Rt_VHEM9A*7t) zGF%^SYBC()i4uHAA5<(9qynNn!H0q}`ax#YCgcGar`kk{tWs@K9?soVTb_plJ=rD} zB>=yvw#njE7GxpcEb)_7RFP{nxmXj61-TB(T5n2?Qk|C@lh7^6b+J}m&HzNsz#TkfpC9jn OTq 0) { + if (preg_match('/\/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) { $xref['trailer']['size'] = (int) $matches[1]; } - if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) { + if (preg_match('/\/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) { $xref['trailer']['root'] = (int) $matches[1].'_'.(int) $matches[2]; } - if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) { + if (preg_match('/\/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) { $xref['trailer']['encrypt'] = (int) $matches[1].'_'.(int) $matches[2]; } - if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) { + if (preg_match('/\/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) { $xref['trailer']['info'] = (int) $matches[1].'_'.(int) $matches[2]; } if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) { @@ -216,7 +216,7 @@ protected function decodeXref(string $pdfData, int $startxref, array $xref = [], $xref['trailer']['id'][1] = $matches[2]; } } - if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) { + if (preg_match('/\/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) { $offset = (int) $matches[1]; if (0 != $offset) { // get previous xref @@ -922,14 +922,18 @@ protected function getXrefData(string $pdfData, int $offset = 0, array $xref = [ throw new \Exception('Unable to find xref (PDF corrupted?)'); } + // Some files point startxref to the whitespace right before the xref keyword. + $startxrefOffset = $startxref + strspn($pdfData, $this->config->getPdfWhitespaces(), $startxref); + // check xref position - if (strpos($pdfData, 'xref', $startxref) == $startxref) { + if (strpos($pdfData, 'xref', $startxrefOffset) == $startxrefOffset) { // Cross-Reference - $xref = $this->decodeXref($pdfData, $startxref, $xref, $visitedOffsets); + $xref = $this->decodeXref($pdfData, $startxrefOffset, $xref, $visitedOffsets); } else { // Check if the $pdfData might have the wrong line-endings $pdfDataUnix = str_replace("\r\n", "\n", $pdfData); - if ($startxref < \strlen($pdfDataUnix) && strpos($pdfDataUnix, 'xref', $startxref) == $startxref) { + $startxrefUnixOffset = $startxref + strspn($pdfDataUnix, $this->config->getPdfWhitespaces(), $startxref); + if ($startxrefUnixOffset < \strlen($pdfDataUnix) && strpos($pdfDataUnix, 'xref', $startxrefUnixOffset) == $startxrefUnixOffset) { // Return Unix-line-ending flag $xref = ['Unix' => true]; } else { diff --git a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php index 7c7fe7e6..bd85d8cb 100644 --- a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php +++ b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php @@ -111,4 +111,11 @@ public function testPDFDocEncodingDecode(): void $testSubject = '•†‡…—–ƒ⁄‹›−‰„“”‘’‚™ŁŒŠŸŽıłœšž'; self::assertStringContainsString($testSubject, $details['Subject']); } + + public function testParseFileWhenStartxrefPointsToLeadingWhitespace(): void + { + $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/PullRequestXrefWhitespaceStart.pdf'); + + self::assertSame(1, count($document->getPages())); + } } From 583526e5e4a901fb0f00e273e3836dc69d963995 Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:05:13 -0300 Subject: [PATCH 2/4] test: use assertCount for page count assertion Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- tests/PHPUnit/Integration/DocumentIssueFocusTest.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php index bd85d8cb..836bc7a1 100644 --- a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php +++ b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php @@ -116,6 +116,6 @@ public function testParseFileWhenStartxrefPointsToLeadingWhitespace(): void { $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/PullRequestXrefWhitespaceStart.pdf'); - self::assertSame(1, count($document->getPages())); + self::assertCount(1, $document->getPages()); } } From a3d9df020b26494e759bc90c8b73923740e1b1cd Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 11:59:17 -0300 Subject: [PATCH 3/4] test: add pdf.js compressed xref regression Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- samples/bugs/PullRequest797.pdf | Bin 0 -> 8286 bytes .../Integration/DocumentIssueFocusTest.php | 10 ++++++++++ 2 files changed, 10 insertions(+) create mode 100644 samples/bugs/PullRequest797.pdf diff --git a/samples/bugs/PullRequest797.pdf b/samples/bugs/PullRequest797.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f3e25216d231f4b18c14e33b00bfc7877650c30c GIT binary patch literal 8286 zcmZ{qcU;p+(D0Qip$P~GD2Cn%od8m#9layHgb+FjkkEVYh9XGsMUbvYlMaG(REi*= z2m%TLrS~UzchC9WJ@0-#ncbb=?CkE$>|fuo=_o4m3GxdA*eBC%91(Cu2nq(|RFo7F5EK%?3PCXeu&|&h*KfW> zfAg*E=z_v}1}VE>*D1oF2wNBcqydB5qa1)>iK|6O6m~CHfKLwf@B&eY014iwLycsj z7ws2ZG2~%dN%$`s$uY7aMmSbv{1O_kkXiBh%qga+$6=4&!@>z~cHv~wWYt-(c4RUd z(ECoV57V)uf(%zac{OTlonGfAr(wAY_+NYc75b}?{x|ejiSmG94KDfmhem1Az2kNQ9>c6q{o4Uy6THd^P`jA8d@F`Y;sG z41|p_5M%)J!6r)86l;J>~8MTx8pyH?avbUC)*yL zu&WQhSH**0jYkK9&4eQy!6%P!vHf!=MHmw5;pldiZ^5g(VlSoa=z&BjI6ypr!ot{m zLH?SEfB_&QM_ZHw5{Qk*FH0pjHm8nodmu;?0@XvfLg0UEunz%Y2zR_nIZ*KGef(bj z62D#HuW}F*78m`yHUxnZ!a@R9=lE~={9ZHFCPY4F{f&7o&&1S?ve|~yQLY~vzDTFD zAsW~~?m0ee#+HuuZ@~pNEJpLo^))Rv>fJT`j3qSkL5_LjgW?1yC%Vj=iHUvk^G5M zCsgJ=@?NV4VU)Lvf5~Wp!rt(;^;(8Ud-%@}RtvqO2;M)D0aMO*U_4syG)C*T!QFIj zI9VBWgwJVzIb`YHDsHgDPZ1v}6qBW=LwD0J6TO#-Kkn_OD17Tqh#yja>)L*~%BxE> zixi_`QRI3%p~cOlYb*xW0Xyreh4Nx739p&4W(|D=`0QAMI;F8#$QrK#wrR$c#M zjPkj(Z&~)^q_>>Jk0U+lzY*diXesrx&cAVpbiUQWkHq9;i|7=K4VACdnUk#w*G<+U z=if={ZB5nTzIfQp?M*h~%P5A!($Za9sp|{?RA{^=WXcH*4or`Ioa!iy2Tq|=O~zg7 zd4+KAFnT7O_5kxPan$`U=WbK47jw6SaKmxlT-b&4qjb-RxX$`Zj`-eb;9n!My4eCu*KsAaa=tWjc*Fz--?! z9ld-1&~Y25BdIii_!%*zwxb!~nBgGO*T@dES3^;!`P99BBqF*Q6WDw?9$cMY^=R|t zorHr6*J4BA#P+vmhj8Ao&cMw>Xs}e^aOYMXZSmBgAy_g~#Axt5gCEp=*BHs)AYD+z4%c1MU*O+GaSTf!$&NO}wFi(uD^tmXeL2Mqr zwk5nKdw=eUs~ATn#j2InJYo1(Rw?c4(KY(P0?DM9s>KZzNN?yvT6`&K3x?L$t#Rt} zb1Wh{n~u?26f!(-;P7qT*IKlPw)j7LU>ub={@jpN62~2+H0gDJv+=B**InW`{(jcv z7!e*8>GM#Xu2+0j3KlLvCV|R`q4qByKP(;}3)|DQ+pmqg*$;eOqaDx5K~wG8%g8ZC zUZ%Mt(AV?dNTK~SE%Uu0-QMu)Qp^X*wCX?0-zJ&-pw zuOOXzAGKgCIYa23lzMqwk1@s6$qf~#uzHjW+&vbQR`;V|8p`30 zN)46$Wp>m_VI&m>8=daFb4dXS<4E{YX5_s<7E&=B?$q6us`=K)Mknfi@$_SHdZq_* zX?k!z^>0Bd$MAN(nG*JtEFNcn0}|UCTD9=Ii-$hXQyy+CEO2#HV4TlQ2_!o~B>8wW>*h7hIW{rg&2~&{1aDWQVQK7F>hBck;4Cme$`?5=~{Tcr(8< zf^iLPXcal%iiSJ7(L2OBmdOdk1#}*swXKjG>(>FALqF9LJ;lws#_<$Cu}gE1eMP^4 z>-aNjl{-U?+RAm?1MHx1p-wQ}68|u4Q$PS|x2Tad%BJ>*Zia$$TW|QZ*AG_HqOirt z)aA0Sy)zsm$4uekG@N!i-f(ecnGJsB;Ygan@Q3)zh7V*#lX2Z6b{;3?X}r2dvM9ZR z!bFXI8$QL{ZRK5nd?iIoeivUl>oaA3V`vXQd*3rPnAcwKqCRZKc_!M}99I!H-*$lD z6LQ|2w>cHlA{E(*s+*QMc|XNPlgWIYF0=Wvr9{K07dOaCbIW_x#(Yfnl6F}}l5Qn@ z7P;lpmN4O3YNwKGwH94r%_H!F?nYwaRNV(TdOMX=*)~Jp>J%NjvNBU|J8sWbgDE0) zIVb>EEa`5+S&P3%Ino}vG+2rxZf>z$>E)oEbgiknc0@Cr0vP%DX7PDD)SeGyMzdD0 zy9c)=_7(&r=TW8-DD_d=7HdS)_-FG_>_g^(w5Zm7xNTcvEP=^{niWj9?&tgLZ!fs} zr-7=5kISl;TlPM_A5oL>qDnKD4@fsE`wE9QhZ#}AqPl-5#F^c&+1R;wj3Zw1!EnRZ z(R4L!!R>tMOA8l6>A(_`)lLXJSnyf&3(GuhontTQwCgEHBpJGuy5W@d#hhG)`lI#f zVpdm8 z&%SixOz)x5viMa7X_+fE+EEtyeh#vTm^UhQpjuZpYlSWkM!g+SX^7M#U?;qE?n$dk zYPyNn8T5TrT%uCpW}Mik@yqZJ;VwGiHk*^oQYF$TA`7+C4X5G(W1GDvY_-~!&}}V@ z)(5-!{_US1zXqxtCRtj!L>n$O8Iu}PKa8KUQC*&y+gbv7(+#;Eco-SZ+KJC@8j5=P zH&^!P=;}r;*Tbs&vmW2F3k`)`M|h{LHORSssg`|7Io&v&1@M=6no*Nc4dn6o9-^eb z>qn|vT(i2B$xp+&F1$M!F}BHHqy8jKy)u(KJFU^oiF-(wG@SM`=`D~I4U_|r3$`pG z1}SfEwjEtyc%8Iq_OgO0(2cD^>BFCGQxO?NAF5OuB&rgI3tQfp$bW@Nn2?#a3*NGI z~EPxClJNm)%Iu`VGStgm&4!4| z7it;j3NvZS5^OA{sttzyPWzeau$?RhjQO4DG10p%PDq@@C7ql8_eR4Qs&nO^tQ)sk92cl|25W?POUXhc15O zJOY?2U2*+8k&FvQci-*0f?Sh$v=on>AWC6sg3f?oEM7vb3fB!kB%aZ`KDy6j@XOa_ z_FlSJ!J@?W9H@19XWRVt{OtL#Zl2yCR|}KZ_JyWhT7n)MIVNbuY_FIh@FJ0cVj$SF z`pE_#+io52Ko@HlJHIr`=Z^XGJfFB3K_UZUw%39N1ow)xL)dquWe@8PKda%CM$pTr zZtXwx>+h@mtVhY+KC$+VH|PZ(>*iiPkgf6Ew3654!@LMUVo}4`aE?v{WW2SjP}11D zwSP|EujlrJZ*L7_#{k_Rk;_<^iGQ}yTd>+>hH}}>T&}!E+IGuZ7vuaY7$Yy;8q8=j z=MTyLzBr+V=6zi@2MzB!40lmqG156+*eHOMfowPSSbk*LhQEv~kYnkhL|zjPFxbyn zB~jvDq4(c?%W)8lnist_Jg5?7lSbouo;zci$yuCNgK$|eL6oYbYdO1?a`tDlY09>U z8mvyA);JzJ7pX=5WS<&psD7+btlW0uWbAES`LJJ)S1ILtBbV0Hz#pAWs~B4&e=D<} z``J9`-B`&Leb+Tv;F6+y@ePA8|1)`BNZ9RrT;vdXkge)Slm||0 zWmmt%OOKEcHQ0VC@42>YY|eXNgW;#=n^;os*4QEiPnkRW zv`m=UOh`w}$NGlTl`uq8Pm?*>zlaaC&6}fIQi#;j`sA`S{W7y#Gp?O3ZjBX>I~yw1RGslg_m#fG z)T{=kl`qm0VbQsg$^_ffEjqpiRyow;O4NgBbg^j@{>Rld#!8<-hC?X-yxHgKt|0U6 z>bm2Pxq0ERD*9u$c)oUF>eIS~(16)_9g4$G`=QuUA((Y*9bME7-F7g&c6{H?Z} z>VSRIFx6zEg+EhCE6bpOijlR@xjasDYyqY>Bq}7CM6J>^m{h2b6Hw4a5bFBe6`lBk zI?9)HuQ1$!NbM3zjBJO_HoykUiri_nPWWrS_9Lmd8>uw$=9buX`wZFXhES- z0p%3_L7OW*Ocb=YwuAg5D%g_mGI=B=xy_Q;s5gA4dy=HZ!(Dr~5xw6$8nCNhc*WFzSt))ePsWEFZAJ+eF?-TyWQebto#bM)Ct0dlNzc=FYSAVJNhPYHoqKk% z#butF_FC%4)B6-q1zgo3Q}M^*vk6(3XH$yW?ZT@aB#qUtS{<9g54w}SCyHj3KF3$j z2f+@f8`IeRtaneTO zO}DM~YPe)%V@U6dY0fT>K5=WD87vzXp+S&RHJ)mkq~US#{(Y`$9i`V%Yh?)Y2l~NL zZZc;a0Q)t+vj(v~^TY%ny9>h?g==GZeX)KNuA9hvF$1@@@7MIS+t6V={QGbWvtzY` z@_dr^9&9GFzwV0@<)PA+=Zr6Y8r#(RaLAk;s%+Y8Ta<1a``8RoM71KR@NJgNS^x7l zy+?x$)b?rRnXt+?WL7wu9`gLf-%IG`9PHr6ejjI8^?9oMbqJA6&;3Z*W^QeAYmY5R z=Gab=9TbUXPA~4#oy7oPNAxUiv04Jyxe!wk|1C0sv^M z4QBCk+sLH7k#H3&W!6jS6uw5a8=rOOLNohpDqF0Z@S$sS@Ga8@dAe`5TC3q=nxhw` zz3NNjc^?h7wl^N!Kk{cZ=&kO%%NiZztxr(mo4{ooG`z_7{EcNAddQw`h2*_IP&qN2 zY#LChBl=F=dWsSEIA+X_5%+%443P07Twxr5DlK-7co9ZWMDj7A^FZ*Bw9-0d|*AZh0l4V^^SljI~{G=Cb`s4c-f{Cbr zCwRHmRI@Ra)yg`A{0eESrgdbAI9=et{-KY(Hp;~z0O^?`nzF4J4Bj6%HH%a3?khz0 zk%y94zC2r?)Tv|W(`L`WP&>+(gfnY0o08!tm)+Ft5r_%i?Ayyw5stBgXq1@L2bZf1 zD}Hd6JL3UiI+aSO`Yz^`OgY9HX(w+^4ddx_`781Ip#*nAj!36jf8J-_h}jrmYv0L; zzL^{Git_MY`W|yADPyVwfVE3P^$(3p-M19)?x{=a$q~vvK`A0{5~V!K&`Lomo+(C^ zuwX#gQG;R{c7#Ypm4xK2C!_irD2QasYSVY17xSz+BEVRG)~xFUHu;Lrtwn!CMC9Wi z_q9<5(HJDD5y=t*2K}&dXpjgiKd$Ig2950IMcjL%CrKnsY;N#8g6`dnYR07bdkXB_ z-|SmMWUrmeeQ86&PVzs#qK#V>67kD1$`G4V6Eg17Od`zm$lN# zMRTswNix_jIY{ym-6Y_|=VNf+539IS^z7ws^Cd8=DERInRdSG7j|R@8AS0}lZU;h? zg-DVFQssIxSVveVgtUl+%hXv%GT>NU*C0wgWAaGDxF^B4CRqTQCrxQLvRe?5i6`{c zgh-fZWrUd04z+0)#0`0gi@Vzz@qvtY#GlemYCYmw zm9d*85p;)3Opw=S+G-hFa>E^*5arvmGpr+C1vZzgSYHyS_1KlRn@emXWD#qxB0gN# zgAout$DM`2v2v%u3oA2Vg4m-z3GBUI)j-HLVtf#Rb$a=rXY(5ShoT6&Q66!XjJsXz zR%{TjPnU7Y0>wX8pQvWsm184U&3GQ7Dx=Flw?IKkO7i+B{LIOKbTut)^`}x~Bx8pk z3T6DsnT4g8N{}gQxR&^Eb=L|8TAue#0GzERe~vZgq-1<@`&o3fZ?bz18t_FPrMEsl z{>^N^cEmk&>n+s2I^<|3>J)G~Lvnf=5vfYa%ES=!CS#@_RQ!qN@j zJb35(2=J2xzXnr`{tXt{9{oP~-f&svgQFQmHq{4sv@IgdgHLYpk+W~%jM?84j_-js(|Hb>$ zT8+-1-zvmqvo4N@YcjbN{@HZ^4v zaspzZ@}G=i+){6H@S{`|Z&967IYggP^PO_$ghV}U&pxlCWhK1!X5We!ZtQfFI&c1bOA>!y0i876F>aJ)=P;4n()a6yB}g-Eq62#8N@dWHJnAGwJ*CIV4@3IT4bFo#HOxdfvkP05>zc#GMB5S1I~CgQ+#_Xh=x+0JK06k)WQ zyl3zFz9>vih>5Jq8B;#H;HaOYuJ`IgL3Ehrcrnp$tQ`bsCir*WEe7{K)2Fbl zXq-W0eamQtIc@h&658kG2Cg?o7o%T!omG7BKC-{`_}PV6^uz4h+b!M?k!Bs7i3|%I z94nMzRJc7L?!sL`yTwN_;Fgq6qD06Z6{)Pt4d(qx0?Bi?ZTkUZi2KR2JX@$&YjX!YSSWGWnWAhuN%0Zi zn`sMPh7~EyYrj4iD2C{5z!Q<7_a_zY`b#-;_YI{CrKiG3cr~U9IxO^;6r|+ zX%~Hdu7A$YJ3GvTp8Z%F?--O7-aUJFRuOF(5G%XiAJ2i74&G~T3OXHrj(Bx26yGQx z?BH|6qkj#U8z!d?`_i5*x`@=_p+c*WOXjze$=bhT?8NMncN&*FrY*%DtqgOlToM$} z2rd4XVEw{0|0O{GfSf< zpoAb+2ug@tF)0^_JrXD^goTPInCn9zmd>bR38y3U0o>jNb~Sx~gkJG8un-m*T^;3W z!Y3pyj%8*LHx-zpy~AH0jsKbm{00qw5u(4Z^nYQWzp0_nzp$ajq&gy8faKaI|DZ_p zLgDhHycrRb=0apIBmF%2jo5d}^h|)yU7c2&4|8AOuB+c!gV?WON|9pn2Ndj6P&{ZZ z8CY9g=LW826_pavT&2&wA6AKP<9XQ*zu$4+%*iZp#FIP7>_L+7xa94Ne4-j{Crs>a zC3fi;mmc4XKa>R*#V5tT#7|^*J>)*mzC$6TwcsV$?`qz}^&Qba^Nm7{k+zAaLiNjL zX%BD#UnzCX6{^8Au@PKY)Bz(=wt(H#G_=c zj!887;mI3)n!wIrjbyR^qj>&C&t_uSpo@x%0l{Dips0WVP)Jk&%lm|XjltOdYW&Lv z`|Ya#`zMUG7ZL|sT=~9AnU4p|4j=#&6b1niVBMY?STKQ6T|}3s|JMsqr=wN zzjcB_SXTHiJ8T{PM<)cvlJI}oiHQ7HCn|#Fu>Z0X6UH*&f9bFk``getPages()); } + + /** + * @see https://github.com/smalot/pdfparser/pull/797 + */ + public function testParseFileWithCompressedXrefObjectFromPdfJsCorpus(): void + { + $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/PullRequest797.pdf'); + + self::assertCount(1, $document->getPages()); + } } From d4c26e9f646d588593f00c7f77b2f7945aff8856 Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 12:05:48 -0300 Subject: [PATCH 4/4] test: clarify pull request fixture provenance Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- ...PullRequest797.pdf => PullRequest797-pdf.js.pdf} | Bin ...fWhitespaceStart.pdf => PullRequest797-vera.pdf} | Bin .../PHPUnit/Integration/DocumentIssueFocusTest.php | 7 +++---- 3 files changed, 3 insertions(+), 4 deletions(-) rename samples/bugs/{PullRequest797.pdf => PullRequest797-pdf.js.pdf} (100%) rename samples/bugs/{PullRequestXrefWhitespaceStart.pdf => PullRequest797-vera.pdf} (100%) diff --git a/samples/bugs/PullRequest797.pdf b/samples/bugs/PullRequest797-pdf.js.pdf similarity index 100% rename from samples/bugs/PullRequest797.pdf rename to samples/bugs/PullRequest797-pdf.js.pdf diff --git a/samples/bugs/PullRequestXrefWhitespaceStart.pdf b/samples/bugs/PullRequest797-vera.pdf similarity index 100% rename from samples/bugs/PullRequestXrefWhitespaceStart.pdf rename to samples/bugs/PullRequest797-vera.pdf diff --git a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php index c290fcdc..1b40f8bf 100644 --- a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php +++ b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php @@ -36,7 +36,6 @@ namespace PHPUnitTests\Integration; use PHPUnitTests\TestCase; -use Smalot\PdfParser\Document; use Smalot\PdfParser\Parser; /** @@ -112,9 +111,9 @@ public function testPDFDocEncodingDecode(): void self::assertStringContainsString($testSubject, $details['Subject']); } - public function testParseFileWhenStartxrefPointsToLeadingWhitespace(): void + public function testParseFileWhenStartxrefPointsToLeadingWhitespaceInVeraPdfFixture(): void { - $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/PullRequestXrefWhitespaceStart.pdf'); + $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/PullRequest797-vera.pdf'); self::assertCount(1, $document->getPages()); } @@ -124,7 +123,7 @@ public function testParseFileWhenStartxrefPointsToLeadingWhitespace(): void */ public function testParseFileWithCompressedXrefObjectFromPdfJsCorpus(): void { - $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/PullRequest797.pdf'); + $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/PullRequest797-pdf.js.pdf'); self::assertCount(1, $document->getPages()); }