From 7fae0ba50d679b197d80c02b1fe80209fbd547f5 Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:12:50 -0300 Subject: [PATCH 1/3] fix: tolerate startxref offset inside xref keyword Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- .../PullRequestStartxrefWhitespaceXrefStream.pdf | Bin 0 -> 9393 bytes src/Smalot/PdfParser/RawData/RawDataParser.php | 8 ++++++-- 2 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 samples/bugs/PullRequestStartxrefWhitespaceXrefStream.pdf diff --git a/samples/bugs/PullRequestStartxrefWhitespaceXrefStream.pdf b/samples/bugs/PullRequestStartxrefWhitespaceXrefStream.pdf new file mode 100644 index 0000000000000000000000000000000000000000..9d15f2474e107c6029772e2ef2107e7d59458dac GIT binary patch literal 9393 zcma)i2|U!__r9boC9O)KLAETj7;BcsGPdkWLN#UvlUXz~vbL)1icmC`>^qfxjqH*& zAt7Zi`(E^W4{80p{@?#F#{1lR&pqedbI$8N42W2 z=D;`R=wYcS44?ocz5cAM%!Vam==$k8{@iC%JsTK_BU7j#nGFqUBu63@1d?F`pF;hV z{?bF=byE+Z0We67M8x2zI1&*gBD(3r#>j@LY+!XPg}NazB$I4#cq~YEV=ogBSku(x z0uT`dbko0|>a;O{pEw&FmJAY6bEHs71VFtp))q&hl3nys1T09zj!Jb<0E3;JoFvgC zJc%qxBHIF!w23w(kO;+CQx&AGp(X>BmywZxNkT>0ez`#p*QOgAVP6j;8+gNgz#3f~ z1_-ho-B7FF7SiMT=fcg9vw=+(l?j7z7$spdX+4OZgBsQjr{zkCA73C~JqzJ6*_@M%oSoE7G8>`oM{!O|WQi8)z2Zn%Na!^p)@XC}#!YlD_T5SqsJ4n?LhsR*a{J*AQ)lpQe5L)vmk(npCtvfIwfHzdcm->vC|vhM%8V$Kzf7wS-LnX(1t`d z&x}exGm0aQiUr9?KqX)jR0vc8ummrG0{Y2Df0U=Ug3Sg2lK#;jp^y!+A&Lx?!H-t` ztIyCI-R7P@8UjpO77o@2NzwJN5cD55PEWB~ypPLKLupX;q? zvzcgl(?VTLO`JGuu9NB_?}uc)M6+Am$l88i8jlL(VKQRTX4$e=RjYFwLVeqQ4fWe8 zA+Qbt6=|RmA(??r-_>z-taa-=WNbt(oAYfz#6daD2z}dxw#lQDi}H(3laufJN){t# z8KQ;bQ*z!wb};yNL^tX9RdF8JZym?Vu$2wOxQ7Aek}fH6f{(dBs9HDcTo3Cv-^laK z_i-isr|MroroMYiyd7m{ILRf)EE1eUrk%K)ks>>CYJrR95+5RT*J$7PG}Gl=_|r>~ zCByDR^4*ne{%3AJ7qyL0jfG6(Xl8G@LgWOJ<(%Fmhs?`esOpNZPcr)Wt!IC#QT?SY zCr5>f5Tf9kE)Ls5WntZ0vy2S2)Hf$AS9TomvgF)p;)|X^-bW&EQ$In` z@zmh$9uPI5%;3=jkK)e7KENH>@~CW^uBI; z@TwpZ@ofi*pfYTx#CF4(GqZ6eFSk{K!7r2P#mtb!SK1jpw{q`!exV`soi}I*zw*cg*25vHxze?>e^A0Iu>jKOddBC9{(` zOLB%`CT|DBU@L=hHhbUA-Z_boAZFNGZ#_+{D~l!{l-pv@q=)-Hzzkj_$CeVW zHO-kbGDb5B^=}co$U(b#VmIUU-Ha6oxEFIBi09J{#;pCd2>E6Pr6gvFXddcK-Yi}` z>y)MguRCkTO^b_rPkDG=+^)DS6U4%g*y**y9I@-Z*r#pF zYStoqeLRhyh$k@|ziE1(6Q(NB%$CGF=6UxCPcw03vDs*ZiMp$h>FPE;1V{8_^g{?^ z8;TdtTvT3~btvGbNk6wUYuYXQexdw*=b1UmW2cmtAnuIH92Y!K+Ffbz2Uy~Er+uYe9`c1 zu^K_m!CkjSsM7 zP>@4JKRYDf%NJ*BIveGxmcpld{OpI&LeeIWa>D!{@yHb67anP&3SsC+x?y^+5JP-J z8bbn{cLuA2vM_09I#|X<#<$ytHe!7{NAc`M4@*3+23(C&8_2&rAkU^RJiPU_=xZy< z3dbRxeDVa_#G%EVPT`uLSe=c}Z_D&twCH6bRnySCc9FKd7nQBoyqAw1{ZKR=|Lyp- zqu)W_6~4#ciO}%7(`xNsb3QZ+Pp@HKK4;Afd`Er9on3(2@y@l|rc37CwTo`$S2|wzO19|R=2cAuzZY;T zy_$XX&gzZT#03uS{9S>&E_rV8Y~o(?2}48A9+0^mQEkIb;CaC9A8>+OnEQe}D~%kX zK-(qv{&IqXZ$tx)nP#BSlrNVbSCF0m*|A_$=kkG~&?4d$rSX+ldi|l3x%GseFQqSx zK?mTh3v9$9sh-##`yS2rnXbE)DtSYAEtE>CLW z-gA>ilSfnwA;`1BJz}((4`1U}s7rod6&D<*CFb7uyLBw)T)2NBr0-Fm#2(FH@v+@J z`>qa8j)E7DPL~~i!J@@tw?l646d~Fy8s=cpALWscz#&p^>2F^ZkUMtvXtBx?!b?|6 zyVi|Qwo>u(-F&leE+TwznQV!7F@bwCc9x7)SLw!Cif8Hs3Fv{tk}ZoN>Y7%=bqrsmrIuc!73nG4DD zdR7WnJJ#q`QU)j=JwKM|JLnh+e%5=Wy;BFTuNbMWrK8<_pQ?6AI7HV;*EUEk=%HYg z{*y#Z;?6|(2V-F=21=pZ69p3qr;19N9iLqJ;H;qaFoATc>;YeGQr(i85L8Nla%>iC z9+MWD_Pnd-^T21NE(3}+Wra)}EF3;eF&Ki*`pjGzR%*D{FxeDk`KidRD`s|E$F{Ls z0(asniYp@SJoo*0kaSMbYQJgwY3fz)i`p-PD7xtn&q%DsJu(*;Q#MrN&)&r`2aFMDy9}z%_HY3GoZmoU4v%^MVl%GjwIm_nmp3Y=ex7=`-K) zMlxB+y8nZU$w9w~>X7Q>!rH>X!r^(BdC$Y0?A>>F_(%HNAD)yJgs&!W5kh67@R#v8 zQrJ1p=Xg!A_H*5?&l(HTpA%eN24<|jv@g^zs5mY;&N;c4@n5fcs$cYaFv7`9D5K{c zwj+MB@ab{)Hw$k@F3pd5OK-8yO>=plKC%{5lWChZ?0Md0@}Rwjy=TW-eg^)%`UQu+ z-1*A+?)kT}>V(cWO%tTCvLYzq^t=y-=ioWykfu*x5Z-Z zQGVF5u=`=(!tKJ}JXU?2N#mw@(-tEz5v`FYBQv6SqI{!PqH)okPc)z8#t6lP#%_ygGdG=*{rBUVUD&4BuYGn1x z8bnQ5tx|1qokCqfygG@v2E9qA`jna)duVr56Hpw>SH_JB{wkWogwyLz2x2d;P zw`;dIywQKt(s8<@^R4CE{!UEiNEg0qy8BA^*LOF1w)ObD-~B$cm%BHnPqZ(yU#7q0 zgZhV8111CSKB7O4f1-T)Hh6o8eJFJJ=y37~Y@~P;IodpCF*Z2vF#h%P-7kB-&?ZDC zvL{bYzM48aH8@S4{yyV3%Qc%g2cN5$H=OTXAS`@c^jYFsN?Mj%u30f#8UE_@jrrT7 zRngVLHQlwI?}YE)8Fw-)-k$yXA2(h)dfmKzs*yCfXgd@c1?VOJW11ludj$mD2<9&b z*Mqrf+7E01mjT8%ulJYPRZZEY0T=+hO)>1?jToBRu#0G6@pux*j6}v`L_wyP*fmVq zjsD~K*CYOO2Y}5{155-#p>Q^^DvE+#KOC^Bo{p)usu+@t#p;oWBvld~vw6Vu32a~u zA^=QqL|YKp3`ax|DY#9+Z-j>_*%3=m3N~O4hH3{8Co({W{qx24j|2qy|CaqPjlYfm zqmw=cy#J7I&fk;+pFrl*ecmJoNIal}&V29=d%10yzuZM7urZ;~}K%rOm``$i$?^$mF7-x8KnHCe!{s z2fc5_u!tj1ZZkdUk_OV88!624NgehQsOx{m=wkrgR&2Ym};O z_}Nz%{7rmqgWoyLxOw}99Ln?vfjIH$f{m4t_v%Vh@ak9hhj%CY@m{2K^5@=GdU+mo zq)R3@;)~7XQ;x31&&N(Qo|uJ4)_G1_EZd!bB|Mv49Hdfza7x1+dOcL7$!wa({iw9)}!lH2L%WB4c;IdE*tTrPMlUTR5>&L=6P-&!Hv+@b#3Bo zRH68MLHqN=9oLlb$l|4dM78J8R@Few0HuaW<^j2Yfr;;AQK< zr7LU7eNAPCETM;WWY3=ClZe=%dr&krWVO}U_+ZM{_IvHCmpllugd#%O7fj7+GLx>^ z7|*r?%9FKO%2 zq`kA1meX0zT8bhQ2N%7=tnVi6ck(__m2T~I=otBqWKfmHfGT3>{Hzrvv7oW%5)s-5 z>x0XNy%}<+WXrd5qjt%z>WPI%l3Y`8ACmPn&nrJ$s7>jw=tq~Q^lvTiD*x)SxJ~Sq z6Z0fnz|LK}IJZlAaFz2S%-(xCPQE2%zB`i_KT%isG)JXl&G0MXOJMiQoSWST?ajE? zMvq80ACfy_$3kPJ?P_P%X9{56wJmphE)!?vqJNVQAuH@j(puS4mrL4d=q0R_059?K z%Et>MdUs5peQ2*?NbEKe&tVR(Y`L@FfJgMEWQ=~W)?q;-nANF=&GGGAc7dwN!FQjd z?|abNLo7cf5UE^N`n+TMqwOBBB;olOv0B}*V2u*d$18>x#xz%P41>)fpJH!P4_N6H z1Z%!+&Y|*H8F+|95u+K8$MF{iZg1NSoNQp!-4=(!FyfOvR5AZJBX~G5777m~CR&fE z+)J_BqNJ}oV(xnLOxy8uYWAZ|bFdQCaU2f->P8)1{bgE(Ts?YYWN|wPvFX_!xo!I0 z@fop2Nx{`^#~imdMn6oV38i|D@D{2afJcR<>QVFjyX5rZ+}=dBOF7sJo{3OhoQ0aI zO*X$YRc&jh;6*^ikqNE%`O}9r?={EH?bOADuf!A@Tcwt!Wf+^ay~}-{{dnFiFZn_W zDcJ&VOYk3a4NuN6vTuKDCpmFPevU~AY0zrz>ciP$oKk(xplvq$R%C3H^In7a6U*@@ z`z7H!L}(Qcrh5w2$X-k2EmRi&tW-yD+Hk{Tp+oMs{r052$Q*o&mep=L{KEA_&MA_$)6hsn zh|&$gd@PnV`&IAj`^~9>)yHZPcthg1;pu_`DGE=aQp1ybO(9oP1F@d@bKb%_C8CGj znmYtE#H_Me2qccj%MPN@%ZJAUUKZjEE5eN~=i2HIzMWT?uRAqy%C-^1;)moqAzeu@ep>2YAGYg+ipH z>ltO)?JiZti$~{sGvlc(;ny-=6+B5YD63#(OQ!@Cb@9?-B-Jx2cbMDFoFW;sNd>`5 z&NN^#5YLDrv1+GuXV=m889|${kAc1SDh01Vg{7Q4`P%LdjXtbVhCPY9v{p$J@C#;t zc;Q3N0`oExe+>6|Wc_9V=sRcQJ_^2K-FDS5KFNTg+D^BR%B)5)A^Z8Ak%-cD_ z=}XyVAdK_Al59!t{3I?r?o!h_22M#$w32ddFE)zpj9jkzn37T*@=8qN^V__nq++dV zZ5g2>FRnDm1wVg*%$Gp=wleE$u;7%duejcg7ZQpSwsijzT{AoL>iWr|wbwm8DYWs< z32`;uFWpyP?A2}97jL%M{PJuSOo~}ji+az$DS7xyNNnhe+l0Q#!Q{ug!mbY_w#y%K zL%^#=3rBPXmc0_xtQl3_uz^ok7Vud?`77rRye{F$mts0grCFT}v2(it=2h?u)#!SW z&Ny`}_*&ss@^~^ciy89Hz@e!&hMGDB^8ig<+HUrft#WnBegOw+b`L%s88#6>HVTz3 zam6!#d^VkMvx{9mcW;gX?S*BB)K-Jp!OtBATiKbrrEgs|c2hxqQtM{HM1o&4Htf_r zk!@q~eCz2m{pQ@w%aCRViN;&R3m&_ySJ6h);x3>3toPUWacZ}92L1Q&vpXSabE=Oj z4Nzj@Gk5CHUAr~v8pZ57UQI;^SG2yi3wZDqbGziL1yQ@BxXze#L6=jd|8B!wgY?B` zam1FMEdqx{isM;`hYpw|cHEu-va(u#J{!1C{jjgBlY?)4Jf77~UnlC(VSH~r{m*a6B&z>$e z>=Cr=9fCSH&EyOok$@K-IMP@|=5{+geT91fIuJtTdV=h<4-EWL@da)Cy;*ti&LZDH zg9qv;vcqSosr!Uo&o^T5_m^pPEYR>KAHG;+l{9Z@k3ny3c;oR8qQ0SOGP@_Sp2=FF z+oIiu7CAu91~E8_6so5 zwt|yK?jI~VR>BZ`?oRLVS=hzuy(_|poCP{sP*8L6z3-~5KlO|!pN;Vfz`vUD$f>#J z{OwWYZedrx?`N+W-V_iH3|z*>oka_mKj7nVZaylp$8Imb)q~K}s^NSAtr1C?FN^%+ zd?p>-_J5SH|9*mRxbXZp{#9Pj69~5E`sv=5kviB8b5r79YSqgU1bsy$V5^ zdAZU~=+VkgjJ~K#n^L}~HaDn?Pp&l6=Q(D6C>@b0=l7YRy$y=gR1QOatTal)R6XJe zOXd*}p!p?Uqy=Z&6i;8oST6eKp=XjpGc~4Jqp}_#< z+@tIMm+URAYm0&gEeCe`CSs#)jjnU@cNX|nJ@*aNy^i!(L|VHjxyn)oI3S_4DxJTbgLto7X`dbF1j>Q!_@@#V1^!h#7?S< ziorb9Kc=`mmOs{gWXOnC2bD7)aeL|kAtK{vLWUYnWSn)F9XZWTYxU`hBq&|b_xqHDkGc9fV(jI}*>bIY* z-E$<7w3TVXk$WW5d&P?i{X0dxu&Id*zKY~Vt^UBN(T7{f(=TKnxtOb1!VXO5rbcYoPPw?wt69#L{KB>TW+3R?lF z^c&{5=bW}^Irmi{#R6IGS-G2z|BxO;OEa5NJd&(N*Y8T`SGFyW%>(5{3(M%0kHeG zOdg<+{*p<_15ouZ85{tmf61V52*4oyIS(og1vsuhWpEe-0C#`Mq~!lu3x~`6E0duE z*uU1w{_C4GMCKpgq@nPCY>|e+|M6EE4*w@s($cWs_GM?Ik^zv9B?JF2qR0*aZJaB1 z{Q$tmB!G2VC*c6GwmQf{QC&t2DG!rXS3}Cd;BYuZMovRjMjjz84}~M;AhHl;kRk$x zkd>B_N6M!xgp4X2DhHF2mY0F4D_gMx(3&0Kod7m#o{ua5 literal 0 HcmV?d00001 diff --git a/src/Smalot/PdfParser/RawData/RawDataParser.php b/src/Smalot/PdfParser/RawData/RawDataParser.php index 411b1951c..bebe23f6e 100644 --- a/src/Smalot/PdfParser/RawData/RawDataParser.php +++ b/src/Smalot/PdfParser/RawData/RawDataParser.php @@ -947,8 +947,12 @@ protected function getXrefData(string $pdfData, int $offset = 0, array $xref = [ throw new \Exception('Unable to find xref (PDF corrupted?)'); } - // Some files point startxref to the whitespace right before the xref keyword. + // Some files point startxref to the whitespace right before the xref keyword or stream object. $startxrefOffset = $startxref + strspn($pdfData, $this->config->getPdfWhitespaces(), $startxref); + // Be tolerant if startxref points one byte into the xref keyword ("ref"). + if ($startxrefOffset > 0 && strpos($pdfData, 'xref', $startxrefOffset - 1) == $startxrefOffset - 1) { + --$startxrefOffset; + } // check xref position if (strpos($pdfData, 'xref', $startxrefOffset) == $startxrefOffset) { @@ -963,7 +967,7 @@ protected function getXrefData(string $pdfData, int $offset = 0, array $xref = [ $xref = ['Unix' => true]; } else { // Cross-Reference Stream - $xref = $this->decodeXrefStream($pdfData, $startxref, $xref, $visitedOffsets); + $xref = $this->decodeXrefStream($pdfData, $startxrefOffset, $xref, $visitedOffsets); } } if (empty($xref)) { From 1288da763b63974f0eb993b496ef3cc6b537d991 Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Fri, 24 Apr 2026 11:15:09 -0300 Subject: [PATCH 2/3] test: add second startxref regression sample Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> --- samples/bugs/PullRequest794.pdf | Bin 0 -> 6499 bytes .../Integration/RawData/RawDataParserTest.php | 20 ++++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 samples/bugs/PullRequest794.pdf diff --git a/samples/bugs/PullRequest794.pdf b/samples/bugs/PullRequest794.pdf new file mode 100644 index 0000000000000000000000000000000000000000..718557609b254ab33923a92168582a720f6a4cf2 GIT binary patch literal 6499 zcmeHLU5p!772YI($|!Ebk4g|A-EK)YA+hiL*j|t8b-T9Xu<7n*<8?~VX3Ngb-1Sa6 zo|(*yv)-TxQ2P)B50MILB^sfY3Y8Z`rM^@}>LPyBR@oF);-TV!JRlw*5=Q5-wnkgzIc5W72CuYZ6 zN1{+Dg1GBBMCoR@$|HSSQJ*9pS|uIboSOLi>u*dTU7MQtT49c#v+HDC|LP@&EMA&# ztCyZsWo_b-YNpaJ_q%o%;)42J!*t92sc{8DLdx(=oAG!xgHXlM*2=9$GZ+MKQ{(HN zXO}VF*x1NzNIA>tU_q8;%!^nQvoIp-ZknFb&zkOoOh=#vmQZzco341EO;aOfcF!@G zDow@2AYGbbSjY)@9MKslt5(mO8V}TgLQO5J;J#-$be8^y&2^Dn7_aL{GPAL!#DoJ+ZEa0C-iM9jg7=ZrbOjY% z?=r{f5Nx`bgu}pfKQgpuQC%lpWp_!Jjd?IG2FYp@$J^Xp&<;i=L}@YDU5sXCLZqn^ zL)%>jcA23;T5w^|AVi79Df?cy@ex57Yf2~C8ikiFxve-Z@n_v4uGMo?lCTpb$zU}D zQ`LIU^{g)P4Oj5+fY@Y*m6%GGxUd1qnbu4-wl!3yHK$r#8r80%uG3nIMVzpZklxjf z?T|ISUsc_Ne9(FuNk0kFi${Hl_RNrmsMpZ{i{*{2PndgU>SMJ|U;U{kJ4PA~vLYIJ zQ89Wfw$+B>k(8Q2=N;)%K|{xKHmh`}Ii_Ny)J6Swq)i)CwW@1%35qqO6vui-g#vsI zOVS9-ejhDPL!G`Eze1GlU@xbKjn?hArZ$$ZFDh5VJJOeml|0|rP$3KsmaZMu1k=s2 zLxydOzG6d(=Tjvfik@Ar`?bAoS;o|OSi9SB2BTU}H#FkVA{PuJG$p3SMP4Xoc`?fi zZ9ysva#<9P@#9c4jmf0Ho`VYm7yrYxhak#TGilPwE)yOUBy&k$!fl3Kb&yL11i7~EP$5? zSO8d?n%JPQ0iJx^5dt_0&m#1qdjO0Mhw<`gyxgAdkuoY~i&-%%!N11~SzB@4ta^kM z3l94@Pv9vq3gJ>A0D1z&r_j2fH~;M_rCP+u~$x?Jbyvlx9=x=u3l?B z(Hi?~=lQ+b_r6S?dhh6?w_d!8I_R7CUEO~6UggJoUU>%HdKg{&)bBs@=m%f8`Y(_F z(RcS4_dcE7yZY=)kAM3Xv2Wm)kG(zn&f(L%$ny*K>|3W!p1J4e$8s~<*~kBJ{jnSP zD{tbH$7bgC$lR^!mDl%u@$&mmEnoZ6<(c>S2gbN>-SamWmCk+sw>Qvl4xraweR==e znkGJU{ls&p-`(C?S-!Eg^~-$^+&{6s^@~3|{hjH*K2)3i_`&-=d}06orN4}gU3+i7 znW3>wMwx=vaGmQ=VQN+%Ezy7#c=k_efMcnP0792A#aXxw)s>M07Po*M0Q&ODgAh;( zdR!yMAUGTz+AgY8@Yz$V=ZWgEM;-}`YgFMwsf4C0BkYHu83}1*E(enrgnThfTb6C1 zM{PfERYU2yWRR)=`-8L#ILJKsgaqH0K@PGMO{pS%8)i>51lYg@N>oPeb%FwB1&sk4 z(%?v^1k1+?D6HKNv(Yxl4Td_zHUMO}$OA=8j&T=ctwD?KMzTYbUU0}oB&2y43qUyO zfLBO-YmEJ$)$%gD0-U zj3R@R!xlP%jpFDxh_Jr!7X%F|RHVP~!rtMF_18u(M}iOf3FcY0t|DI8Ax1#~^s~%Q zxmcDZXriw#$})$zJ;N&_T37w5iqg63r$Hi-{n8pUF`}01rt8te0;xMpI@sC%q*Gaz z;n-eGCPlbUV_6X=w?a`KklX)_+j4ipZ2?8GQ~v+qws*PTfS}&xes{Sa!*h(z0~lcU z4j(P|qgRg|;-qNRUHV6{{cY&q4&3si;(rK5taOMw#qaNe8!gpM(3u^tO2Vrwb-*C$ zomp}`FdSVqh}r9|5(mSLVVwF_RJY8o9;&4Zj-CgTWsEz;WCwhRi}Rt_VHEM9A*7t) zGF%^SYBC()i4uHAA5<(9qynNn!H0q}`ax#YCgcGar`kk{tWs@K9?soVTb_plJ=rD} zB>=yvw#njE7GxpcEb)_7RFP{nxmXj61-TB(T5n2?Qk|C@lh7^6b+J}m&HzNsz#TkfpC9jn OTqgetPages()); } + + /** + * @see https://github.com/smalot/pdfparser/pull/798 + */ + public function testParseFileWhenStartxrefPointsToLeadingWhitespaceInXrefStream(): void + { + $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/PullRequestStartxrefWhitespaceXrefStream.pdf'); + + self::assertCount(1, $document->getPages()); + } + + /** + * @see https://github.com/smalot/pdfparser/pull/798 + */ + public function testParseFileWhenStartxrefPointsNearXrefKeyword(): void + { + $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/PullRequest794.pdf'); + + self::assertCount(1, $document->getPages()); + } } From 522429505a02d6c21140be320624530c0b31ba2a Mon Sep 17 00:00:00 2001 From: Vitor Mattos <1079143+vitormattos@users.noreply.github.com> Date: Sat, 25 Apr 2026 18:45:33 -0300 Subject: [PATCH 3/3] test(rawdata): deduplicate fixtures and use data provider --- samples/bugs/PullRequest794.pdf | Bin 6499 -> 0 bytes ...llRequestStartxrefWhitespaceXrefStream.pdf | Bin 9393 -> 0 bytes .../{ => rawdata}/PullRequest797-pdf.js.pdf | Bin .../{ => rawdata}/PullRequest797-vera.pdf | Bin .../PullRequestInvalidObjectReference.pdf | Bin .../Integration/RawData/RawDataParserTest.php | 54 +++++------------- 6 files changed, 14 insertions(+), 40 deletions(-) delete mode 100644 samples/bugs/PullRequest794.pdf delete mode 100644 samples/bugs/PullRequestStartxrefWhitespaceXrefStream.pdf rename samples/bugs/{ => rawdata}/PullRequest797-pdf.js.pdf (100%) rename samples/bugs/{ => rawdata}/PullRequest797-vera.pdf (100%) rename samples/bugs/{ => rawdata}/PullRequestInvalidObjectReference.pdf (100%) diff --git a/samples/bugs/PullRequest794.pdf b/samples/bugs/PullRequest794.pdf deleted file mode 100644 index 718557609b254ab33923a92168582a720f6a4cf2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6499 zcmeHLU5p!772YI($|!Ebk4g|A-EK)YA+hiL*j|t8b-T9Xu<7n*<8?~VX3Ngb-1Sa6 zo|(*yv)-TxQ2P)B50MILB^sfY3Y8Z`rM^@}>LPyBR@oF);-TV!JRlw*5=Q5-wnkgzIc5W72CuYZ6 zN1{+Dg1GBBMCoR@$|HSSQJ*9pS|uIboSOLi>u*dTU7MQtT49c#v+HDC|LP@&EMA&# ztCyZsWo_b-YNpaJ_q%o%;)42J!*t92sc{8DLdx(=oAG!xgHXlM*2=9$GZ+MKQ{(HN zXO}VF*x1NzNIA>tU_q8;%!^nQvoIp-ZknFb&zkOoOh=#vmQZzco341EO;aOfcF!@G zDow@2AYGbbSjY)@9MKslt5(mO8V}TgLQO5J;J#-$be8^y&2^Dn7_aL{GPAL!#DoJ+ZEa0C-iM9jg7=ZrbOjY% z?=r{f5Nx`bgu}pfKQgpuQC%lpWp_!Jjd?IG2FYp@$J^Xp&<;i=L}@YDU5sXCLZqn^ zL)%>jcA23;T5w^|AVi79Df?cy@ex57Yf2~C8ikiFxve-Z@n_v4uGMo?lCTpb$zU}D zQ`LIU^{g)P4Oj5+fY@Y*m6%GGxUd1qnbu4-wl!3yHK$r#8r80%uG3nIMVzpZklxjf z?T|ISUsc_Ne9(FuNk0kFi${Hl_RNrmsMpZ{i{*{2PndgU>SMJ|U;U{kJ4PA~vLYIJ zQ89Wfw$+B>k(8Q2=N;)%K|{xKHmh`}Ii_Ny)J6Swq)i)CwW@1%35qqO6vui-g#vsI zOVS9-ejhDPL!G`Eze1GlU@xbKjn?hArZ$$ZFDh5VJJOeml|0|rP$3KsmaZMu1k=s2 zLxydOzG6d(=Tjvfik@Ar`?bAoS;o|OSi9SB2BTU}H#FkVA{PuJG$p3SMP4Xoc`?fi zZ9ysva#<9P@#9c4jmf0Ho`VYm7yrYxhak#TGilPwE)yOUBy&k$!fl3Kb&yL11i7~EP$5? zSO8d?n%JPQ0iJx^5dt_0&m#1qdjO0Mhw<`gyxgAdkuoY~i&-%%!N11~SzB@4ta^kM z3l94@Pv9vq3gJ>A0D1z&r_j2fH~;M_rCP+u~$x?Jbyvlx9=x=u3l?B z(Hi?~=lQ+b_r6S?dhh6?w_d!8I_R7CUEO~6UggJoUU>%HdKg{&)bBs@=m%f8`Y(_F z(RcS4_dcE7yZY=)kAM3Xv2Wm)kG(zn&f(L%$ny*K>|3W!p1J4e$8s~<*~kBJ{jnSP zD{tbH$7bgC$lR^!mDl%u@$&mmEnoZ6<(c>S2gbN>-SamWmCk+sw>Qvl4xraweR==e znkGJU{ls&p-`(C?S-!Eg^~-$^+&{6s^@~3|{hjH*K2)3i_`&-=d}06orN4}gU3+i7 znW3>wMwx=vaGmQ=VQN+%Ezy7#c=k_efMcnP0792A#aXxw)s>M07Po*M0Q&ODgAh;( zdR!yMAUGTz+AgY8@Yz$V=ZWgEM;-}`YgFMwsf4C0BkYHu83}1*E(enrgnThfTb6C1 zM{PfERYU2yWRR)=`-8L#ILJKsgaqH0K@PGMO{pS%8)i>51lYg@N>oPeb%FwB1&sk4 z(%?v^1k1+?D6HKNv(Yxl4Td_zHUMO}$OA=8j&T=ctwD?KMzTYbUU0}oB&2y43qUyO zfLBO-YmEJ$)$%gD0-U zj3R@R!xlP%jpFDxh_Jr!7X%F|RHVP~!rtMF_18u(M}iOf3FcY0t|DI8Ax1#~^s~%Q zxmcDZXriw#$})$zJ;N&_T37w5iqg63r$Hi-{n8pUF`}01rt8te0;xMpI@sC%q*Gaz z;n-eGCPlbUV_6X=w?a`KklX)_+j4ipZ2?8GQ~v+qws*PTfS}&xes{Sa!*h(z0~lcU z4j(P|qgRg|;-qNRUHV6{{cY&q4&3si;(rK5taOMw#qaNe8!gpM(3u^tO2Vrwb-*C$ zomp}`FdSVqh}r9|5(mSLVVwF_RJY8o9;&4Zj-CgTWsEz;WCwhRi}Rt_VHEM9A*7t) zGF%^SYBC()i4uHAA5<(9qynNn!H0q}`ax#YCgcGar`kk{tWs@K9?soVTb_plJ=rD} zB>=yvw#njE7GxpcEb)_7RFP{nxmXj61-TB(T5n2?Qk|C@lh7^6b+J}m&HzNsz#TkfpC9jn OTq^qfxjqH*& zAt7Zi`(E^W4{80p{@?#F#{1lR&pqedbI$8N42W2 z=D;`R=wYcS44?ocz5cAM%!Vam==$k8{@iC%JsTK_BU7j#nGFqUBu63@1d?F`pF;hV z{?bF=byE+Z0We67M8x2zI1&*gBD(3r#>j@LY+!XPg}NazB$I4#cq~YEV=ogBSku(x z0uT`dbko0|>a;O{pEw&FmJAY6bEHs71VFtp))q&hl3nys1T09zj!Jb<0E3;JoFvgC zJc%qxBHIF!w23w(kO;+CQx&AGp(X>BmywZxNkT>0ez`#p*QOgAVP6j;8+gNgz#3f~ z1_-ho-B7FF7SiMT=fcg9vw=+(l?j7z7$spdX+4OZgBsQjr{zkCA73C~JqzJ6*_@M%oSoE7G8>`oM{!O|WQi8)z2Zn%Na!^p)@XC}#!YlD_T5SqsJ4n?LhsR*a{J*AQ)lpQe5L)vmk(npCtvfIwfHzdcm->vC|vhM%8V$Kzf7wS-LnX(1t`d z&x}exGm0aQiUr9?KqX)jR0vc8ummrG0{Y2Df0U=Ug3Sg2lK#;jp^y!+A&Lx?!H-t` ztIyCI-R7P@8UjpO77o@2NzwJN5cD55PEWB~ypPLKLupX;q? zvzcgl(?VTLO`JGuu9NB_?}uc)M6+Am$l88i8jlL(VKQRTX4$e=RjYFwLVeqQ4fWe8 zA+Qbt6=|RmA(??r-_>z-taa-=WNbt(oAYfz#6daD2z}dxw#lQDi}H(3laufJN){t# z8KQ;bQ*z!wb};yNL^tX9RdF8JZym?Vu$2wOxQ7Aek}fH6f{(dBs9HDcTo3Cv-^laK z_i-isr|MroroMYiyd7m{ILRf)EE1eUrk%K)ks>>CYJrR95+5RT*J$7PG}Gl=_|r>~ zCByDR^4*ne{%3AJ7qyL0jfG6(Xl8G@LgWOJ<(%Fmhs?`esOpNZPcr)Wt!IC#QT?SY zCr5>f5Tf9kE)Ls5WntZ0vy2S2)Hf$AS9TomvgF)p;)|X^-bW&EQ$In` z@zmh$9uPI5%;3=jkK)e7KENH>@~CW^uBI; z@TwpZ@ofi*pfYTx#CF4(GqZ6eFSk{K!7r2P#mtb!SK1jpw{q`!exV`soi}I*zw*cg*25vHxze?>e^A0Iu>jKOddBC9{(` zOLB%`CT|DBU@L=hHhbUA-Z_boAZFNGZ#_+{D~l!{l-pv@q=)-Hzzkj_$CeVW zHO-kbGDb5B^=}co$U(b#VmIUU-Ha6oxEFIBi09J{#;pCd2>E6Pr6gvFXddcK-Yi}` z>y)MguRCkTO^b_rPkDG=+^)DS6U4%g*y**y9I@-Z*r#pF zYStoqeLRhyh$k@|ziE1(6Q(NB%$CGF=6UxCPcw03vDs*ZiMp$h>FPE;1V{8_^g{?^ z8;TdtTvT3~btvGbNk6wUYuYXQexdw*=b1UmW2cmtAnuIH92Y!K+Ffbz2Uy~Er+uYe9`c1 zu^K_m!CkjSsM7 zP>@4JKRYDf%NJ*BIveGxmcpld{OpI&LeeIWa>D!{@yHb67anP&3SsC+x?y^+5JP-J z8bbn{cLuA2vM_09I#|X<#<$ytHe!7{NAc`M4@*3+23(C&8_2&rAkU^RJiPU_=xZy< z3dbRxeDVa_#G%EVPT`uLSe=c}Z_D&twCH6bRnySCc9FKd7nQBoyqAw1{ZKR=|Lyp- zqu)W_6~4#ciO}%7(`xNsb3QZ+Pp@HKK4;Afd`Er9on3(2@y@l|rc37CwTo`$S2|wzO19|R=2cAuzZY;T zy_$XX&gzZT#03uS{9S>&E_rV8Y~o(?2}48A9+0^mQEkIb;CaC9A8>+OnEQe}D~%kX zK-(qv{&IqXZ$tx)nP#BSlrNVbSCF0m*|A_$=kkG~&?4d$rSX+ldi|l3x%GseFQqSx zK?mTh3v9$9sh-##`yS2rnXbE)DtSYAEtE>CLW z-gA>ilSfnwA;`1BJz}((4`1U}s7rod6&D<*CFb7uyLBw)T)2NBr0-Fm#2(FH@v+@J z`>qa8j)E7DPL~~i!J@@tw?l646d~Fy8s=cpALWscz#&p^>2F^ZkUMtvXtBx?!b?|6 zyVi|Qwo>u(-F&leE+TwznQV!7F@bwCc9x7)SLw!Cif8Hs3Fv{tk}ZoN>Y7%=bqrsmrIuc!73nG4DD zdR7WnJJ#q`QU)j=JwKM|JLnh+e%5=Wy;BFTuNbMWrK8<_pQ?6AI7HV;*EUEk=%HYg z{*y#Z;?6|(2V-F=21=pZ69p3qr;19N9iLqJ;H;qaFoATc>;YeGQr(i85L8Nla%>iC z9+MWD_Pnd-^T21NE(3}+Wra)}EF3;eF&Ki*`pjGzR%*D{FxeDk`KidRD`s|E$F{Ls z0(asniYp@SJoo*0kaSMbYQJgwY3fz)i`p-PD7xtn&q%DsJu(*;Q#MrN&)&r`2aFMDy9}z%_HY3GoZmoU4v%^MVl%GjwIm_nmp3Y=ex7=`-K) zMlxB+y8nZU$w9w~>X7Q>!rH>X!r^(BdC$Y0?A>>F_(%HNAD)yJgs&!W5kh67@R#v8 zQrJ1p=Xg!A_H*5?&l(HTpA%eN24<|jv@g^zs5mY;&N;c4@n5fcs$cYaFv7`9D5K{c zwj+MB@ab{)Hw$k@F3pd5OK-8yO>=plKC%{5lWChZ?0Md0@}Rwjy=TW-eg^)%`UQu+ z-1*A+?)kT}>V(cWO%tTCvLYzq^t=y-=ioWykfu*x5Z-Z zQGVF5u=`=(!tKJ}JXU?2N#mw@(-tEz5v`FYBQv6SqI{!PqH)okPc)z8#t6lP#%_ygGdG=*{rBUVUD&4BuYGn1x z8bnQ5tx|1qokCqfygG@v2E9qA`jna)duVr56Hpw>SH_JB{wkWogwyLz2x2d;P zw`;dIywQKt(s8<@^R4CE{!UEiNEg0qy8BA^*LOF1w)ObD-~B$cm%BHnPqZ(yU#7q0 zgZhV8111CSKB7O4f1-T)Hh6o8eJFJJ=y37~Y@~P;IodpCF*Z2vF#h%P-7kB-&?ZDC zvL{bYzM48aH8@S4{yyV3%Qc%g2cN5$H=OTXAS`@c^jYFsN?Mj%u30f#8UE_@jrrT7 zRngVLHQlwI?}YE)8Fw-)-k$yXA2(h)dfmKzs*yCfXgd@c1?VOJW11ludj$mD2<9&b z*Mqrf+7E01mjT8%ulJYPRZZEY0T=+hO)>1?jToBRu#0G6@pux*j6}v`L_wyP*fmVq zjsD~K*CYOO2Y}5{155-#p>Q^^DvE+#KOC^Bo{p)usu+@t#p;oWBvld~vw6Vu32a~u zA^=QqL|YKp3`ax|DY#9+Z-j>_*%3=m3N~O4hH3{8Co({W{qx24j|2qy|CaqPjlYfm zqmw=cy#J7I&fk;+pFrl*ecmJoNIal}&V29=d%10yzuZM7urZ;~}K%rOm``$i$?^$mF7-x8KnHCe!{s z2fc5_u!tj1ZZkdUk_OV88!624NgehQsOx{m=wkrgR&2Ym};O z_}Nz%{7rmqgWoyLxOw}99Ln?vfjIH$f{m4t_v%Vh@ak9hhj%CY@m{2K^5@=GdU+mo zq)R3@;)~7XQ;x31&&N(Qo|uJ4)_G1_EZd!bB|Mv49Hdfza7x1+dOcL7$!wa({iw9)}!lH2L%WB4c;IdE*tTrPMlUTR5>&L=6P-&!Hv+@b#3Bo zRH68MLHqN=9oLlb$l|4dM78J8R@Few0HuaW<^j2Yfr;;AQK< zr7LU7eNAPCETM;WWY3=ClZe=%dr&krWVO}U_+ZM{_IvHCmpllugd#%O7fj7+GLx>^ z7|*r?%9FKO%2 zq`kA1meX0zT8bhQ2N%7=tnVi6ck(__m2T~I=otBqWKfmHfGT3>{Hzrvv7oW%5)s-5 z>x0XNy%}<+WXrd5qjt%z>WPI%l3Y`8ACmPn&nrJ$s7>jw=tq~Q^lvTiD*x)SxJ~Sq z6Z0fnz|LK}IJZlAaFz2S%-(xCPQE2%zB`i_KT%isG)JXl&G0MXOJMiQoSWST?ajE? zMvq80ACfy_$3kPJ?P_P%X9{56wJmphE)!?vqJNVQAuH@j(puS4mrL4d=q0R_059?K z%Et>MdUs5peQ2*?NbEKe&tVR(Y`L@FfJgMEWQ=~W)?q;-nANF=&GGGAc7dwN!FQjd z?|abNLo7cf5UE^N`n+TMqwOBBB;olOv0B}*V2u*d$18>x#xz%P41>)fpJH!P4_N6H z1Z%!+&Y|*H8F+|95u+K8$MF{iZg1NSoNQp!-4=(!FyfOvR5AZJBX~G5777m~CR&fE z+)J_BqNJ}oV(xnLOxy8uYWAZ|bFdQCaU2f->P8)1{bgE(Ts?YYWN|wPvFX_!xo!I0 z@fop2Nx{`^#~imdMn6oV38i|D@D{2afJcR<>QVFjyX5rZ+}=dBOF7sJo{3OhoQ0aI zO*X$YRc&jh;6*^ikqNE%`O}9r?={EH?bOADuf!A@Tcwt!Wf+^ay~}-{{dnFiFZn_W zDcJ&VOYk3a4NuN6vTuKDCpmFPevU~AY0zrz>ciP$oKk(xplvq$R%C3H^In7a6U*@@ z`z7H!L}(Qcrh5w2$X-k2EmRi&tW-yD+Hk{Tp+oMs{r052$Q*o&mep=L{KEA_&MA_$)6hsn zh|&$gd@PnV`&IAj`^~9>)yHZPcthg1;pu_`DGE=aQp1ybO(9oP1F@d@bKb%_C8CGj znmYtE#H_Me2qccj%MPN@%ZJAUUKZjEE5eN~=i2HIzMWT?uRAqy%C-^1;)moqAzeu@ep>2YAGYg+ipH z>ltO)?JiZti$~{sGvlc(;ny-=6+B5YD63#(OQ!@Cb@9?-B-Jx2cbMDFoFW;sNd>`5 z&NN^#5YLDrv1+GuXV=m889|${kAc1SDh01Vg{7Q4`P%LdjXtbVhCPY9v{p$J@C#;t zc;Q3N0`oExe+>6|Wc_9V=sRcQJ_^2K-FDS5KFNTg+D^BR%B)5)A^Z8Ak%-cD_ z=}XyVAdK_Al59!t{3I?r?o!h_22M#$w32ddFE)zpj9jkzn37T*@=8qN^V__nq++dV zZ5g2>FRnDm1wVg*%$Gp=wleE$u;7%duejcg7ZQpSwsijzT{AoL>iWr|wbwm8DYWs< z32`;uFWpyP?A2}97jL%M{PJuSOo~}ji+az$DS7xyNNnhe+l0Q#!Q{ug!mbY_w#y%K zL%^#=3rBPXmc0_xtQl3_uz^ok7Vud?`77rRye{F$mts0grCFT}v2(it=2h?u)#!SW z&Ny`}_*&ss@^~^ciy89Hz@e!&hMGDB^8ig<+HUrft#WnBegOw+b`L%s88#6>HVTz3 zam6!#d^VkMvx{9mcW;gX?S*BB)K-Jp!OtBATiKbrrEgs|c2hxqQtM{HM1o&4Htf_r zk!@q~eCz2m{pQ@w%aCRViN;&R3m&_ySJ6h);x3>3toPUWacZ}92L1Q&vpXSabE=Oj z4Nzj@Gk5CHUAr~v8pZ57UQI;^SG2yi3wZDqbGziL1yQ@BxXze#L6=jd|8B!wgY?B` zam1FMEdqx{isM;`hYpw|cHEu-va(u#J{!1C{jjgBlY?)4Jf77~UnlC(VSH~r{m*a6B&z>$e z>=Cr=9fCSH&EyOok$@K-IMP@|=5{+geT91fIuJtTdV=h<4-EWL@da)Cy;*ti&LZDH zg9qv;vcqSosr!Uo&o^T5_m^pPEYR>KAHG;+l{9Z@k3ny3c;oR8qQ0SOGP@_Sp2=FF z+oIiu7CAu91~E8_6so5 zwt|yK?jI~VR>BZ`?oRLVS=hzuy(_|poCP{sP*8L6z3-~5KlO|!pN;Vfz`vUD$f>#J z{OwWYZedrx?`N+W-V_iH3|z*>oka_mKj7nVZaylp$8Imb)q~K}s^NSAtr1C?FN^%+ zd?p>-_J5SH|9*mRxbXZp{#9Pj69~5E`sv=5kviB8b5r79YSqgU1bsy$V5^ zdAZU~=+VkgjJ~K#n^L}~HaDn?Pp&l6=Q(D6C>@b0=l7YRy$y=gR1QOatTal)R6XJe zOXd*}p!p?Uqy=Z&6i;8oST6eKp=XjpGc~4Jqp}_#< z+@tIMm+URAYm0&gEeCe`CSs#)jjnU@cNX|nJ@*aNy^i!(L|VHjxyn)oI3S_4DxJTbgLto7X`dbF1j>Q!_@@#V1^!h#7?S< ziorb9Kc=`mmOs{gWXOnC2bD7)aeL|kAtK{vLWUYnWSn)F9XZWTYxU`hBq&|b_xqHDkGc9fV(jI}*>bIY* z-E$<7w3TVXk$WW5d&P?i{X0dxu&Id*zKY~Vt^UBN(T7{f(=TKnxtOb1!VXO5rbcYoPPw?wt69#L{KB>TW+3R?lF z^c&{5=bW}^Irmi{#R6IGS-G2z|BxO;OEa5NJd&(N*Y8T`SGFyW%>(5{3(M%0kHeG zOdg<+{*p<_15ouZ85{tmf61V52*4oyIS(og1vsuhWpEe-0C#`Mq~!lu3x~`6E0duE z*uU1w{_C4GMCKpgq@nPCY>|e+|M6EE4*w@s($cWs_GM?Ik^zv9B?JF2qR0*aZJaB1 z{Q$tmB!G2VC*c6GwmQf{QC&t2DG!rXS3}Cd;BYuZMovRjMjjz84}~M;AhHl;kRk$x zkd>B_N6M!xgp4X2DhHF2mY0F4D_gMx(3&0Kod7m#o{ua5 diff --git a/samples/bugs/PullRequest797-pdf.js.pdf b/samples/bugs/rawdata/PullRequest797-pdf.js.pdf similarity index 100% rename from samples/bugs/PullRequest797-pdf.js.pdf rename to samples/bugs/rawdata/PullRequest797-pdf.js.pdf diff --git a/samples/bugs/PullRequest797-vera.pdf b/samples/bugs/rawdata/PullRequest797-vera.pdf similarity index 100% rename from samples/bugs/PullRequest797-vera.pdf rename to samples/bugs/rawdata/PullRequest797-vera.pdf diff --git a/samples/bugs/PullRequestInvalidObjectReference.pdf b/samples/bugs/rawdata/PullRequestInvalidObjectReference.pdf similarity index 100% rename from samples/bugs/PullRequestInvalidObjectReference.pdf rename to samples/bugs/rawdata/PullRequestInvalidObjectReference.pdf diff --git a/tests/PHPUnit/Integration/RawData/RawDataParserTest.php b/tests/PHPUnit/Integration/RawData/RawDataParserTest.php index d4cdc1ef4..947349336 100644 --- a/tests/PHPUnit/Integration/RawData/RawDataParserTest.php +++ b/tests/PHPUnit/Integration/RawData/RawDataParserTest.php @@ -318,53 +318,27 @@ public function testGetXrefDataTracksVisitedOffsets(): void } /** - * Ensure parser resolves compressed object references from xref streams. - * - * @see https://github.com/smalot/pdfparser/pull/796 - */ - public function testParseFileWithCompressedObjRefInXrefStream(): void - { - $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/PullRequestInvalidObjectReference.pdf'); - - self::assertCount(1, $document->getPages()); - } - - /** - * @see https://github.com/smalot/pdfparser/pull/797 - */ - public function testParseFileWhenStartxrefPointsToLeadingWhitespaceInVeraPdfFixture(): void - { - $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/PullRequest797-vera.pdf'); - - self::assertCount(1, $document->getPages()); - } - - /** - * @see https://github.com/smalot/pdfparser/pull/797 - */ - public function testParseFileWithCompressedXrefObjectFromPdfJsCorpus(): void - { - $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/PullRequest797-pdf.js.pdf'); - - self::assertCount(1, $document->getPages()); - } - - /** - * @see https://github.com/smalot/pdfparser/pull/798 + * @return iterable */ - public function testParseFileWhenStartxrefPointsToLeadingWhitespaceInXrefStream(): void + public static function provideRawDataRegressionFixtures(): iterable { - $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/PullRequestStartxrefWhitespaceXrefStream.pdf'); - - self::assertCount(1, $document->getPages()); + yield 'pr796 invalid-object-reference / pr798 startxref-whitespace equivalent' => [ + 'rawdata/PullRequestInvalidObjectReference.pdf', + ]; + yield 'pr797 vera / pr798 pullrequest794 equivalent' => [ + 'rawdata/PullRequest797-vera.pdf', + ]; + yield 'pr797 pdf.js xref stream fixture' => [ + 'rawdata/PullRequest797-pdf.js.pdf', + ]; } /** - * @see https://github.com/smalot/pdfparser/pull/798 + * @dataProvider provideRawDataRegressionFixtures */ - public function testParseFileWhenStartxrefPointsNearXrefKeyword(): void + public function testParseFileWithRawDataRegressionFixture(string $fixturePath): void { - $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/PullRequest794.pdf'); + $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/'.$fixturePath); self::assertCount(1, $document->getPages()); }