@@ -22,11 +22,20 @@ This file is part of the iText (R) project.
2222 */
2323package com .itextpdf .pdfocr ;
2424
25+ import com .itextpdf .commons .actions .contexts .IMetaInfo ;
2526import com .itextpdf .io .image .ImageData ;
2627import com .itextpdf .kernel .colors .DeviceRgb ;
2728import com .itextpdf .kernel .font .PdfFont ;
2829import com .itextpdf .kernel .geom .Rectangle ;
30+ import com .itextpdf .kernel .pdf .DocumentProperties ;
31+ import com .itextpdf .kernel .pdf .PdfAConformanceLevel ;
32+ import com .itextpdf .kernel .pdf .PdfDocument ;
33+ import com .itextpdf .kernel .pdf .PdfReader ;
34+ import com .itextpdf .kernel .pdf .PdfWriter ;
35+ import com .itextpdf .kernel .pdf .canvas .parser .PdfTextExtractor ;
36+ import com .itextpdf .pdfa .PdfADocument ;
2937import com .itextpdf .pdfocr .helpers .CustomOcrEngine ;
38+ import com .itextpdf .pdfocr .helpers .CustomProductAwareOcrEngine ;
3039import com .itextpdf .pdfocr .helpers .ExtractionStrategy ;
3140import com .itextpdf .pdfocr .helpers .PdfHelper ;
3241import com .itextpdf .test .ExtendedITextTest ;
@@ -35,12 +44,15 @@ This file is part of the iText (R) project.
3544import com .itextpdf .test .annotations .type .IntegrationTest ;
3645
3746import java .io .File ;
47+ import java .io .FileNotFoundException ;
3848import java .io .IOException ;
49+ import java .nio .charset .StandardCharsets ;
3950import java .util .Arrays ;
4051import java .util .Collections ;
4152import java .util .List ;
4253import java .util .Map ;
4354import org .junit .Assert ;
55+ import org .junit .BeforeClass ;
4456import org .junit .Rule ;
4557import org .junit .Test ;
4658import org .junit .experimental .categories .Category ;
@@ -49,9 +61,119 @@ This file is part of the iText (R) project.
4961@ Category (IntegrationTest .class )
5062public class ApiTest extends ExtendedITextTest {
5163
64+ public static final String DESTINATION_FOLDER = "./target/test/com/itextpdf/pdfocr" ;
65+
66+ @ BeforeClass
67+ public static void beforeClass () {
68+ createOrClearDestinationFolder (DESTINATION_FOLDER );
69+ }
70+
5271 @ Rule
5372 public ExpectedException junitExpectedException = ExpectedException .none ();
5473
74+ @ Test
75+ public void createPdfWithFileTest () {
76+ OcrPdfCreatorProperties props = new OcrPdfCreatorProperties ()
77+ .setMetaInfo (new DummyMetaInfo ());
78+ OcrPdfCreator pdfCreator = new OcrPdfCreator (new CustomOcrEngine (), props );
79+ try (PdfDocument pdf = pdfCreator .createPdf (
80+ Collections .<File >singletonList (new File (PdfHelper .getDefaultImagePath ())),
81+ PdfHelper .getPdfWriter (),
82+ new DocumentProperties ().setEventCountingMetaInfo (new DummyMetaInfo ())
83+ )) {
84+ String contentBytes = new String (pdf .getPage (1 ).getContentBytes (), StandardCharsets .UTF_8 );
85+ Assert .assertTrue (contentBytes .contains ("<00190014001c001400150014>" ));
86+ }
87+ }
88+
89+ @ Test
90+ public void createPdfFileWithFileTest () throws IOException {
91+ String output = DESTINATION_FOLDER + "createPdfFileWithFileTest.pdf" ;
92+
93+ OcrPdfCreatorProperties props = new OcrPdfCreatorProperties ()
94+ .setMetaInfo (new DummyMetaInfo ());
95+ OcrPdfCreator pdfCreator = new OcrPdfCreator (new CustomOcrEngine (), props );
96+ pdfCreator .createPdfFile (
97+ Collections .<File >singletonList (new File (PdfHelper .getDefaultImagePath ())),
98+ new File (output ));
99+
100+ try (PdfDocument pdf = new PdfDocument (new PdfReader (output ))) {
101+ String contentBytes = new String (pdf .getPage (1 ).getContentBytes (), StandardCharsets .UTF_8 );
102+ Assert .assertTrue (contentBytes .contains ("<00190014001c001400150014>" ));
103+ }
104+ }
105+
106+ @ Test
107+ public void createPdfAWithFileTest () throws FileNotFoundException {
108+ OcrPdfCreatorProperties props = new OcrPdfCreatorProperties ()
109+ .setMetaInfo (new DummyMetaInfo ())
110+ .setPdfLang ("en-US" );
111+ OcrPdfCreator pdfCreator = new OcrPdfCreator (new CustomOcrEngine (), props );
112+ try (PdfDocument pdf = pdfCreator .createPdfA (
113+ Collections .<File >singletonList (new File (PdfHelper .getDefaultImagePath ())),
114+ PdfHelper .getPdfWriter (),
115+ new DocumentProperties ().setEventCountingMetaInfo (new DummyMetaInfo ()),
116+ PdfHelper .getRGBPdfOutputIntent ()
117+ )) {
118+ String contentBytes = new String (pdf .getPage (1 ).getContentBytes (), StandardCharsets .UTF_8 );
119+ Assert .assertTrue (contentBytes .contains ("<00190014001c001400150014>" ));
120+ Assert .assertTrue (pdf instanceof PdfADocument );
121+ }
122+ }
123+
124+ @ Test
125+ public void createPdfAFileWithFileTest () throws IOException {
126+ String output = DESTINATION_FOLDER + "createPdfAFileWithFileTest.pdf" ;
127+ OcrPdfCreatorProperties props = new OcrPdfCreatorProperties ()
128+ .setMetaInfo (new DummyMetaInfo ())
129+ .setPdfLang ("en-US" );
130+ OcrPdfCreator pdfCreator = new OcrPdfCreator (new CustomOcrEngine (), props );
131+ pdfCreator .createPdfAFile (
132+ Collections .<File >singletonList (new File (PdfHelper .getDefaultImagePath ())),
133+ new File (output ),
134+ PdfHelper .getRGBPdfOutputIntent ());
135+ try (PdfDocument pdf = new PdfDocument (new PdfReader (output ))) {
136+ String contentBytes = new String (pdf .getPage (1 ).getContentBytes (), StandardCharsets .UTF_8 );
137+ Assert .assertTrue (contentBytes .contains ("<00190014001c001400150014>" ));
138+ PdfAConformanceLevel cl = pdf .getReader ().getPdfAConformanceLevel ();
139+ Assert .assertEquals (PdfAConformanceLevel .PDF_A_3U .getConformance (), cl .getConformance ());
140+ Assert .assertEquals (PdfAConformanceLevel .PDF_A_3U .getPart (), cl .getPart ());
141+ }
142+ }
143+
144+ @ Test
145+ public void createPdfAFileWithFileNoMetaTest () throws IOException {
146+ String output = DESTINATION_FOLDER + "createPdfAFileWithFileNoMetaTest.pdf" ;
147+ OcrPdfCreatorProperties props = new OcrPdfCreatorProperties ()
148+ .setPdfLang ("en-US" );
149+ OcrPdfCreator pdfCreator = new OcrPdfCreator (new CustomOcrEngine (), props );
150+ pdfCreator .createPdfAFile (
151+ Collections .<File >singletonList (new File (PdfHelper .getDefaultImagePath ())),
152+ new File (output ),
153+ PdfHelper .getRGBPdfOutputIntent ());
154+ try (PdfDocument pdf = new PdfDocument (new PdfReader (output ))) {
155+ String contentBytes = new String (pdf .getPage (1 ).getContentBytes (), StandardCharsets .UTF_8 );
156+ Assert .assertTrue (contentBytes .contains ("<00190014001c001400150014>" ));
157+ PdfAConformanceLevel cl = pdf .getReader ().getPdfAConformanceLevel ();
158+ Assert .assertEquals (PdfAConformanceLevel .PDF_A_3U .getConformance (), cl .getConformance ());
159+ Assert .assertEquals (PdfAConformanceLevel .PDF_A_3U .getPart (), cl .getPart ());
160+ }
161+ }
162+
163+ @ Test
164+ public void createPdfAFileWithFileProductAwareEngineTest () throws IOException {
165+ String output = DESTINATION_FOLDER + "createPdfAFileWithFileProductAwareEngineTest.pdf" ;
166+ OcrPdfCreatorProperties props = new OcrPdfCreatorProperties ()
167+ .setPdfLang ("en-US" );
168+ CustomProductAwareOcrEngine ocrEngine = new CustomProductAwareOcrEngine ();
169+ OcrPdfCreator pdfCreator = new OcrPdfCreator (ocrEngine , props );
170+ pdfCreator .createPdfAFile (
171+ Collections .<File >singletonList (new File (PdfHelper .getDefaultImagePath ())),
172+ new File (output ),
173+ PdfHelper .getRGBPdfOutputIntent ());
174+
175+ Assert .assertTrue (ocrEngine .isGetMetaInfoContainerTriggered ());
176+ }
55177
56178 @ Test
57179 public void testTextInfo () {
@@ -130,4 +252,7 @@ public ImageData applyRotation(ImageData imageData) {
130252 throw new RuntimeException ("applyRotation is not implemented" );
131253 }
132254 }
255+
256+ private static class DummyMetaInfo implements IMetaInfo {
257+ }
133258}
0 commit comments