Skip to content

Commit 63e00df

Browse files
committed
Allow multiple files for IOcrEngine#doImageOcr input
DEVSIX-9792
1 parent 9dd0dd1 commit 63e00df

12 files changed

Lines changed: 219 additions & 16 deletions

File tree

pdfocr-api/src/main/java/com/itextpdf/pdfocr/IOcrEngine.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ public interface IOcrEngine {
4040
* in the format described below.
4141
*
4242
* @param input input image {@link java.io.File}
43+
*
4344
* @return {@link java.util.Map} where key is {@link java.lang.Integer}
4445
* representing the number of the page and value is
4546
* {@link java.util.List} of {@link TextInfo} elements where each
@@ -63,6 +64,35 @@ public interface IOcrEngine {
6364
*/
6465
Map<Integer, List<TextInfo>> doImageOcr(File input, OcrProcessContext ocrProcessContext);
6566

67+
/**
68+
* Reads data from the provided list of input image files and returns retrieved data
69+
* in the format described below.
70+
*
71+
* @param inputs list of {@link java.io.File} input images
72+
*
73+
* @return {@link java.util.Map} where key is {@link java.lang.Integer}
74+
* representing the number of the page and value is
75+
* {@link java.util.List} of {@link TextInfo} elements where each
76+
* {@link TextInfo} element contains a word or a line and its 4
77+
* coordinates(bbox)
78+
*/
79+
Map<Integer, List<TextInfo>> doImageOcr(List<File> inputs);
80+
81+
/**
82+
* Reads data from the provided list of input image files and returns retrieved data
83+
* in the format described below.
84+
*
85+
* @param inputs list of {@link java.io.File} input images
86+
* @param ocrProcessContext ocr processing context
87+
*
88+
* @return {@link java.util.Map} where key is {@link java.lang.Integer}
89+
* representing the number of the page and value is
90+
* {@link java.util.List} of {@link TextInfo} elements where each
91+
* {@link TextInfo} element contains a word or a line and its 4
92+
* coordinates(bbox)
93+
*/
94+
Map<Integer, List<TextInfo>> doImageOcr(List<File> inputs, OcrProcessContext ocrProcessContext);
95+
6696
/**
6797
* Performs OCR using provided {@link IOcrEngine} for the given list of
6898
* input images and saves output to a text file using provided path.

pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrPdfCreator.java

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,14 @@ This file is part of the iText (R) project.
6767
import com.itextpdf.pdfocr.ImageExtraction.PageImageData;
6868
import com.itextpdf.pdfocr.exceptions.PdfOcrException;
6969
import com.itextpdf.pdfocr.exceptions.PdfOcrExceptionMessageConstant;
70+
import com.itextpdf.pdfocr.exceptions.PdfOcrInputException;
7071
import com.itextpdf.pdfocr.logs.PdfOcrLogMessageConstant;
7172
import com.itextpdf.pdfocr.statistics.PdfOcrOutputType;
7273
import com.itextpdf.pdfocr.statistics.PdfOcrOutputTypeStatisticsEvent;
7374
import com.itextpdf.pdfocr.structuretree.ArtifactItem;
7475
import com.itextpdf.pdfocr.structuretree.LogicalStructureTreeItem;
76+
import com.itextpdf.pdfocr.util.TiffImageUtil;
77+
7578
import org.slf4j.Logger;
7679
import org.slf4j.LoggerFactory;
7780

@@ -206,8 +209,27 @@ public final PdfDocument createPdfA(final List<File> inputImages,
206209
Map<File, Map<Integer, List<TextInfo>>> imagesTextData = new LinkedHashMap<File, Map<Integer, List<TextInfo>>>(
207210
inputImages.size() * 2);
208211

209-
for (File inputImage : inputImages) {
210-
imagesTextData.put(inputImage, ocrEngine.doImageOcr(inputImage, ocrProcessContext));
212+
Map<Integer, List<TextInfo>> imagesTextDataInfos = ocrEngine.doImageOcr(inputImages, ocrProcessContext);
213+
if (!imagesTextDataInfos.isEmpty()) {
214+
int i = 0;
215+
for (File inputImage : inputImages) {
216+
try {
217+
int pageCount =
218+
TiffImageUtil.isTiffImage(inputImage) ? PdfCreatorUtil.getNumberOfPageTiff(inputImage) : 1;
219+
Map<Integer, List<TextInfo>> currentImagesTextDataInfos = new HashMap<>();
220+
// Copy exactly pageCount pages for this image. Page keys are presumably 1-based
// (NOTE(review): confirm against the IOcrEngine contract); starting at 0 would copy
// pageCount + 1 entries and leak the previous image's last page under key 0.
for (int j = 1; j <= pageCount; j++) {
221+
currentImagesTextDataInfos.put(j, imagesTextDataInfos.get(i + j));
222+
}
223+
i += pageCount;
224+
imagesTextData.put(inputImage, currentImagesTextDataInfos);
225+
} catch (IOException | com.itextpdf.io.exceptions.IOException e) {
226+
LOGGER.error(MessageFormatUtil.format(
227+
PdfOcrLogMessageConstant.CANNOT_READ_INPUT_IMAGE,
228+
e.getMessage()));
229+
throw new PdfOcrInputException(PdfOcrExceptionMessageConstant.CANNOT_READ_INPUT_IMAGE, e);
230+
}
231+
232+
}
211233
}
212234

213235
// create PdfDocument

pdfocr-api/src/main/java/com/itextpdf/pdfocr/PdfCreatorUtil.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ static float getPoints(final float pixels) {
286286
* {@link com.itextpdf.io.source.IRandomAccessSource} based on a filename
287287
* string
288288
*/
289-
private static int getNumberOfPageTiff(final File inputImage)
289+
static int getNumberOfPageTiff(final File inputImage)
290290
throws IOException {
291291
RandomAccessFileOrArray raf = new RandomAccessFileOrArray(
292292
new RandomAccessSourceFactory()

pdfocr-api/src/test/java/com/itextpdf/pdfocr/PdfInputImageTest.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ This file is part of the iText (R) project.
2222
*/
2323
package com.itextpdf.pdfocr;
2424

25+
import com.itextpdf.commons.utils.MessageFormatUtil;
2526
import com.itextpdf.pdfocr.exceptions.PdfOcrExceptionMessageConstant;
2627
import com.itextpdf.pdfocr.exceptions.PdfOcrInputException;
2728
import com.itextpdf.pdfocr.helpers.PdfHelper;
@@ -59,7 +60,8 @@ public void corruptedImageWithoutExtensionTest() {
5960
+ "corrupted");
6061
Exception e = Assertions.assertThrows(PdfOcrInputException.class,
6162
() -> PdfHelper.getTextFromPdf(file, "testCorruptedImageWithoutExtension"));
62-
Assertions.assertEquals(PdfOcrExceptionMessageConstant.CANNOT_READ_INPUT_IMAGE, e.getMessage());
63+
Assertions.assertEquals(MessageFormatUtil.format(PdfOcrExceptionMessageConstant.CANNOT_READ_INPUT_IMAGE_PARAMS,
64+
file.getAbsolutePath()), e.getMessage());
6365
}
6466

6567
@LogMessages(messages = {
@@ -70,6 +72,7 @@ public void invalidPathWithDotTest() {
7072
File file = new File("test.Name");
7173
Exception e = Assertions.assertThrows(PdfOcrInputException.class,
7274
() -> PdfHelper.getTextFromPdf(file, "testInvalidPathWithDot"));
73-
Assertions.assertEquals(PdfOcrExceptionMessageConstant.CANNOT_READ_INPUT_IMAGE, e.getMessage());
75+
Assertions.assertEquals(MessageFormatUtil.format(PdfOcrExceptionMessageConstant.CANNOT_READ_INPUT_IMAGE_PARAMS,
76+
file.getAbsolutePath()), e.getMessage());
7477
}
7578
}

pdfocr-api/src/test/java/com/itextpdf/pdfocr/helpers/CustomOcrEngine.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,30 @@ public Map<Integer, List<TextInfo>> doImageOcr(File input, OcrProcessContext ocr
6262
return doImageOcr(input);
6363
}
6464

65+
/**
66+
* {@inheritDoc}
67+
*/
68+
@Override
69+
public Map<Integer, List<TextInfo>> doImageOcr(List<File> inputs) {
70+
Map<Integer, List<TextInfo>> result = new HashMap<>();
71+
for (File file : inputs) {
72+
Map<Integer, List<TextInfo>> imageOcr = doImageOcr(file);
73+
int pageShift = result.size();
74+
for (Map.Entry<Integer, List<TextInfo>> entry : imageOcr.entrySet()) {
75+
result.put(entry.getKey() + pageShift, entry.getValue());
76+
}
77+
}
78+
return result;
79+
}
80+
81+
/**
82+
* {@inheritDoc}
83+
*/
84+
@Override
85+
public Map<Integer, List<TextInfo>> doImageOcr(List<File> inputs, OcrProcessContext ocrProcessContext) {
86+
return doImageOcr(inputs);
87+
}
88+
6589
@Override
6690
public void createTxtFile(List<File> inputImages, File txtFile) {
6791
}

pdfocr-api/src/test/java/com/itextpdf/pdfocr/helpers/CustomProductAwareOcrEngine.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,22 @@ public Map<Integer, List<TextInfo>> doImageOcr(File input, OcrProcessContext ocr
5353
return doImageOcr(input);
5454
}
5555

56+
/**
57+
* {@inheritDoc}
58+
*/
59+
@Override
60+
public Map<Integer, List<TextInfo>> doImageOcr(List<File> inputs) {
61+
return Collections.<Integer, List<TextInfo>>emptyMap();
62+
}
63+
64+
/**
65+
* {@inheritDoc}
66+
*/
67+
@Override
68+
public Map<Integer, List<TextInfo>> doImageOcr(List<File> inputs, OcrProcessContext ocrProcessContext) {
69+
return doImageOcr(inputs);
70+
}
71+
5672
@Override
5773
public void createTxtFile(List<File> inputImages, File txtFile) {
5874
}

pdfocr-api/src/test/java/com/itextpdf/pdfocr/helpers/TestStructureDetectionOcrEngine.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,30 @@ public Map<Integer, List<TextInfo>> doImageOcr(File input, OcrProcessContext ocr
9393
return result;
9494
}
9595

96+
/**
97+
* {@inheritDoc}
98+
*/
99+
@Override
100+
public Map<Integer, List<TextInfo>> doImageOcr(List<File> inputs) {
101+
// Return an empty result rather than null so callers can safely call isEmpty()/get().
return new HashMap<>();
102+
}
103+
104+
/**
105+
* {@inheritDoc}
106+
*/
107+
@Override
108+
public Map<Integer, List<TextInfo>> doImageOcr(List<File> inputs, OcrProcessContext ocrProcessContext) {
109+
Map<Integer, List<TextInfo>> result = new HashMap<>();
110+
for (File file : inputs) {
111+
Map<Integer, List<TextInfo>> imageOcr = doImageOcr(file, ocrProcessContext);
112+
int pageShift = result.size();
113+
for (Map.Entry<Integer, List<TextInfo>> entry : imageOcr.entrySet()) {
114+
result.put(entry.getKey() + pageShift, entry.getValue());
115+
}
116+
}
117+
return result;
118+
}
119+
96120
@Override
97121
public void createTxtFile(List<File> inputImages, File txtFile) {
98122
}

pdfocr-onnx-abstract/src/main/java/com/itextpdf/pdfocr/onnx/OnnxOcrEngine.java

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ This file is part of the iText (R) project.
4343

4444
import java.awt.image.BufferedImage;
4545
import java.io.File;
46+
import java.util.ArrayList;
4647
import java.util.Collections;
4748
import java.util.List;
4849
import java.util.Map;
@@ -148,7 +149,23 @@ public Map<Integer, List<TextInfo>> doImageOcr(File input) {
148149
*/
149150
@Override
150151
public Map<Integer, List<TextInfo>> doImageOcr(File input, OcrProcessContext ocrProcessContext) {
151-
Map<Integer, List<TextInfo>> result = doOnnxOcr(input, ocrProcessContext);
152+
return doImageOcr(Collections.singletonList(input), ocrProcessContext);
153+
}
154+
155+
/**
156+
* {@inheritDoc}
157+
*/
158+
@Override
159+
public Map<Integer, List<TextInfo>> doImageOcr(List<File> inputs) {
160+
return doImageOcr(inputs, new OcrProcessContext(new OnnxEventHelper()));
161+
}
162+
163+
/**
164+
* {@inheritDoc}
165+
*/
166+
@Override
167+
public Map<Integer, List<TextInfo>> doImageOcr(List<File> inputs, OcrProcessContext ocrProcessContext) {
168+
Map<Integer, List<TextInfo>> result = doOnnxOcr(inputs, ocrProcessContext);
152169
if (TextPositioning.BY_WORDS.equals(properties.getTextPositioning())) {
153170
PdfOcrTextBuilder.sortTextInfosByLines(result);
154171
} else if (TextPositioning.BY_LINES.equals(properties.getTextPositioning())) {
@@ -188,12 +205,10 @@ public void createTxtFile(List<File> inputImages, File txtFile, OcrProcessContex
188205
OnnxFileResultEventHelper fileResultEventHelper = new OnnxFileResultEventHelper(storedEventHelper);
189206
ocrProcessContext.setOcrEventHelper(fileResultEventHelper);
190207

191-
StringBuilder content = new StringBuilder();
192-
for (File inputImage : inputImages) {
193-
Map<Integer, List<TextInfo>> outputMap = doOnnxOcr(inputImage, ocrProcessContext);
194-
content.append(PdfOcrTextBuilder.buildText(outputMap));
195-
}
196-
PdfOcrFileUtil.writeToTextFile(txtFile.getAbsolutePath(), content.toString());
208+
Map<Integer, List<TextInfo>> outputMap =
209+
doOnnxOcr(inputImages, ocrProcessContext);
210+
String content = PdfOcrTextBuilder.buildText(outputMap);
211+
PdfOcrFileUtil.writeToTextFile(txtFile.getAbsolutePath(), content);
197212

198213
fileResultEventHelper.registerAllSavedEvents();
199214
} finally {
@@ -248,10 +263,10 @@ static List<BufferedImage> getImages(File input) {
248263
}
249264

250265
/**
251-
* Reads raw data from the provided input image file and returns retrieved data
266+
* Reads raw data from the provided input image files and returns retrieved data
252267
* in the format described below.
253268
*
254-
* @param input input image {@link java.io.File}
269+
* @param input {@link java.util.List} of input image files
255270
* @param ocrProcessContext ocr processing context
256271
*
257272
* @return {@link java.util.Map} where key is {@link java.lang.Integer}
@@ -260,8 +275,11 @@ static List<BufferedImage> getImages(File input) {
260275
* {@link TextInfo} element contains a word or a line and its 4
261276
* coordinates(bbox)
262277
*/
263-
private Map<Integer, List<TextInfo>> doOnnxOcr(File input, OcrProcessContext ocrProcessContext) {
264-
final List<BufferedImage> images = getImages(input);
278+
private Map<Integer, List<TextInfo>> doOnnxOcr(List<File> input, OcrProcessContext ocrProcessContext) {
279+
final List<BufferedImage> images = new ArrayList<>();
280+
for (File file : input) {
281+
images.addAll(getImages(file));
282+
}
265283
OnnxProcessor onnxProcessor = new OnnxProcessor(detectionPredictor, orientationPredictor,
266284
recognitionPredictor);
267285
return onnxProcessor.doOcr(images, ocrProcessContext);

pdfocr-onnx-abstract/src/test/java/com/itextpdf/pdfocr/onnx/OnnxIntegrationTest.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,5 +355,10 @@ public RotationAgnosticOnnxOcrEngine(IDetectionPredictor detectionPredictor,
355355
public Map<Integer, List<TextInfo>> doImageOcr(File input, OcrProcessContext ocrProcessContext) {
356356
return PdfOcrTextBuilder.correctRotationAngle(super.doImageOcr(input, ocrProcessContext));
357357
}
358+
359+
@Override
360+
public Map<Integer, List<TextInfo>> doImageOcr(List<File> inputs, OcrProcessContext ocrProcessContext) {
361+
return PdfOcrTextBuilder.correctRotationAngle(super.doImageOcr(inputs, ocrProcessContext));
362+
}
358363
}
359364
}

pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/AbstractTesseract4OcrEngine.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,30 @@ public final Map<Integer, List<TextInfo>> doImageOcr(
253253
OutputFormat.HOCR, ocrProcessContext.getOcrEventHelper())).getTextInfos();
254254
}
255255

256+
/**
257+
* {@inheritDoc}
258+
*/
259+
@Override
260+
public Map<Integer, List<TextInfo>> doImageOcr(List<File> inputs) {
261+
return doImageOcr(inputs, new OcrProcessContext(new Tesseract4EventHelper()));
262+
}
263+
264+
/**
265+
* {@inheritDoc}
266+
*/
267+
@Override
268+
public Map<Integer, List<TextInfo>> doImageOcr(List<File> inputs, OcrProcessContext ocrProcessContext) {
269+
Map<Integer, List<TextInfo>> allTextInfos = new LinkedHashMap<>();
270+
for (File image : inputs) {
271+
Map<Integer, List<TextInfo>> imageTextInfos = doImageOcr(image, ocrProcessContext);
272+
int pageShift = allTextInfos.size();
273+
for (Map.Entry<Integer, List<TextInfo>> entry : imageTextInfos.entrySet()) {
274+
allTextInfos.put(entry.getKey() + pageShift, entry.getValue());
275+
}
276+
}
277+
return allTextInfos;
278+
}
279+
256280
/**
257281
* Reads data from the provided input image file and returns retrieved
258282
* data as string.

0 commit comments

Comments
 (0)