@@ -297,6 +297,45 @@ abstract void doTesseractOcr(File inputImage,
297297 List <File > outputFiles , OutputFormat outputFormat ,
298298 int pageNumber );
299299
300+ /**
301+ * Gets path to provided tess data directory.
302+ *
303+ * @return path to provided tess data directory as
304+ * {@link java.lang.String}
305+ */
306+ String getTessData () {
307+ if (getTesseract4OcrEngineProperties ().getPathToTessData () == null ) {
308+ throw new Tesseract4OcrException (Tesseract4OcrException
309+ .PATH_TO_TESS_DATA_IS_NOT_SET );
310+ } else {
311+ return getTesseract4OcrEngineProperties ().getPathToTessData ()
312+ .getAbsolutePath ();
313+ }
314+ }
315+
316+ void scheduledCheck () {
317+ ReflectionUtils .scheduledCheck ();
318+ }
319+
320+ void onEvent () {
321+ IMetaInfo metaInfo = this .getThreadLocalMetaInfo ();
322+ if (!(metaInfo instanceof OcrPdfCreatorMetaInfo )) {
323+ EventCounterHandler .getInstance ()
324+ .onEvent (PdfOcrTesseract4Event .TESSERACT4_IMAGE_OCR , this .getThreadLocalMetaInfo (), getClass ());
325+ } else {
326+ UUID uuid = ((OcrPdfCreatorMetaInfo ) metaInfo ).getDocumentId ();
327+ if (!processedUUID .contains (uuid )) {
328+ processedUUID .add (uuid );
329+ EventCounterHandler .getInstance ()
330+ .onEvent (PdfDocumentType .PDFA .equals (((OcrPdfCreatorMetaInfo ) metaInfo ).getPdfDocumentType ())
331+ ? PdfOcrTesseract4Event .TESSERACT4_IMAGE_TO_PDFA
332+ : PdfOcrTesseract4Event .TESSERACT4_IMAGE_TO_PDF ,
333+ ((OcrPdfCreatorMetaInfo ) metaInfo ).getWrappedMetaInfo (), getClass ());
334+
335+ }
336+ }
337+ }
338+
300339 /**
301340 * Reads data from the provided input image file.
302341 *
@@ -306,7 +345,7 @@ abstract void doTesseractOcr(File inputImage,
306345 * @return {@link ITesseractOcrResult} instance, either {@link StringTesseractOcrResult}
307346 * if output format is TXT, or {@link TextInfoTesseractOcrResult} if the output format is HOCR
308347 */
309- ITesseractOcrResult processInputFiles (
348+ private ITesseractOcrResult processInputFiles (
310349 final File input , final OutputFormat outputFormat ) {
311350 Map <Integer , List <TextInfo >> imageData =
312351 new LinkedHashMap <Integer , List <TextInfo >>();
@@ -320,10 +359,10 @@ ITesseractOcrResult processInputFiles(
320359 ? 1 : ImagePreprocessingUtil .getNumberOfPageTiff (input );
321360 int numOfPages =
322361 getTesseract4OcrEngineProperties ().isPreprocessingImages ()
323- ? realNumOfPages : 1 ;
362+ ? realNumOfPages : 1 ;
324363 int numOfFiles =
325364 getTesseract4OcrEngineProperties ().isPreprocessingImages ()
326- ? 1 : realNumOfPages ;
365+ ? 1 : realNumOfPages ;
327366
328367 for (int page = 1 ; page <= numOfPages ; page ++) {
329368 String extension = outputFormat .equals (OutputFormat .HOCR )
@@ -370,45 +409,6 @@ ITesseractOcrResult processInputFiles(
370409 return result ;
371410 }
372411
373- /**
374- * Gets path to provided tess data directory.
375- *
376- * @return path to provided tess data directory as
377- * {@link java.lang.String}
378- */
379- String getTessData () {
380- if (getTesseract4OcrEngineProperties ().getPathToTessData () == null ) {
381- throw new Tesseract4OcrException (Tesseract4OcrException
382- .PATH_TO_TESS_DATA_IS_NOT_SET );
383- } else {
384- return getTesseract4OcrEngineProperties ().getPathToTessData ()
385- .getAbsolutePath ();
386- }
387- }
388-
389- void scheduledCheck () {
390- ReflectionUtils .scheduledCheck ();
391- }
392-
393- void onEvent () {
394- IMetaInfo metaInfo = this .getThreadLocalMetaInfo ();
395- if (!(metaInfo instanceof OcrPdfCreatorMetaInfo )) {
396- EventCounterHandler .getInstance ()
397- .onEvent (PdfOcrTesseract4Event .TESSERACT4_IMAGE_OCR , this .getThreadLocalMetaInfo (), getClass ());
398- } else {
399- UUID uuid = ((OcrPdfCreatorMetaInfo ) metaInfo ).getDocumentId ();
400- if (!processedUUID .contains (uuid )) {
401- processedUUID .add (uuid );
402- EventCounterHandler .getInstance ()
403- .onEvent (PdfDocumentType .PDFA .equals (((OcrPdfCreatorMetaInfo ) metaInfo ).getPdfDocumentType ())
404- ? PdfOcrTesseract4Event .TESSERACT4_IMAGE_TO_PDFA
405- : PdfOcrTesseract4Event .TESSERACT4_IMAGE_TO_PDF ,
406- ((OcrPdfCreatorMetaInfo ) metaInfo ).getWrappedMetaInfo (), getClass ());
407-
408- }
409- }
410- }
411-
412412 /**
413413 * Creates a temporary file with given extension.
414414 *
@@ -456,10 +456,10 @@ private void verifyImageFormatValidity(final File image)
456456 }
457457 }
458458
459- private interface ITesseractOcrResult {
459+ interface ITesseractOcrResult {
460460 }
461461
462- private static class StringTesseractOcrResult implements ITesseractOcrResult {
462+ static class StringTesseractOcrResult implements ITesseractOcrResult {
463463 private String data ;
464464
465465 StringTesseractOcrResult (String data ) {
@@ -471,7 +471,7 @@ String getData() {
471471 }
472472 }
473473
474- private static class TextInfoTesseractOcrResult implements ITesseractOcrResult {
474+ static class TextInfoTesseractOcrResult implements ITesseractOcrResult {
475475 private Map <Integer , List <TextInfo >> textInfos ;
476476
477477 TextInfoTesseractOcrResult (Map <Integer , List <TextInfo >> textInfos ) {
0 commit comments