@@ -22,7 +22,6 @@ This file is part of the iText (R) project.
2222 */
2323package com .itextpdf .pdfocr .tessdata ;
2424
25- import com .itextpdf .io .util .MessageFormatUtil ;
2625import com .itextpdf .kernel .colors .DeviceCmyk ;
2726import com .itextpdf .kernel .pdf .PdfWriter ;
2827import com .itextpdf .kernel .utils .CompareTool ;
@@ -32,14 +31,11 @@ This file is part of the iText (R) project.
3231import com .itextpdf .pdfocr .PdfOcrLogMessageConstant ;
3332import com .itextpdf .pdfocr .tesseract4 .AbstractTesseract4OcrEngine ;
3433import com .itextpdf .pdfocr .tesseract4 .Tesseract4OcrEngineProperties ;
35- import com .itextpdf .pdfocr .tesseract4 .Tesseract4OcrException ;
3634import com .itextpdf .pdfocr .tesseract4 .TextPositioning ;
3735import com .itextpdf .test .annotations .LogMessage ;
3836import com .itextpdf .test .annotations .LogMessages ;
3937
4038import java .io .File ;
41- import java .io .FileInputStream ;
42- import java .io .FileNotFoundException ;
4339import java .io .IOException ;
4440import java .nio .file .Files ;
4541import java .util .ArrayList ;
@@ -627,97 +623,6 @@ public void testJapaneseScript() {
627623 Assert .assertEquals (expected , result );
628624 }
629625
630- @ Test
631- public void testCustomUserWords () {
632- String imgPath = TEST_IMAGES_DIRECTORY + "wierdwords.png" ;
633- List <String > userWords = Arrays .<String >asList ("he23llo" , "qwetyrtyqpwe-rty" );
634-
635- Tesseract4OcrEngineProperties properties =
636- tesseractReader .getTesseract4OcrEngineProperties ();
637- properties .setLanguages (Arrays .asList ("fra" ));
638- properties .setUserWords ("fra" , userWords );
639- tesseractReader .setTesseract4OcrEngineProperties (properties );
640- String result = getRecognizedTextFromTextFile (tesseractReader , imgPath );
641- Assert .assertTrue (result .contains (userWords .get (0 ))
642- || result .contains (userWords .get (1 )));
643-
644- Assert .assertTrue (tesseractReader .getTesseract4OcrEngineProperties ()
645- .getPathToUserWordsFile ().endsWith (".user-words" ));
646- }
647-
648- @ Test
649- public void testCustomUserWordsWithListOfLanguages () {
650- String imgPath = TEST_IMAGES_DIRECTORY + "bogusText.jpg" ;
651- String expectedOutput = "B1adeb1ab1a" ;
652-
653- Tesseract4OcrEngineProperties properties =
654- tesseractReader .getTesseract4OcrEngineProperties ();
655- properties .setLanguages (Arrays .asList ("fra" , "eng" ));
656- properties .setUserWords ("eng" , Arrays .<String >asList ("b1adeb1ab1a" ));
657- tesseractReader .setTesseract4OcrEngineProperties (properties );
658-
659- String result = getRecognizedTextFromTextFile (tesseractReader , imgPath );
660- result = result .replace ("\n " , "" ).replace ("\f " , "" );
661- result = result .replaceAll ("[^\\ u0009\\ u000A\\ u000D\\ u0020-\\ u007E]" , "" );
662- Assert .assertTrue (result .startsWith (expectedOutput ));
663-
664- Assert .assertTrue (tesseractReader .getTesseract4OcrEngineProperties ()
665- .getPathToUserWordsFile ().endsWith (".user-words" ));
666- }
667-
668- @ Test
669- public void testUserWordsWithLanguageNotInList () throws FileNotFoundException {
670- junitExpectedException .expect (Tesseract4OcrException .class );
671- junitExpectedException .expectMessage (MessageFormatUtil
672- .format (Tesseract4OcrException .LANGUAGE_IS_NOT_IN_THE_LIST ,
673- "spa" ));
674- String userWords = TEST_DOCUMENTS_DIRECTORY + "userwords.txt" ;
675- Tesseract4OcrEngineProperties properties =
676- tesseractReader .getTesseract4OcrEngineProperties ();
677- properties .setUserWords ("spa" , new FileInputStream (userWords ));
678- properties .setLanguages (new ArrayList <String >());
679- }
680-
681- @ Test
682- public void testIncorrectLanguageForUserWordsAsList () {
683- junitExpectedException .expect (Tesseract4OcrException .class );
684- junitExpectedException .expectMessage (MessageFormatUtil
685- .format (Tesseract4OcrException .LANGUAGE_IS_NOT_IN_THE_LIST ,
686- "eng1" ));
687- Tesseract4OcrEngineProperties properties =
688- tesseractReader .getTesseract4OcrEngineProperties ();
689- properties .setUserWords ("eng1" , Arrays .<String >asList ("word1" , "word2" ));
690- properties .setLanguages (new ArrayList <String >());
691- }
692-
693- @ Test
694- public void testUserWordsWithDefaultLanguageNotInList ()
695- throws FileNotFoundException {
696- String userWords = TEST_DOCUMENTS_DIRECTORY + "userwords.txt" ;
697- Tesseract4OcrEngineProperties properties =
698- tesseractReader .getTesseract4OcrEngineProperties ();
699- properties .setUserWords ("eng" , new FileInputStream (userWords ));
700- properties .setLanguages (new ArrayList <String >());
701- tesseractReader .setTesseract4OcrEngineProperties (properties );
702- String imgPath = TEST_IMAGES_DIRECTORY + "numbers_01.jpg" ;
703- String expectedOutput = "619121" ;
704- String result = getRecognizedTextFromTextFile (tesseractReader , imgPath );
705- Assert .assertTrue (result .startsWith (expectedOutput ));
706- }
707-
708- @ Test
709- public void testUserWordsFileNotDeleted () {
710- String userWords = TEST_DOCUMENTS_DIRECTORY + "userwords.txt" ;
711- Tesseract4OcrEngineProperties properties =
712- tesseractReader .getTesseract4OcrEngineProperties ();
713- properties .setPathToUserWordsFile (userWords );
714- properties .setLanguages (Arrays .<String >asList ("eng" ));
715- tesseractReader .setTesseract4OcrEngineProperties (properties );
716- String imgPath = TEST_IMAGES_DIRECTORY + "numbers_01.jpg" ;
717- tesseractReader .doImageOcr (new File (imgPath ));
718- Assert .assertTrue (new File (userWords ).exists ());
719- }
720-
721626 /**
722627 * Do OCR for given image and compare result text file with expected one.
723628 */
0 commit comments