|
| 1 | +package com.itextpdf.pdfocr.onnxtr; |
| 2 | + |
import com.itextpdf.kernel.colors.Color;
import com.itextpdf.kernel.colors.DeviceCmyk;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.pdfocr.OcrPdfCreator;
import com.itextpdf.pdfocr.OcrPdfCreatorProperties;
import com.itextpdf.pdfocr.onnxtr.detection.IDetectionPredictor;
import com.itextpdf.pdfocr.onnxtr.detection.OnnxDetectionPredictor;
import com.itextpdf.pdfocr.onnxtr.orientation.IOrientationPredictor;
import com.itextpdf.pdfocr.onnxtr.orientation.OnnxOrientationPredictor;
import com.itextpdf.pdfocr.onnxtr.recognition.IRecognitionPredictor;
import com.itextpdf.pdfocr.onnxtr.recognition.OnnxRecognitionPredictor;
import com.itextpdf.test.ExtendedITextTest;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;

import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Collections;
| 29 | + |
| 30 | +@Tag("IntegrationTest") |
| 31 | +public class OnnxTRCmykIntegrationTest extends ExtendedITextTest { |
| 32 | + private static final String TEST_DIRECTORY = "./src/test/resources/com/itextpdf/pdfocr/OnnxTRCmykIntegrationTest/"; |
| 33 | + private static final String TEST_IMAGE_DIRECTORY = "./src/test/resources/com/itextpdf/pdfocr/images/"; |
| 34 | + private static final String TARGET_DIRECTORY = "./target/test/resources/com/itextpdf/pdfocr/OnnxTRCmykIntegrationTest/"; |
| 35 | + private static final String FAST = "./src/test/resources/com/itextpdf/pdfocr/models/rep_fast_tiny-28867779.onnx"; |
| 36 | + private static final String CRNNVGG16 = "./src/test/resources/com/itextpdf/pdfocr/models/crnn_vgg16_bn-662979cc.onnx"; |
| 37 | + private static final String MOBILENETV3 = "./src/test/resources/com/itextpdf/pdfocr/models/mobilenet_v3_small_crop_orientation-5620cf7e.onnx"; |
| 38 | + private static OnnxTrOcrEngine OCR_ENGINE; |
| 39 | + |
| 40 | + @BeforeAll |
| 41 | + public static void beforeClass() { |
| 42 | + createOrClearDestinationFolder(TARGET_DIRECTORY); |
| 43 | + |
| 44 | + IDetectionPredictor detectionPredictor = OnnxDetectionPredictor.fast(FAST); |
| 45 | + IRecognitionPredictor recognitionPredictor = OnnxRecognitionPredictor.crnnVgg16(CRNNVGG16); |
| 46 | + IOrientationPredictor orientationPredictor = OnnxOrientationPredictor.mobileNetV3(MOBILENETV3); |
| 47 | + |
| 48 | + OCR_ENGINE = new OnnxTrOcrEngine(detectionPredictor, orientationPredictor, |
| 49 | + recognitionPredictor); |
| 50 | + } |
| 51 | + |
| 52 | + @AfterAll |
| 53 | + public static void afterClass() throws Exception { |
| 54 | + OCR_ENGINE.close(); |
| 55 | + } |
| 56 | + |
| 57 | + @Test |
| 58 | + public void rainbowInvertedCmykTest() throws IOException { |
| 59 | + String src = TEST_IMAGE_DIRECTORY + "rainbow_inverted_cmyk.jpg"; |
| 60 | + String dest = TARGET_DIRECTORY + "rainbowInvertedCmykTest.pdf"; |
| 61 | + String cmpTxt = TEST_DIRECTORY + "cmp_rainbowInvertedCmykTest.txt"; |
| 62 | + |
| 63 | + if (isFixedInJdk(System.getProperty("java.version"))) { |
| 64 | + doOcrAndCreatePdf(src, dest, creatorProperties("Text1", DeviceCmyk.MAGENTA)); |
| 65 | + try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(dest))) { |
| 66 | + ExtractionStrategy extractionStrategy = OnnxTestUtils.extractTextFromLayer(pdfDocument, 1, "Text1"); |
| 67 | + Assertions.assertEquals(DeviceCmyk.MAGENTA, extractionStrategy.getFillColor()); |
| 68 | + Assertions.assertEquals(getCmpText(cmpTxt), extractionStrategy.getResultantText()); |
| 69 | + } |
| 70 | + } else { |
| 71 | + Exception e = Assertions.assertThrows(Exception.class, () -> doOcrAndCreatePdf(src, dest, null)); |
| 72 | + Assertions.assertEquals("Failed to read image.", e.getMessage()); |
| 73 | + } |
| 74 | + } |
| 75 | + |
| 76 | + @Test |
| 77 | + public void rainbowAdobeCmykTest() throws IOException { |
| 78 | + String src = TEST_IMAGE_DIRECTORY + "rainbow_adobe_cmyk.jpg"; |
| 79 | + String dest = TARGET_DIRECTORY + "rainbowAdobeCmykTest.pdf"; |
| 80 | + String cmpTxt = TEST_DIRECTORY + "cmp_rainbowAdobeCmykTest.txt"; |
| 81 | + |
| 82 | + if (isFixedInJdk(System.getProperty("java.version"))) { |
| 83 | + doOcrAndCreatePdf(src, dest, creatorProperties("Text1", DeviceCmyk.MAGENTA)); |
| 84 | + try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(dest))) { |
| 85 | + ExtractionStrategy extractionStrategy = OnnxTestUtils.extractTextFromLayer(pdfDocument, 1, "Text1"); |
| 86 | + Assertions.assertEquals(DeviceCmyk.MAGENTA, extractionStrategy.getFillColor()); |
| 87 | + Assertions.assertEquals(getCmpText(cmpTxt), extractionStrategy.getResultantText()); |
| 88 | + } |
| 89 | + } else { |
| 90 | + Exception e = Assertions.assertThrows(Exception.class, () -> doOcrAndCreatePdf(src, dest, null)); |
| 91 | + Assertions.assertEquals("Failed to read image.", e.getMessage()); |
| 92 | + } |
| 93 | + } |
| 94 | + |
| 95 | + @Test |
| 96 | + public void rainbowCmykNoProfileTest() throws IOException { |
| 97 | + String src = TEST_IMAGE_DIRECTORY + "rainbow_cmyk_inverted_no_profile.jpg"; |
| 98 | + String dest = TARGET_DIRECTORY + "rainbowCmykNoProfileTest.pdf"; |
| 99 | + String cmpTxt = TEST_DIRECTORY + "cmp_rainbowCmykNoProfileTest.txt"; |
| 100 | + |
| 101 | + if (isFixedInJdk(System.getProperty("java.version"))) { |
| 102 | + doOcrAndCreatePdf(src, dest, creatorProperties("Text1", DeviceCmyk.MAGENTA)); |
| 103 | + try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(dest))) { |
| 104 | + ExtractionStrategy extractionStrategy = OnnxTestUtils.extractTextFromLayer(pdfDocument, 1, "Text1"); |
| 105 | + Assertions.assertEquals(DeviceCmyk.MAGENTA, extractionStrategy.getFillColor()); |
| 106 | + Assertions.assertEquals(getCmpText(cmpTxt), extractionStrategy.getResultantText()); |
| 107 | + } |
| 108 | + } else { |
| 109 | + Exception e = Assertions.assertThrows(Exception.class, () -> doOcrAndCreatePdf(src, dest, null)); |
| 110 | + Assertions.assertEquals("Failed to read image.", e.getMessage()); |
| 111 | + } |
| 112 | + } |
| 113 | + |
| 114 | + private static boolean isFixedInJdk(String versionStr) { |
| 115 | + //fixed for jdk8 from 351 onwards, for jdk11 from 16 onwards and for jdk17 starting from 4 |
| 116 | + boolean isFixed = false; |
| 117 | + int majorVer = getMajorVer(versionStr); |
| 118 | + String[] split = versionStr.split("[._-]"); |
| 119 | + int minorVer = Integer.parseInt(split[split.length - 1]); |
| 120 | + |
| 121 | + if (minorVer % 10 == 2) { |
| 122 | + return false; |
| 123 | + } |
| 124 | + switch (majorVer) { |
| 125 | + case 8: |
| 126 | + isFixed = minorVer >= 351; |
| 127 | + break; |
| 128 | + case 11: |
| 129 | + isFixed = minorVer >= 16; |
| 130 | + break; |
| 131 | + case 17: |
| 132 | + isFixed = minorVer >= 4; |
| 133 | + break; |
| 134 | + default: |
| 135 | + isFixed = true; |
| 136 | + } |
| 137 | + |
| 138 | + return isFixed; |
| 139 | + } |
| 140 | + |
| 141 | + private static int getMajorVer(String versionStr) { |
| 142 | + int majorVer = 0; |
| 143 | + String[] split = versionStr.split("\\."); |
| 144 | + if (versionStr.startsWith("1.")) { |
| 145 | + //jdk versions 1 - 8 have 1. as prefix |
| 146 | + majorVer = Integer.parseInt(split[1]); |
| 147 | + } else { |
| 148 | + majorVer = Integer.parseInt(split[0]); |
| 149 | + } |
| 150 | + return majorVer; |
| 151 | + } |
| 152 | + |
| 153 | + private OcrPdfCreatorProperties creatorProperties(String layerName, Color color) { |
| 154 | + OcrPdfCreatorProperties ocrPdfCreatorProperties = new OcrPdfCreatorProperties(); |
| 155 | + ocrPdfCreatorProperties.setTextLayerName(layerName); |
| 156 | + ocrPdfCreatorProperties.setTextColor(color); |
| 157 | + return ocrPdfCreatorProperties; |
| 158 | + } |
| 159 | + |
| 160 | + private void doOcrAndCreatePdf(String imagePath, String destPdfPath, |
| 161 | + OcrPdfCreatorProperties ocrPdfCreatorProperties) throws IOException { |
| 162 | + OcrPdfCreator ocrPdfCreator = |
| 163 | + ocrPdfCreatorProperties != null ? new OcrPdfCreator(OCR_ENGINE, ocrPdfCreatorProperties) |
| 164 | + : new OcrPdfCreator(OCR_ENGINE); |
| 165 | + try (PdfWriter writer = new PdfWriter(destPdfPath)) { |
| 166 | + ocrPdfCreator.createPdf(Collections.singletonList(new File(imagePath)), writer).close(); |
| 167 | + } |
| 168 | + } |
| 169 | + |
| 170 | + private String getCmpText(String txtPath) throws IOException { |
| 171 | + int bytesCount = (int) new File(txtPath).length(); |
| 172 | + char[] array = new char[bytesCount]; |
| 173 | + try (InputStreamReader stream = new InputStreamReader(Files.newInputStream(Paths.get(txtPath)))) { |
| 174 | + stream.read(array, 0, bytesCount); |
| 175 | + return new String(array); |
| 176 | + } |
| 177 | + } |
| 178 | + |
| 179 | +} |
0 commit comments