Skip to content

Commit 7530d2d

Browse files
author
Dmitry Radchuk
committed
Add CMYK Jpeg tests
DEVSIX-9279
1 parent b7e3ff4 commit 7530d2d

8 files changed

Lines changed: 275 additions & 0 deletions

File tree

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
package com.itextpdf.pdfocr.onnxtr;
2+
3+
import com.itextpdf.kernel.colors.Color;
4+
import com.itextpdf.kernel.colors.DeviceCmyk;
5+
import com.itextpdf.kernel.pdf.PdfDocument;
6+
import com.itextpdf.kernel.pdf.PdfReader;
7+
import com.itextpdf.kernel.pdf.PdfWriter;
8+
import com.itextpdf.pdfocr.OcrPdfCreator;
9+
import com.itextpdf.pdfocr.OcrPdfCreatorProperties;
10+
import com.itextpdf.pdfocr.onnxtr.detection.IDetectionPredictor;
11+
import com.itextpdf.pdfocr.onnxtr.detection.OnnxDetectionPredictor;
12+
import com.itextpdf.pdfocr.onnxtr.orientation.IOrientationPredictor;
13+
import com.itextpdf.pdfocr.onnxtr.orientation.OnnxOrientationPredictor;
14+
import com.itextpdf.pdfocr.onnxtr.recognition.IRecognitionPredictor;
15+
import com.itextpdf.pdfocr.onnxtr.recognition.OnnxRecognitionPredictor;
16+
import com.itextpdf.test.ExtendedITextTest;
17+
import org.junit.jupiter.api.AfterAll;
18+
import org.junit.jupiter.api.Assertions;
19+
import org.junit.jupiter.api.BeforeAll;
20+
import org.junit.jupiter.api.Tag;
21+
import org.junit.jupiter.api.Test;
22+
23+
import java.io.File;
24+
import java.io.IOException;
25+
import java.io.InputStreamReader;
26+
import java.nio.file.Files;
27+
import java.nio.file.Paths;
28+
import java.util.Collections;
29+
30+
@Tag("IntegrationTest")
31+
public class OnnxTRCmykIntegrationTest extends ExtendedITextTest {
32+
private static final String TEST_DIRECTORY = "./src/test/resources/com/itextpdf/pdfocr/OnnxTRCmykIntegrationTest/";
33+
private static final String TEST_IMAGE_DIRECTORY = "./src/test/resources/com/itextpdf/pdfocr/images/";
34+
private static final String TARGET_DIRECTORY = "./target/test/resources/com/itextpdf/pdfocr/OnnxTRCmykIntegrationTest/";
35+
private static final String FAST = "./src/test/resources/com/itextpdf/pdfocr/models/rep_fast_tiny-28867779.onnx";
36+
private static final String CRNNVGG16 = "./src/test/resources/com/itextpdf/pdfocr/models/crnn_vgg16_bn-662979cc.onnx";
37+
private static final String MOBILENETV3 = "./src/test/resources/com/itextpdf/pdfocr/models/mobilenet_v3_small_crop_orientation-5620cf7e.onnx";
38+
private static OnnxTrOcrEngine OCR_ENGINE;
39+
40+
@BeforeAll
41+
public static void beforeClass() {
42+
createOrClearDestinationFolder(TARGET_DIRECTORY);
43+
44+
IDetectionPredictor detectionPredictor = OnnxDetectionPredictor.fast(FAST);
45+
IRecognitionPredictor recognitionPredictor = OnnxRecognitionPredictor.crnnVgg16(CRNNVGG16);
46+
IOrientationPredictor orientationPredictor = OnnxOrientationPredictor.mobileNetV3(MOBILENETV3);
47+
48+
OCR_ENGINE = new OnnxTrOcrEngine(detectionPredictor, orientationPredictor,
49+
recognitionPredictor);
50+
}
51+
52+
@AfterAll
53+
public static void afterClass() throws Exception {
54+
OCR_ENGINE.close();
55+
}
56+
57+
@Test
58+
public void rainbowInvertedCmykTest() throws IOException {
59+
String src = TEST_IMAGE_DIRECTORY + "rainbow_inverted_cmyk.jpg";
60+
String dest = TARGET_DIRECTORY + "rainbowInvertedCmykTest.pdf";
61+
String cmpTxt = TEST_DIRECTORY + "cmp_rainbowInvertedCmykTest.txt";
62+
63+
if (isFixedInJdk(System.getProperty("java.version"))) {
64+
doOcrAndCreatePdf(src, dest, creatorProperties("Text1", DeviceCmyk.MAGENTA));
65+
try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(dest))) {
66+
ExtractionStrategy extractionStrategy = OnnxTestUtils.extractTextFromLayer(pdfDocument, 1, "Text1");
67+
Assertions.assertEquals(DeviceCmyk.MAGENTA, extractionStrategy.getFillColor());
68+
Assertions.assertEquals(getCmpText(cmpTxt), extractionStrategy.getResultantText());
69+
}
70+
} else {
71+
Exception e = Assertions.assertThrows(Exception.class, () -> doOcrAndCreatePdf(src, dest, null));
72+
Assertions.assertEquals("Failed to read image.", e.getMessage());
73+
}
74+
}
75+
76+
@Test
77+
public void rainbowAdobeCmykTest() throws IOException {
78+
String src = TEST_IMAGE_DIRECTORY + "rainbow_adobe_cmyk.jpg";
79+
String dest = TARGET_DIRECTORY + "rainbowAdobeCmykTest.pdf";
80+
String cmpTxt = TEST_DIRECTORY + "cmp_rainbowAdobeCmykTest.txt";
81+
82+
if (isFixedInJdk(System.getProperty("java.version"))) {
83+
doOcrAndCreatePdf(src, dest, creatorProperties("Text1", DeviceCmyk.MAGENTA));
84+
try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(dest))) {
85+
ExtractionStrategy extractionStrategy = OnnxTestUtils.extractTextFromLayer(pdfDocument, 1, "Text1");
86+
Assertions.assertEquals(DeviceCmyk.MAGENTA, extractionStrategy.getFillColor());
87+
Assertions.assertEquals(getCmpText(cmpTxt), extractionStrategy.getResultantText());
88+
}
89+
} else {
90+
Exception e = Assertions.assertThrows(Exception.class, () -> doOcrAndCreatePdf(src, dest, null));
91+
Assertions.assertEquals("Failed to read image.", e.getMessage());
92+
}
93+
}
94+
95+
@Test
96+
public void rainbowCmykNoProfileTest() throws IOException {
97+
String src = TEST_IMAGE_DIRECTORY + "rainbow_cmyk_inverted_no_profile.jpg";
98+
String dest = TARGET_DIRECTORY + "rainbowCmykNoProfileTest.pdf";
99+
String cmpTxt = TEST_DIRECTORY + "cmp_rainbowCmykNoProfileTest.txt";
100+
101+
if (isFixedInJdk(System.getProperty("java.version"))) {
102+
doOcrAndCreatePdf(src, dest, creatorProperties("Text1", DeviceCmyk.MAGENTA));
103+
try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(dest))) {
104+
ExtractionStrategy extractionStrategy = OnnxTestUtils.extractTextFromLayer(pdfDocument, 1, "Text1");
105+
Assertions.assertEquals(DeviceCmyk.MAGENTA, extractionStrategy.getFillColor());
106+
Assertions.assertEquals(getCmpText(cmpTxt), extractionStrategy.getResultantText());
107+
}
108+
} else {
109+
Exception e = Assertions.assertThrows(Exception.class, () -> doOcrAndCreatePdf(src, dest, null));
110+
Assertions.assertEquals("Failed to read image.", e.getMessage());
111+
}
112+
}
113+
114+
private static boolean isFixedInJdk(String versionStr) {
115+
//fixed for jdk8 from 351 onwards, for jdk11 from 16 onwards and for jdk17 starting from 4
116+
boolean isFixed = false;
117+
int majorVer = getMajorVer(versionStr);
118+
String[] split = versionStr.split("[._-]");
119+
int minorVer = Integer.parseInt(split[split.length - 1]);
120+
121+
if (minorVer % 10 == 2) {
122+
return false;
123+
}
124+
switch (majorVer) {
125+
case 8:
126+
isFixed = minorVer >= 351;
127+
break;
128+
case 11:
129+
isFixed = minorVer >= 16;
130+
break;
131+
case 17:
132+
isFixed = minorVer >= 4;
133+
break;
134+
default:
135+
isFixed = true;
136+
}
137+
138+
return isFixed;
139+
}
140+
141+
private static int getMajorVer(String versionStr) {
142+
int majorVer = 0;
143+
String[] split = versionStr.split("\\.");
144+
if (versionStr.startsWith("1.")) {
145+
//jdk versions 1 - 8 have 1. as prefix
146+
majorVer = Integer.parseInt(split[1]);
147+
} else {
148+
majorVer = Integer.parseInt(split[0]);
149+
}
150+
return majorVer;
151+
}
152+
153+
private OcrPdfCreatorProperties creatorProperties(String layerName, Color color) {
154+
OcrPdfCreatorProperties ocrPdfCreatorProperties = new OcrPdfCreatorProperties();
155+
ocrPdfCreatorProperties.setTextLayerName(layerName);
156+
ocrPdfCreatorProperties.setTextColor(color);
157+
return ocrPdfCreatorProperties;
158+
}
159+
160+
private void doOcrAndCreatePdf(String imagePath, String destPdfPath,
161+
OcrPdfCreatorProperties ocrPdfCreatorProperties) throws IOException {
162+
OcrPdfCreator ocrPdfCreator =
163+
ocrPdfCreatorProperties != null ? new OcrPdfCreator(OCR_ENGINE, ocrPdfCreatorProperties)
164+
: new OcrPdfCreator(OCR_ENGINE);
165+
try (PdfWriter writer = new PdfWriter(destPdfPath)) {
166+
ocrPdfCreator.createPdf(Collections.singletonList(new File(imagePath)), writer).close();
167+
}
168+
}
169+
170+
private String getCmpText(String txtPath) throws IOException {
171+
int bytesCount = (int) new File(txtPath).length();
172+
char[] array = new char[bytesCount];
173+
try (InputStreamReader stream = new InputStreamReader(Files.newInputStream(Paths.get(txtPath)))) {
174+
stream.read(array, 0, bytesCount);
175+
return new String(array);
176+
}
177+
}
178+
179+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
a
2+
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore
3+
et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut
4+
aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
5+
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa
6+
qui officia deserunt mollit anim id est laborum.
7+
Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium,
8+
totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta
9+
sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia
10+
consequuntur magni dolores eos ratione voluptatem sequi nesciunt. Neque porro quisquam est,
11+
qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi
12+
tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima
13+
veniam, quis nostrum exercitatationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea
14+
commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea voluptate velit esse quam
15+
nihil molestiae consequatur, vel illum qui dolorem eum fugiat volupt-as nulla pariatur?
16+
At vero eos et accusamus et justo odio dignissimos ducimus qui blanditiis praesentium voluptatum
17+
deleniti atque corrupti dolores et quas molestias excepturi sint occaecati cupiditate non
18+
provident, similique sunt in culpa qui officia deserunt mollitia animi, id est laborum et dolorum fuga.
19+
Et harum quidem rerum facilis est et expedita distinctio. Nam libero tempore, cum soluta nobis est
20+
eligendi optio cumque nihil impedit minus id maxime placeat facere possimus, omnis
21+
voluptas assumenda est, omnis dolor repellendus. Temporibus autem quibusdam et aut officiis
22+
debitis aut rerum necessitatibus saepe eveniet ut et voluptates repudiandae sint et molestiae non
23+
recusandae. Itaque earum rerum hic tenetur sapiente delectus, ut aut reiciendis voluptatibus
24+
maiores alias consequatur aut perferendis doloribus asperiores repellat.
25+
ponb onb
26+
sonb
27+
onb
28+
Inb
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore
2+
et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut
3+
aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
4+
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident. sunt in culpa
5+
qui officia deserunt mollit anim id est laborum.
6+
Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium,
7+
totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta
8+
sunt explicabo, Nemo enim ipsam quia voluptas sit aspernatur aut odit aut fugit, sed quia
9+
consequuntur magni dolores eos ratione voluptatem sequi nesciunt. Neque porro quisquam est,
10+
qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi
11+
tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem, Ut enim ad minima
12+
veniam, quis nostrum ullam corporis suscipit laboriosam, nisi ut aliquid ex ea
13+
commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea voluptate velit esse quam
14+
nihil molestiae consequatur, vel illum qui dolorem eum fugiat volupt-as nulla pariatur?
15+
At vero eos et accusamus et justo odio dignissimos ducimus qui blanditiis
16+
deleniti atque corrupti dolores et quas molestias excepturi sint occaecati cupiditate non
17+
provident, similique sunt in culpa qui officia deserunt mollitia animi, id est laborum et dolorum fuga.
18+
Et harum quidem rerum facilis est et expedita distinctio, Nam libero tempore, cum soluta nobis est
19+
eligendi optio cumque nihil impedit minus id maxime placeat facere possimus, omnis
20+
voluptas assumenda est, omnis dolor repellendus. Temporibus autem quibusdam et aut officiis
21+
debitis aut rerum saepe eveniet ut et voluptates repudiandae sint et molestiae non
22+
recusandae Itaque earum rerum hic tenetur a sapiente delectus, ut aut reiciendis voluptatibus
23+
maiores alias consequatur aut perferendis doloribus asperiores repellat.
24+
singessepou
25+
ponb onb
26+
sonb
27+
uungeidnjoA unpuaseeid
28+
onb
29+
weuopeexe
30+
Inb
31+
wajeidnjoA
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore
2+
et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut
3+
aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
4+
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa
5+
qui officia deserunt mollit anim id est laborum.
6+
Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium,
7+
totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta
8+
sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, quia
9+
consequuntur magni dolores eos ratione voluptatem sequi nesciunt. Neque porro quisquam est,
10+
qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi
11+
tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima
12+
veniam, quis nostrum exercitatationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea
13+
commodi consequatur? Quis autem vel eum iure reprehenderit in ea voluptate velit esse quam
14+
nihil molestiae consequatur, vel illum qui dolorem eum fugiat volupt-as nulla pariatur?
15+
At vero eos et accusamus et justo odio dignissimos ducimus qui blanditiis praesentium voluptatum
16+
deleniti atque corrupti dolores et quas molestias excepturi sint occaecati cupiditate non
17+
provident, similique sunt in culpa qui officia deserunt mollitia animi, id est laborum et dolorum fuga.
18+
Et harum quidem rerum facilis est et expedita distinctio. Nam libero tempore, cum soluta nobis est
19+
eligendi cumque nihil impedit minus id maxime placeat facere possimus, omnis
20+
voluptas assumenda est, omnis dolor repellendus. Temporibus autem quibusdam et aut officiis
21+
debitis aut rerum necessitatibus saepe eveniet ut et voluptates repudiandae sint et molestiae non
22+
recusandae. Itaque earum rerum hic tenetur a sapiente delectus, ut aut reiciendis voluptatibus
23+
maiores alias consequatur aut perferendis doloribus asperiores repellat.
24+
ponb onb opdo
25+
sonb
26+
onb
27+
Inb
28+
Inb
29+
pas
775 KB
Loading
344 KB
Loading
2.96 MB
Loading

sharpenConfiguration.xml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
<file path="com/itextpdf/pdfocr/onnxtr/OnnxTRPdfAIntegrationTest.java" />
2222
<file path="com/itextpdf/pdfocr/onnxtr/OnnxTRRotationIntegrationTest.java" />
2323
</fileset>
24+
<!-- TODO DEVSIX-9305: fileset should be removed and OnnxTRCmykIntegrationTest made autoportable-->
25+
<fileset reason="Bad handling of JPEG CMYK in java versions without JDK-8274735 fix">
26+
<file path="com/itextpdf/pdfocr/onnxtr/OnnxTRCmykIntegrationTest.java" />
27+
</fileset>
2428
<fileset reason="Difference in Iterators in java and enumerators in .net">
2529
<file path="com/itextpdf/pdfocr/onnxtr/util/Batching.java" />
2630
<file path="com/itextpdf/pdfocr/onnxtr/util/BatchProcessingGeneratorTest.java" />
@@ -114,6 +118,10 @@
114118
<file path="com/itextpdf/pdfocr/OcrPdfTest/cmp_rotated.pdf" />
115119
<file path="com/itextpdf/pdfocr/OcrPdfTest/cmp_textWithImages.pdf" />
116120
<file path="com/itextpdf/pdfocr/OcrPdfTest/cmp_whiteText.pdf" />
121+
122+
<file path="com/itextpdf/pdfocr/OnnxTRCmykIntegrationTest/cmp_rainbowAdobeCmykTest.txt" />
123+
<file path="com/itextpdf/pdfocr/OnnxTRCmykIntegrationTest/cmp_rainbowCmykNoProfileTest.txt" />
124+
<file path="com/itextpdf/pdfocr/OnnxTRCmykIntegrationTest/cmp_rainbowInvertedCmykTest.txt" />
117125
</fileset>
118126
</resource>
119127
</configuration>

0 commit comments

Comments
 (0)