Skip to content

Commit b7e3ff4

Browse files
committed
Check if onnxruntime is available before its usage
DEVSIX-9248
1 parent ed40bbe commit b7e3ff4

31 files changed

Lines changed: 653 additions & 154 deletions

pdfocr-api/src/main/java/com/itextpdf/pdfocr/util/PdfOcrTextBuilder.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ private static void updateBBoxes(List<TextInfo> line) {
188188
.orElseThrow(UnsupportedOperationException::new).getBboxRect().getHeight();
189189
lineBottom = line.stream().reduce((lhs, rhs) -> Float.compare(lhs.getBboxRect().getBottom(), rhs.getBboxRect().getBottom()) > 0 ? rhs : lhs)
190190
.orElseThrow(UnsupportedOperationException::new).getBboxRect().getBottom();
191-
delta = (lineTop - lineBottom - lineHeight)/2;
191+
delta = (lineTop - lineBottom - lineHeight) / 2;
192192
for (TextInfo word : line) {
193193
word.getBboxRect().setY(lineBottom + delta).setHeight(lineHeight);
194194
}
@@ -203,9 +203,9 @@ private static void updateBBoxes(List<TextInfo> line) {
203203
.orElseThrow(UnsupportedOperationException::new).getBboxRect().getWidth();
204204
lineBottom = line.stream().reduce((lhs, rhs) -> Float.compare(lhs.getBboxRect().getLeft(), rhs.getBboxRect().getLeft()) > 0 ? rhs : lhs)
205205
.orElseThrow(UnsupportedOperationException::new).getBboxRect().getLeft();
206-
delta = (lineTop - lineBottom - lineHeight)/2;
206+
delta = (lineTop - lineBottom - lineHeight) / 2;
207207
for (TextInfo word : line) {
208-
word.getBboxRect().setX(lineBottom).setWidth(lineHeight);
208+
word.getBboxRect().setX(lineBottom + delta).setWidth(lineHeight);
209209
}
210210
break;
211211
default:

pdfocr-onnxtr/src/main/java/com/itextpdf/pdfocr/onnxtr/AbstractOnnxPredictor.java

Lines changed: 47 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,6 @@ This file is part of the iText (R) project.
2222
*/
2323
package com.itextpdf.pdfocr.onnxtr;
2424

25-
import com.itextpdf.pdfocr.exceptions.PdfOcrException;
26-
import com.itextpdf.pdfocr.onnxtr.util.BatchProcessingGenerator;
27-
import com.itextpdf.pdfocr.onnxtr.util.Batching;
28-
2925
import ai.onnxruntime.NodeInfo;
3026
import ai.onnxruntime.OnnxJavaType;
3127
import ai.onnxruntime.OnnxTensor;
@@ -39,6 +35,12 @@ This file is part of the iText (R) project.
3935
import ai.onnxruntime.OrtSession.SessionOptions.OptLevel;
4036
import ai.onnxruntime.TensorInfo;
4137
import ai.onnxruntime.ValueInfo;
38+
import com.itextpdf.commons.utils.MessageFormatUtil;
39+
import com.itextpdf.pdfocr.exceptions.PdfOcrException;
40+
import com.itextpdf.pdfocr.onnxtr.exceptions.PdfOcrOnnxTrExceptionMessageConstant;
41+
import com.itextpdf.pdfocr.onnxtr.util.BatchProcessingGenerator;
42+
import com.itextpdf.pdfocr.onnxtr.util.Batching;
43+
4244
import java.nio.FloatBuffer;
4345
import java.util.Arrays;
4446
import java.util.Collection;
@@ -76,6 +78,21 @@ public abstract class AbstractOnnxPredictor<T, R> implements IPredictor<T, R> {
7678
*/
7779
private final String inputName;
7880

81+
/**
82+
* Close status of the predictor.
83+
*/
84+
private boolean closed = false;
85+
86+
static {
87+
try {
88+
// OnnxRuntime.init() is used under the hood.
89+
new OrtSession.SessionOptions().close();
90+
} catch (RuntimeException | UnsatisfiedLinkError e) {
91+
DependencyLoadChecker.processException(e);
92+
throw e;
93+
}
94+
}
95+
7996
/**
8097
* Creates a new abstract predictor.
8198
*
@@ -93,20 +110,21 @@ protected AbstractOnnxPredictor(String modelPath, OnnxInputProperties inputPrope
93110
try {
94111
this.sessionOptions = createDefaultSessionOptions();
95112
} catch (OrtException e) {
96-
throw new PdfOcrException("Failed to init ONNX Runtime session options", e);
113+
throw new PdfOcrException(PdfOcrOnnxTrExceptionMessageConstant.FAILED_TO_INIT_SESSION_OPTIONS, e);
97114
}
98115

99116
try {
100117
this.session = OrtEnvironment.getEnvironment().createSession(modelPath, sessionOptions);
101118
} catch (Exception e) {
102119
this.sessionOptions.close();
103-
throw new PdfOcrException("Failed to init ONNX Runtime session", e);
120+
throw new PdfOcrException(PdfOcrOnnxTrExceptionMessageConstant.FAILED_TO_INIT_ONNX_RUNTIME_SESSION, e);
104121
}
105122

106123
try {
107124
this.inputName = validateModel(this.session, inputProperties, outputShape);
108125
} catch (Exception e) {
109-
final PdfOcrException userException = new PdfOcrException("ONNX Runtime model did not pass validation", e);
126+
final PdfOcrException userException = new PdfOcrException(
127+
PdfOcrOnnxTrExceptionMessageConstant.MODEL_DID_NOT_PASS_VALIDATION, e);
110128
try {
111129
this.session.close();
112130
} catch (OrtException closeException) {
@@ -123,23 +141,28 @@ public Iterator<R> predict(Iterator<T> inputs) {
123141
Batching.wrap(inputs, inputProperties.getBatchSize()),
124142
(List<T> batch) -> {
125143
try (final OnnxTensor inputTensor = createTensor(toInputBuffer(batch));
126-
final Result outputTensor = session.run(Collections.singletonMap(inputName, inputTensor))) {
144+
final Result outputTensor = session.run(Collections.singletonMap(inputName, inputTensor))) {
127145
return fromOutputBuffer(batch, parseModelOutput(outputTensor));
128146
} catch (OrtException e) {
129-
throw new PdfOcrException("ONNX Runtime operation failed", e);
147+
throw new PdfOcrException(
148+
PdfOcrOnnxTrExceptionMessageConstant.ONNX_RUNTIME_OPERATION_FAILED, e);
130149
}
131150
}
132151
);
133152
}
134153

135154
@Override
136155
public void close() {
156+
if (closed) {
157+
return;
158+
}
137159
try {
138160
session.close();
139161
sessionOptions.close();
140162
} catch (OrtException e) {
141-
throw new PdfOcrException("Failed to close an ONNX Runtime session", e);
163+
throw new PdfOcrException(PdfOcrOnnxTrExceptionMessageConstant.FAILED_TO_CLOSE_ONNX_RUNTIME_SESSION, e);
142164
}
165+
closed = true;
143166
}
144167

145168
/**
@@ -205,51 +228,47 @@ private static String validateModel(OrtSession session, OnnxInputProperties prop
205228
private static String validateModelInput(OrtSession session, OnnxInputProperties properties) throws OrtException {
206229
final Collection<NodeInfo> inputInfo = session.getInputInfo().values();
207230
if (inputInfo.size() != 1) {
208-
throw new IllegalArgumentException(
209-
"Expected 1 input, but got " + inputInfo.size() + " instead"
210-
);
231+
throw new IllegalArgumentException(MessageFormatUtil.format(
232+
PdfOcrOnnxTrExceptionMessageConstant.UNEXPECTED_INPUT_SIZE, inputInfo.size()));
211233
}
212234
final NodeInfo inputNodeInfo = inputInfo.iterator().next();
213235
final ValueInfo inputNodeValueInfo = inputNodeInfo.getInfo();
214236
if (!(inputNodeValueInfo instanceof TensorInfo)) {
215-
throw new IllegalArgumentException("Unexpected input type, expected float32 tensor");
237+
throw new IllegalArgumentException(PdfOcrOnnxTrExceptionMessageConstant.UNEXPECTED_INPUT_TYPE);
216238
}
217239
final TensorInfo inputTensorInfo = (TensorInfo) inputNodeValueInfo;
218240
if (inputTensorInfo.type != OnnxJavaType.FLOAT) {
219-
throw new IllegalArgumentException("Unexpected input type, expected float32 tensor");
241+
throw new IllegalArgumentException(PdfOcrOnnxTrExceptionMessageConstant.UNEXPECTED_INPUT_TYPE);
220242
}
221243
final long[] inputShape = inputTensorInfo.getShape();
222244
if (isShapeIncompatible(properties.getShape(), inputShape)) {
223-
throw new IllegalArgumentException(
224-
"Expected " + Arrays.toString(properties.getShape()) + " input shape, "
225-
+ "but got " + Arrays.toString(inputShape) + " instead"
226-
);
245+
throw new IllegalArgumentException(MessageFormatUtil.format(
246+
PdfOcrOnnxTrExceptionMessageConstant.UNEXPECTED_INPUT_SHAPE, Arrays.toString(properties.getShape()),
247+
Arrays.toString(inputShape)));
227248
}
228249
return inputNodeInfo.getName();
229250
}
230251

231252
private static void validateModelOutput(OrtSession session, long[] expectedOutputShape) throws OrtException {
232253
final Collection<NodeInfo> outputInfo = session.getOutputInfo().values();
233254
if (outputInfo.size() != 1) {
234-
throw new IllegalArgumentException(
235-
"Expected 1 output, but got " + outputInfo.size() + " instead"
236-
);
255+
throw new IllegalArgumentException(MessageFormatUtil.format(
256+
PdfOcrOnnxTrExceptionMessageConstant.UNEXPECTED_OUTPUT_SIZE, outputInfo.size()));
237257
}
238258
final NodeInfo outputNodeInfo = outputInfo.iterator().next();
239259
final ValueInfo outputNodeValueInfo = outputNodeInfo.getInfo();
240260
if (!(outputNodeValueInfo instanceof TensorInfo)) {
241-
throw new IllegalArgumentException("Unexpected output type, expected float32 tensor");
261+
throw new IllegalArgumentException(PdfOcrOnnxTrExceptionMessageConstant.UNEXPECTED_OUTPUT_TYPE);
242262
}
243263
final TensorInfo outputTensorInfo = (TensorInfo) outputNodeValueInfo;
244264
if (outputTensorInfo.type != OnnxJavaType.FLOAT) {
245-
throw new IllegalArgumentException("Unexpected output type, expected float32 tensor");
265+
throw new IllegalArgumentException(PdfOcrOnnxTrExceptionMessageConstant.UNEXPECTED_OUTPUT_TYPE);
246266
}
247267
final long[] actualOutputShape = outputTensorInfo.getShape();
248268
if (isShapeIncompatible(expectedOutputShape, actualOutputShape)) {
249-
throw new IllegalArgumentException(
250-
"Expected " + Arrays.toString(expectedOutputShape) + " output shape, "
251-
+ "but got " + Arrays.toString(actualOutputShape) + " instead"
252-
);
269+
throw new IllegalArgumentException(MessageFormatUtil.format(
270+
PdfOcrOnnxTrExceptionMessageConstant.UNEXPECTED_OUTPUT_SHAPE, Arrays.toString(expectedOutputShape),
271+
Arrays.toString(actualOutputShape)));
253272
}
254273
}
255274

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/*
2+
This file is part of the iText (R) project.
3+
Copyright (c) 1998-2025 Apryse Group NV
4+
Authors: Apryse Software.
5+
6+
This program is offered under a commercial and under the AGPL license.
7+
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
8+
9+
AGPL licensing:
10+
This program is free software: you can redistribute it and/or modify
11+
it under the terms of the GNU Affero General Public License as published by
12+
the Free Software Foundation, either version 3 of the License, or
13+
(at your option) any later version.
14+
15+
This program is distributed in the hope that it will be useful,
16+
but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
GNU Affero General Public License for more details.
19+
20+
You should have received a copy of the GNU Affero General Public License
21+
along with this program. If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
package com.itextpdf.pdfocr.onnxtr;
24+
25+
import com.itextpdf.commons.utils.StringNormalizer;
26+
import com.itextpdf.pdfocr.exceptions.PdfOcrException;
27+
import com.itextpdf.pdfocr.onnxtr.exceptions.PdfOcrOnnxTrExceptionMessageConstant;
28+
29+
/**
30+
* Internal helper class to construct a friendlier error in case some dependency couldn’t be loaded.
31+
*
32+
* <p>
33+
* NOTE: for internal usage only. Be aware that its API and functionality may be changed in the future.
34+
*/
35+
final class DependencyLoadChecker {
36+
private DependencyLoadChecker() {
37+
// Private constructor will prevent the instantiation of this class directly.
38+
}
39+
40+
/**
41+
* Processes the exception or error: checks if exception is related to some dependency that couldn’t be loaded and
42+
* in that case throws a {@link PdfOcrException} with a friendlier error message; otherwise returns without throwing.
43+
*
44+
* @param throwable exception or error to process
45+
*/
46+
public static void processException(Throwable throwable) {
47+
String throwableMessage = throwable.getMessage();
48+
boolean isOnnxRuntime = (throwable instanceof RuntimeException &&
49+
throwableMessage.contains("Failed to load onnx-runtime library")) ||
50+
(throwable instanceof UnsatisfiedLinkError && throwableMessage.contains("onnxruntime"));
51+
if (isOnnxRuntime) {
52+
String message = getOnnxRuntimeError();
53+
throw new PdfOcrException(message, throwable);
54+
}
55+
}
56+
57+
private static String getOnnxRuntimeError() {
58+
String message = PdfOcrOnnxTrExceptionMessageConstant.FAILED_TO_LOAD_ONNXRUNTIME;
59+
if (isWindows()) {
60+
message += "\nPossible causes for Windows:\n" +
61+
"lack of the latest version of the VC++ redistributable " +
62+
"(solution: install it);\n" +
63+
"for Oracle JVMs, mismatch of MSVC runtime version " +
64+
"(solution: upgrade the JVM to a version compiled with newer VC libraries).\n";
65+
}
66+
return message;
67+
}
68+
69+
/**
70+
* Checks current OS type.
71+
*
72+
* @return boolean {@code true} if current OS is Windows, otherwise - {@code false}
73+
*/
74+
private static boolean isWindows() {
75+
return StringNormalizer.toLowerCase(identifyOsType()).contains("win");
76+
}
77+
78+
/**
79+
* Identifies the type of the current OS and returns it (win, linux).
80+
*
81+
* @return type of current os as {@link java.lang.String}
82+
*/
83+
private static String identifyOsType() {
84+
String os = System.getProperty("os.name") == null
85+
? System.getProperty("OS") : System.getProperty("os.name");
86+
return StringNormalizer.toLowerCase(os);
87+
}
88+
}

pdfocr-onnxtr/src/main/java/com/itextpdf/pdfocr/onnxtr/FloatBufferMdArray.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ This file is part of the iText (R) project.
2323
package com.itextpdf.pdfocr.onnxtr;
2424

2525
import ai.onnxruntime.OrtUtil;
26+
import com.itextpdf.pdfocr.onnxtr.exceptions.PdfOcrOnnxTrExceptionMessageConstant;
27+
2628
import java.nio.FloatBuffer;
2729
import java.util.Objects;
2830

@@ -47,10 +49,10 @@ public FloatBufferMdArray(FloatBuffer data, long[] shape) {
4749
Objects.requireNonNull(data);
4850
Objects.requireNonNull(shape);
4951
if (!OrtUtil.validateShape(shape)) {
50-
throw new IllegalArgumentException("Shape is not valid");
52+
throw new IllegalArgumentException(PdfOcrOnnxTrExceptionMessageConstant.SHAPE_IS_NOT_VALID);
5153
}
5254
if (data.remaining() != OrtUtil.elementCount(shape)) {
53-
throw new IllegalArgumentException("Data element count does not match shape");
55+
throw new IllegalArgumentException(PdfOcrOnnxTrExceptionMessageConstant.ELEM_COUNT_DOES_NOT_MATCH_SHAPE);
5456
}
5557
this.data = data.duplicate();
5658
this.shape = shape.clone();

pdfocr-onnxtr/src/main/java/com/itextpdf/pdfocr/onnxtr/OnnxInputProperties.java

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ This file is part of the iText (R) project.
2222
*/
2323
package com.itextpdf.pdfocr.onnxtr;
2424

25+
import com.itextpdf.commons.utils.MessageFormatUtil;
26+
import com.itextpdf.pdfocr.onnxtr.exceptions.PdfOcrOnnxTrExceptionMessageConstant;
27+
2528
import java.util.Arrays;
2629
import java.util.Objects;
2730

@@ -70,22 +73,26 @@ public class OnnxInputProperties {
7073
public OnnxInputProperties(float[] mean, float[] std, long[] shape, boolean symmetricPad) {
7174
Objects.requireNonNull(mean);
7275
if (mean.length != EXPECTED_CHANNEL_COUNT) {
73-
throw new IllegalArgumentException("mean should be a " + EXPECTED_CHANNEL_COUNT + "-element array");
76+
throw new IllegalArgumentException(MessageFormatUtil.format(
77+
PdfOcrOnnxTrExceptionMessageConstant.UNEXPECTED_MEAN_CHANNEL_COUNT, EXPECTED_CHANNEL_COUNT));
7478
}
7579
Objects.requireNonNull(std);
7680
if (std.length != EXPECTED_CHANNEL_COUNT) {
77-
throw new IllegalArgumentException("std should be a " + EXPECTED_CHANNEL_COUNT + "-element array");
81+
throw new IllegalArgumentException(MessageFormatUtil.format(
82+
PdfOcrOnnxTrExceptionMessageConstant.UNEXPECTED_STD_CHANNEL_COUNT, EXPECTED_CHANNEL_COUNT));
7883
}
7984
Objects.requireNonNull(shape);
8085
if (shape.length != EXPECTED_SHAPE_SIZE) {
81-
throw new IllegalArgumentException("shape should be a " + EXPECTED_SHAPE_SIZE + "-element array (BCHW)");
86+
throw new IllegalArgumentException(MessageFormatUtil.format(
87+
PdfOcrOnnxTrExceptionMessageConstant.UNEXPECTED_SHAPE_SIZE, EXPECTED_SHAPE_SIZE));
8288
}
8389
if (shape[1] != EXPECTED_CHANNEL_COUNT) {
84-
throw new IllegalArgumentException("Model only supports RGB images with a BCHW input format");
90+
throw new IllegalArgumentException(PdfOcrOnnxTrExceptionMessageConstant.MODEL_ONLY_SUPPORTS_RGB);
8591
}
8692
for (final long dim : shape) {
8793
if (dim <= 0 || ((int) dim) != dim) {
88-
throw new IllegalArgumentException("Unexpected dimension value: " + dim);
94+
throw new IllegalArgumentException(MessageFormatUtil.format(
95+
PdfOcrOnnxTrExceptionMessageConstant.UNEXPECTED_DIMENSION_VALUE, dim));
8996
}
9097
}
9198

pdfocr-onnxtr/src/main/java/com/itextpdf/pdfocr/onnxtr/OnnxTrEngineProperties.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ public class OnnxTrEngineProperties {
3636
public OnnxTrEngineProperties() {
3737

3838
}
39+
3940
/**
4041
* Defines the way text is retrieved and grouped from onnxtr engine output.
4142
* It changes the way text is selected in the result pdf document.
@@ -44,8 +45,7 @@ public OnnxTrEngineProperties() {
4445
private TextPositioning textPositioning;
4546

4647
/**
47-
* Defines the way text is retrieved from ocr engine output using
48-
* {@link TextPositioning}.
48+
* Defines the way text is retrieved from ocr engine output using {@link TextPositioning}.
4949
*
5050
* @return the way text is retrieved
5151
*/
@@ -58,6 +58,7 @@ public TextPositioning getTextPositioning() {
5858
* using {@link TextPositioning}.
5959
*
6060
* @param textPositioning the way text is retrieved
61+
*
6162
* @return the {@link OnnxTrEngineProperties} instance
6263
*/
6364
public OnnxTrEngineProperties setTextPositioning(TextPositioning textPositioning) {

0 commit comments

Comments
 (0)