Skip to content

Commit 9dd0dd1

Browse files
committed
Update paths in javadoc
Add read.md for conversion script DEVSIX-9776
1 parent 4603c70 commit 9dd0dd1

6 files changed

Lines changed: 410 additions & 352 deletions

File tree

easyOcr_to_onnx_export/README.md

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# easyOcr_to_onnx_export script
2+
3+
## Disclaimer
4+
5+
There is no official method for converting EasyOCR models to ONNX, so a custom approach was required.
6+
The converted recognition models retain the same inputs and outputs as the original versions, while the detection models were slightly adjusted to better fit our use case.
7+
<br>
8+
9+
10+
## Setup Instructions
11+
12+
Follow these steps to set up a virtual environment and install the required dependencies.
13+
14+
### 1. Create a virtual environment
15+
16+
```bash
17+
python -m venv .venv
18+
```
19+
20+
---
21+
22+
### 2. Activate the virtual environment
23+
24+
* **Linux / macOS:**
25+
26+
```bash
27+
source .venv/bin/activate
28+
```
29+
30+
* **Windows:**
31+
32+
```bat
33+
.venv\Scripts\activate
34+
```
35+
36+
---
37+
38+
### 3. Install dependencies
39+
40+
Install all required packages using the `requirements.txt` file:
41+
42+
```bash
43+
pip install -r requirements.txt
44+
```
45+
46+
---
47+
48+
### 4. Run the script
49+
50+
```bash
51+
python easyOcr_to_onnx_export.py <model_dir>
52+
```
53+
54+
Replace `<model_dir>` with the path to your EasyOCR model directory.
55+
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
easyocr==1.7.2
2+
torch==2.8.0
3+
onnxscript

pdfocr-onnx-abstract/src/main/java/com/itextpdf/pdfocr/onnx/detection/OnnxDetectionPredictor.java

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -347,19 +347,19 @@ public static OnnxDetectionPredictor linkNet(String modelPath, IOrtSessionOption
347347
* This method can be used to load the following PaddleOCR models:
348348
* <ul>
349349
* <li>
350-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_server_det_infer.tar">
350+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv5_server_det_infer">
351351
* PP-OCRv5_server_det
352352
* </a>
353353
* <li>
354-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_mobile_det_infer.tar">
354+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv5_mobile_det_infer">
355355
* PP-OCRv5_mobile_det
356356
* </a>
357357
* <li>
358-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv4_server_det_infer.tar">
358+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv4_server_det_infer">
359359
* PP-OCRv4_server_det
360360
* </a>
361361
* <li>
362-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv4_mobile_det_infer.tar">
362+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv4_mobile_det_infer">
363363
* PP-OCRv4_mobile_det
364364
* </a>
365365
* </ul>
@@ -403,19 +403,19 @@ public static OnnxDetectionPredictor paddleOcr(String modelDirPath) throws IOExc
403403
* This method can be used to load the following PaddleOCR models:
404404
* <ul>
405405
* <li>
406-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_server_det_infer.tar">
406+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv5_server_det_infer">
407407
* PP-OCRv5_server_det
408408
* </a>
409409
* <li>
410-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_mobile_det_infer.tar">
410+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv5_mobile_det_infer">
411411
* PP-OCRv5_mobile_det
412412
* </a>
413413
* <li>
414-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv4_server_det_infer.tar">
414+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv4_server_det_infer">
415415
* PP-OCRv4_server_det
416416
* </a>
417417
* <li>
418-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv4_mobile_det_infer.tar">
418+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv4_mobile_det_infer">
419419
* PP-OCRv4_mobile_det
420420
* </a>
421421
* </ul>
@@ -454,19 +454,19 @@ public static OnnxDetectionPredictor paddleOcr(String modelDirPath,
454454
* This method can be used to load the following PaddleOCR models:
455455
* <ul>
456456
* <li>
457-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_server_det_infer.tar">
457+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv5_server_det_infer">
458458
* PP-OCRv5_server_det
459459
* </a>
460460
* <li>
461-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_mobile_det_infer.tar">
461+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv5_mobile_det_infer">
462462
* PP-OCRv5_mobile_det
463463
* </a>
464464
* <li>
465-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv4_server_det_infer.tar">
465+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv4_server_det_infer">
466466
* PP-OCRv4_server_det
467467
* </a>
468468
* <li>
469-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv4_mobile_det_infer.tar">
469+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv4_mobile_det_infer">
470470
* PP-OCRv4_mobile_det
471471
* </a>
472472
* </ul>
@@ -503,19 +503,19 @@ public static OnnxDetectionPredictor paddleOcr(String modelPath, String configPa
503503
* This method can be used to load the following PaddleOCR models:
504504
* <ul>
505505
* <li>
506-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_server_det_infer.tar">
506+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv5_server_det_infer">
507507
* PP-OCRv5_server_det
508508
* </a>
509509
* <li>
510-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_mobile_det_infer.tar">
510+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv5_mobile_det_infer">
511511
* PP-OCRv5_mobile_det
512512
* </a>
513513
* <li>
514-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv4_server_det_infer.tar">
514+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv4_server_det_infer">
515515
* PP-OCRv4_server_det
516516
* </a>
517517
* <li>
518-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv4_mobile_det_infer.tar">
518+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv4_mobile_det_infer">
519519
* PP-OCRv4_mobile_det
520520
* </a>
521521
* </ul>
@@ -552,7 +552,7 @@ public static OnnxDetectionPredictor paddleOcr(String modelPath, String configPa
552552
* This can be used to load the following models from EasyOCR:
553553
* <ul>
554554
* <li>
555-
* <a href="https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip">
555+
* <a href="https://huggingface.co/itextresearch/itext-EasyOCR-craft_mlt_25k">
556556
* CRAFT
557557
* </a>
558558
* </ul>
@@ -582,7 +582,7 @@ public static OnnxDetectionPredictor easyOcr(String modelPath) {
582582
* This can be used to load the following models from EasyOCR:
583583
* <ul>
584584
* <li>
585-
* <a href="https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip">
585+
* <a href="https://huggingface.co/itextresearch/itext-EasyOCR-craft_mlt_25k">
586586
* CRAFT
587587
* </a>
588588
* </ul>

pdfocr-onnx-abstract/src/main/java/com/itextpdf/pdfocr/onnx/detection/OnnxDetectionPredictorProperties.java

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -426,19 +426,19 @@ public static OnnxDetectionPredictorProperties linkNet(String modelPath,
426426
* This method can be used to load the following PaddleOCR models:
427427
* <ul>
428428
* <li>
429-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_server_det_infer.tar">
429+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv5_server_det_infer">
430430
* PP-OCRv5_server_det
431431
* </a>
432432
* <li>
433-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_mobile_det_infer.tar">
433+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv5_mobile_det_infer">
434434
* PP-OCRv5_mobile_det
435435
* </a>
436436
* <li>
437-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv4_server_det_infer.tar">
437+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv4_server_det_infer">
438438
* PP-OCRv4_server_det
439439
* </a>
440440
* <li>
441-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv4_mobile_det_infer.tar">
441+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv4_mobile_det_infer">
442442
* PP-OCRv4_mobile_det
443443
* </a>
444444
* </ul>
@@ -480,19 +480,19 @@ public static OnnxDetectionPredictorProperties paddleOcr(String modelDirPath) th
480480
* This method can be used to load the following PaddleOCR models:
481481
* <ul>
482482
* <li>
483-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_server_det_infer.tar">
483+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv5_server_det_infer">
484484
* PP-OCRv5_server_det
485485
* </a>
486486
* <li>
487-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_mobile_det_infer.tar">
487+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv5_mobile_det_infer">
488488
* PP-OCRv5_mobile_det
489489
* </a>
490490
* <li>
491-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv4_server_det_infer.tar">
491+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv4_server_det_infer">
492492
* PP-OCRv4_server_det
493493
* </a>
494494
* <li>
495-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv4_mobile_det_infer.tar">
495+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv4_mobile_det_infer">
496496
* PP-OCRv4_mobile_det
497497
* </a>
498498
* </ul>
@@ -529,19 +529,19 @@ public static OnnxDetectionPredictorProperties paddleOcr(String modelDirPath,
529529
* This method can be used to load the following PaddleOCR models:
530530
* <ul>
531531
* <li>
532-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_server_det_infer.tar">
532+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv5_server_det_infer">
533533
* PP-OCRv5_server_det
534534
* </a>
535535
* <li>
536-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_mobile_det_infer.tar">
536+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv5_mobile_det_infer">
537537
* PP-OCRv5_mobile_det
538538
* </a>
539539
* <li>
540-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv4_server_det_infer.tar">
540+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv4_server_det_infer">
541541
* PP-OCRv4_server_det
542542
* </a>
543543
* <li>
544-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv4_mobile_det_infer.tar">
544+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv4_mobile_det_infer">
545545
* PP-OCRv4_mobile_det
546546
* </a>
547547
* </ul>
@@ -576,19 +576,19 @@ public static OnnxDetectionPredictorProperties paddleOcr(String modelPath, Strin
576576
* This method can be used to load the following PaddleOCR models:
577577
* <ul>
578578
* <li>
579-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_server_det_infer.tar">
579+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv5_server_det_infer">
580580
* PP-OCRv5_server_det
581581
* </a>
582582
* <li>
583-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_mobile_det_infer.tar">
583+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv5_mobile_det_infer">
584584
* PP-OCRv5_mobile_det
585585
* </a>
586586
* <li>
587-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv4_server_det_infer.tar">
587+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv4_server_det_infer">
588588
* PP-OCRv4_server_det
589589
* </a>
590590
* <li>
591-
* <a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv4_mobile_det_infer.tar">
591+
* <a href="https://huggingface.co/itextresearch/itext-PP-OCRv4_mobile_det_infer">
592592
* PP-OCRv4_mobile_det
593593
* </a>
594594
* </ul>
@@ -629,7 +629,7 @@ public static OnnxDetectionPredictorProperties paddleOcr(String modelPath, Strin
629629
* This can be used to load the following models from EasyOCR:
630630
* <ul>
631631
* <li>
632-
* <a href="https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip">
632+
* <a href="https://huggingface.co/itextresearch/itext-EasyOCR-craft_mlt_25k">
633633
* CRAFT
634634
* </a>
635635
* </ul>
@@ -659,7 +659,7 @@ public static OnnxDetectionPredictorProperties easyOcr(String modelPath) {
659659
* This can be used to load the following models from EasyOCR:
660660
* <ul>
661661
* <li>
662-
* <a href="https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip">
662+
* <a href="https://huggingface.co/itextresearch/itext-EasyOCR-craft_mlt_25k">
663663
* CRAFT
664664
* </a>
665665
* </ul>

0 commit comments

Comments
 (0)