Commit b5ec35b
committed
Combine HOCR and TXT outputs for more precise text recognition
PDFOC-1031 parent fe61188 commit b5ec35b
16 files changed
Lines changed: 815 additions & 181 deletions
File tree
- pdfocr-api/src/main/java/com/itextpdf/pdfocr
- pdfocr-tesseract4/src
- main/java/com/itextpdf/pdfocr/tesseract4
- test
- java/com/itextpdf/pdfocr
- events
- general
- tessdata
- tesseract4
- resources/com/itextpdf/pdfocr/images
Lines changed: 11 additions & 0 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
60 | 60 | | |
61 | 61 | | |
62 | 62 | | |
| 63 | + | |
| 64 | + | |
| 65 | + | |
| 66 | + | |
| 67 | + | |
| 68 | + | |
| 69 | + | |
| 70 | + | |
| 71 | + | |
| 72 | + | |
| 73 | + | |
63 | 74 | | |
64 | 75 | | |
65 | 76 | | |
| |||
Lines changed: 35 additions & 5 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
294 | 294 | | |
295 | 295 | | |
296 | 296 | | |
297 | | - | |
| 297 | + | |
298 | 298 | | |
299 | | - | |
| 299 | + | |
| 300 | + | |
| 301 | + | |
| 302 | + | |
| 303 | + | |
| 304 | + | |
| 305 | + | |
| 306 | + | |
| 307 | + | |
| 308 | + | |
| 309 | + | |
| 310 | + | |
| 311 | + | |
| 312 | + | |
| 313 | + | |
| 314 | + | |
| 315 | + | |
| 316 | + | |
| 317 | + | |
| 318 | + | |
| 319 | + | |
| 320 | + | |
| 321 | + | |
| 322 | + | |
300 | 323 | | |
301 | 324 | | |
302 | 325 | | |
| |||
374 | 397 | | |
375 | 398 | | |
376 | 399 | | |
| 400 | + | |
| 401 | + | |
| 402 | + | |
| 403 | + | |
| 404 | + | |
| 405 | + | |
| 406 | + | |
| 407 | + | |
377 | 408 | | |
378 | | - | |
379 | | - | |
380 | | - | |
| 409 | + | |
| 410 | + | |
381 | 411 | | |
382 | 412 | | |
383 | 413 | | |
| |||
Lines changed: 37 additions & 6 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
33 | 33 | | |
34 | 34 | | |
35 | 35 | | |
| 36 | + | |
| 37 | + | |
36 | 38 | | |
37 | 39 | | |
38 | 40 | | |
| |||
109 | 111 | | |
110 | 112 | | |
111 | 113 | | |
| 114 | + | |
112 | 115 | | |
113 | 116 | | |
114 | 117 | | |
115 | | - | |
| 118 | + | |
116 | 119 | | |
117 | 120 | | |
118 | 121 | | |
| |||
162 | 165 | | |
163 | 166 | | |
164 | 167 | | |
165 | | - | |
166 | | - | |
167 | | - | |
168 | | - | |
| 168 | + | |
| 169 | + | |
| 170 | + | |
| 171 | + | |
| 172 | + | |
169 | 173 | | |
170 | 174 | | |
171 | | - | |
| 175 | + | |
| 176 | + | |
| 177 | + | |
| 178 | + | |
172 | 179 | | |
173 | 180 | | |
174 | 181 | | |
| |||
217 | 224 | | |
218 | 225 | | |
219 | 226 | | |
| 227 | + | |
| 228 | + | |
| 229 | + | |
| 230 | + | |
| 231 | + | |
| 232 | + | |
| 233 | + | |
| 234 | + | |
| 235 | + | |
| 236 | + | |
| 237 | + | |
| 238 | + | |
| 239 | + | |
| 240 | + | |
| 241 | + | |
| 242 | + | |
| 243 | + | |
| 244 | + | |
| 245 | + | |
| 246 | + | |
| 247 | + | |
| 248 | + | |
| 249 | + | |
| 250 | + | |
220 | 251 | | |
221 | 252 | | |
222 | 253 | | |
| |||
Lines changed: 12 additions & 2 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
35 | 35 | | |
36 | 36 | | |
37 | 37 | | |
| 38 | + | |
| 39 | + | |
38 | 40 | | |
39 | 41 | | |
40 | 42 | | |
| |||
103 | 105 | | |
104 | 106 | | |
105 | 107 | | |
| 108 | + | |
| 109 | + | |
| 110 | + | |
| 111 | + | |
| 112 | + | |
106 | 113 | | |
107 | 114 | | |
108 | 115 | | |
| |||
141 | 148 | | |
142 | 149 | | |
143 | 150 | | |
| 151 | + | |
144 | 152 | | |
145 | 153 | | |
146 | 154 | | |
147 | | - | |
| 155 | + | |
148 | 156 | | |
149 | 157 | | |
150 | 158 | | |
151 | 159 | | |
152 | 160 | | |
153 | 161 | | |
154 | 162 | | |
155 | | - | |
| 163 | + | |
| 164 | + | |
| 165 | + | |
156 | 166 | | |
157 | 167 | | |
158 | 168 | | |
| |||
Lines changed: 66 additions & 1 deletion
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
84 | 84 | | |
85 | 85 | | |
86 | 86 | | |
| 87 | + | |
| 88 | + | |
| 89 | + | |
| 90 | + | |
| 91 | + | |
| 92 | + | |
| 93 | + | |
| 94 | + | |
87 | 95 | | |
88 | 96 | | |
89 | 97 | | |
90 | 98 | | |
91 | 99 | | |
| 100 | + | |
| 101 | + | |
| 102 | + | |
| 103 | + | |
| 104 | + | |
| 105 | + | |
| 106 | + | |
| 107 | + | |
92 | 108 | | |
93 | 109 | | |
94 | 110 | | |
| |||
108 | 124 | | |
109 | 125 | | |
110 | 126 | | |
| 127 | + | |
111 | 128 | | |
| 129 | + | |
112 | 130 | | |
113 | 131 | | |
114 | 132 | | |
| |||
392 | 410 | | |
393 | 411 | | |
394 | 412 | | |
| 413 | + | |
| 414 | + | |
| 415 | + | |
| 416 | + | |
| 417 | + | |
| 418 | + | |
| 419 | + | |
| 420 | + | |
| 421 | + | |
| 422 | + | |
| 423 | + | |
| 424 | + | |
| 425 | + | |
| 426 | + | |
| 427 | + | |
| 428 | + | |
| 429 | + | |
| 430 | + | |
| 431 | + | |
| 432 | + | |
| 433 | + | |
| 434 | + | |
| 435 | + | |
| 436 | + | |
| 437 | + | |
| 438 | + | |
| 439 | + | |
395 | 440 | | |
396 | 441 | | |
397 | 442 | | |
| |||
411 | 456 | | |
412 | 457 | | |
413 | 458 | | |
414 | | - | |
| 459 | + | |
| 460 | + | |
| 461 | + | |
| 462 | + | |
| 463 | + | |
| 464 | + | |
| 465 | + | |
| 466 | + | |
| 467 | + | |
| 468 | + | |
| 469 | + | |
| 470 | + | |
| 471 | + | |
| 472 | + | |
| 473 | + | |
| 474 | + | |
| 475 | + | |
| 476 | + | |
| 477 | + | |
| 478 | + | |
415 | 479 | | |
416 | 480 | | |
| 481 | + | |
0 commit comments