|
9 | 9 | <types> |
10 | 10 | <typeDescription> |
11 | 11 | <name>org.texttechnologylab.annotation.ocr.abbyy.StructuralElement</name> |
12 | | - <description/> |
| 12 | + <description>Meta-Type for structural elements with pixel offsets in the ABBYY FineReader schema.</description> |
13 | 13 | <supertypeName>de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Div</supertypeName> |
14 | 14 |
|
15 | 15 | <features> |
16 | 16 | <featureDescription> |
17 | 17 | <name>top</name> |
18 | | - <description/> |
| 18 | + <description>Coordinate of the top border, in pixels.</description> |
19 | 19 | <rangeTypeName>uima.cas.Integer</rangeTypeName> |
20 | 20 | </featureDescription> |
21 | 21 |
|
22 | 22 | <featureDescription> |
23 | 23 | <name>bottom</name> |
24 | | - <description/> |
| 24 | + <description>Coordinate of the bottom border, in pixels.</description> |
25 | 25 | <rangeTypeName>uima.cas.Integer</rangeTypeName> |
26 | 26 | </featureDescription> |
27 | 27 |
|
28 | 28 | <featureDescription> |
29 | 29 | <name>left</name> |
30 | | - <description/> |
| 30 | + <description>Coordinate of the left border, in pixels.</description> |
31 | 31 | <rangeTypeName>uima.cas.Integer</rangeTypeName> |
32 | 32 | </featureDescription> |
33 | 33 |
|
34 | 34 | <featureDescription> |
35 | 35 | <name>right</name> |
36 | | - <description/> |
| 36 | + <description>Coordinate of the right border, in pixels.</description> |
37 | 37 | <rangeTypeName>uima.cas.Integer</rangeTypeName> |
38 | 38 | </featureDescription> |
39 | 39 | </features> |
40 | 40 | </typeDescription> |
41 | 41 | <typeDescription> |
42 | 42 | <name>org.texttechnologylab.annotation.ocr.abbyy.Block</name> |
43 | | - <description/> |
| 43 | + <description>A recognized block.</description> |
44 | 44 | <supertypeName>org.texttechnologylab.annotation.ocr.abbyy.StructuralElement</supertypeName> |
45 | 45 |
|
46 | 46 | <features> |
|
59 | 59 | </typeDescription> |
60 | 60 | <typeDescription> |
61 | 61 | <name>org.texttechnologylab.annotation.ocr.abbyy.BlockType</name> |
62 | | - <description/> |
| 62 | + <description>The type of a block. It can be one of the following values: Text, Table, Picture, Barcode, Separator, SeparatorsBox, Checkmark, GroupCheckmark.</description> |
63 | 63 | <supertypeName>uima.cas.String</supertypeName> |
64 | 64 |
|
65 | 65 | <allowedValues> |
|
91 | 91 | </typeDescription> |
92 | 92 | <typeDescription> |
93 | 93 | <name>org.texttechnologylab.annotation.ocr.abbyy.Line</name> |
94 | | - <description/> |
| 94 | + <description>Line of a paragraph.</description> |
95 | 95 | <supertypeName>org.texttechnologylab.annotation.ocr.abbyy.StructuralElement</supertypeName> |
96 | 96 |
|
97 | 97 | <features> |
98 | 98 | <featureDescription> |
99 | 99 | <name>baseline</name> |
100 | | - <description/> |
| 100 | + <description>The distance from the baseline to the top edge of the page, in pixels.</description> |
101 | 101 | <rangeTypeName>uima.cas.Integer</rangeTypeName> |
102 | 102 | </featureDescription> |
103 | 103 |
|
104 | 104 | <featureDescription> |
105 | 105 | <name>format</name> |
106 | | - <description/> |
| 106 | + <description> |
| 107 | + If present, this line denotes a group of characters with uniform formatting. |
| 108 | + </description> |
107 | 109 | <rangeTypeName>org.texttechnologylab.annotation.ocr.abbyy.Format</rangeTypeName> |
108 | 110 | </featureDescription> |
109 | 111 | </features> |
110 | 112 | </typeDescription> |
111 | 113 | <typeDescription> |
112 | 114 | <name>org.texttechnologylab.annotation.ocr.abbyy.Paragraph</name> |
113 | | - <description/> |
| 115 | + <description>Paragraph of a recognized text.</description> |
114 | 116 | <supertypeName>de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Paragraph</supertypeName> |
115 | 117 |
|
116 | 118 | <features> |
117 | 119 | <featureDescription> |
118 | 120 | <name>leftIndent</name> |
119 | | - <description/> |
| 121 | + <description>The left paragraph indent (optional, default value is 0)</description> |
120 | 122 | <rangeTypeName>uima.cas.Integer</rangeTypeName> |
121 | 123 | </featureDescription> |
122 | 124 |
|
123 | 125 | <featureDescription> |
124 | 126 | <name>rightIndent</name> |
125 | | - <description/> |
| 127 | + <description>The right paragraph indent (optional, default value is 0)</description> |
126 | 128 | <rangeTypeName>uima.cas.Integer</rangeTypeName> |
127 | 129 | </featureDescription> |
128 | 130 |
|
129 | 131 | <featureDescription> |
130 | 132 | <name>startIndent</name> |
131 | | - <description/> |
| 133 | + <description>The indent of the first line of the paragraph (optional, default value is 0)</description> |
132 | 134 | <rangeTypeName>uima.cas.Integer</rangeTypeName> |
133 | 135 | </featureDescription> |
134 | 136 |
|
135 | 137 | <featureDescription> |
136 | 138 | <name>lineSpacing</name> |
137 | | - <description/> |
| 139 | + <description>The spacing between lines (optional, default value is 0)</description> |
138 | 140 | <rangeTypeName>uima.cas.Integer</rangeTypeName> |
139 | 141 | </featureDescription> |
140 | 142 |
|
141 | 143 | <featureDescription> |
142 | 144 | <name>alignment</name> |
143 | | - <description/> |
| 145 | + <description> |
| 146 | + The paragraph alignment (optional, default value is Left). |
| 147 | + It can be one of the following values: Left, Center, Right, Justified |
| 148 | + </description> |
144 | 149 | <rangeTypeName>org.texttechnologylab.annotation.ocr.abbyy.ParagraphAlignment</rangeTypeName> |
145 | 150 | </featureDescription> |
146 | 151 | </features> |
|
165 | 170 | </typeDescription> |
166 | 171 | <typeDescription> |
167 | 172 | <name>org.texttechnologylab.annotation.ocr.abbyy.Format</name> |
168 | | - <description/> |
| 173 | + <description>Attributes of a line with uniform formatting.</description> |
169 | 174 | <supertypeName>uima.tcas.Annotation</supertypeName> |
170 | 175 |
|
171 | 176 | <features> |
172 | 177 | <featureDescription> |
173 | 178 | <name>lang</name> |
174 | | - <description/> |
| 179 | + <description>Name of the language.</description> |
175 | 180 | <rangeTypeName>uima.cas.String</rangeTypeName> |
176 | 181 | </featureDescription> |
177 | 182 |
|
178 | 183 | <featureDescription> |
179 | 184 | <name>ff</name> |
180 | | - <description/> |
| 185 | + <description>The name of the font.</description> |
181 | 186 | <rangeTypeName>uima.cas.String</rangeTypeName> |
182 | 187 | </featureDescription> |
183 | 188 |
|
184 | 189 | <featureDescription> |
185 | 190 | <name>fs</name> |
186 | | - <description/> |
| 191 | + <description>The size of the font.</description> |
187 | 192 | <rangeTypeName>uima.cas.Float</rangeTypeName> |
188 | 193 | </featureDescription> |
189 | 194 |
|
|
232 | 237 | </typeDescription> |
233 | 238 | <typeDescription> |
234 | 239 | <name>org.texttechnologylab.annotation.ocr.abbyy.Token</name> |
235 | | - <description/> |
| 240 | + <description>Token type that denotes recognized words.</description> |
236 | 241 | <supertypeName>de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token</supertypeName> |
237 | 242 |
|
238 | 243 | <features> |
239 | 244 | <featureDescription> |
240 | 245 | <name>subTokenList</name> |
241 | | - <description/> |
| 246 | + <description>Present if a linebreak hyphen was recognized, contains the individual words.</description> |
242 | 247 | <rangeTypeName>uima.cas.StringList</rangeTypeName> |
243 | 248 | <multipleReferencesAllowed>false</multipleReferencesAllowed> |
244 | 249 | </featureDescription> |
245 | 250 |
|
246 | 251 | <featureDescription> |
247 | 252 | <name>isWordFromDictionary</name> |
248 | | - <description/> |
| 253 | + <description>Specifies whether the word was found in the ABBYY FineReader dictionary.</description> |
249 | 254 | <rangeTypeName>uima.cas.Boolean</rangeTypeName> |
250 | 255 | </featureDescription> |
251 | 256 |
|
252 | 257 | <featureDescription> |
253 | 258 | <name>isWordNormal</name> |
254 | | - <description/> |
| 259 | + <description> |
| 260 | + Specifies whether the word was recognized with either a standard or user-defined language, |
| 261 | + and that it is not a number or an identifier. |
| 262 | + </description> |
255 | 263 | <rangeTypeName>uima.cas.Boolean</rangeTypeName> |
256 | 264 | </featureDescription> |
257 | 265 |
|
258 | 266 | <featureDescription> |
259 | 267 | <name>isWordNumeric</name> |
260 | | - <description/> |
| 268 | + <description>Specifies whether the word is a number.</description> |
261 | 269 | <rangeTypeName>uima.cas.Boolean</rangeTypeName> |
262 | 270 | </featureDescription> |
263 | 271 |
|
264 | 272 | <featureDescription> |
265 | 273 | <name>containsHyphen</name> |
266 | | - <description/> |
| 274 | + <description>Specifies if the word contains a recognized linebreak hyphen.</description> |
267 | 275 | <rangeTypeName>uima.cas.Boolean</rangeTypeName> |
268 | 276 | </featureDescription> |
269 | 277 |
|
270 | 278 | <featureDescription> |
271 | 279 | <name>suspiciousChars</name> |
272 | | - <description/> |
| 280 | + <description>The number of characters that were recognized uncertainly.</description> |
273 | 281 | <rangeTypeName>uima.cas.Integer</rangeTypeName> |
274 | 282 | </featureDescription> |
275 | 283 |
|
276 | 284 | <featureDescription> |
277 | 285 | <name>minCharConfidence</name> |
278 | | - <description/> |
| 286 | + <description> |
| 287 | + The minimum character recognition confidence of all characters in this word. |
| 288 | + Use with caution, as these numbers are not guaranteed to be positive and, according to the |
| 289 | + ABBYY FineReader documentation, the only meaningful use of confidence is to compare different |
| 290 | + recognition variants of the same character. |
| 291 | + </description> |
279 | 292 | <rangeTypeName>uima.cas.Short</rangeTypeName> |
280 | 293 | </featureDescription> |
281 | 294 |
|
282 | 295 | <featureDescription> |
283 | 296 | <name>meanCharConfidence</name> |
284 | | - <description/> |
| 297 | + <description> |
| 298 | + The average character recognition confidence of all characters in this word. |
| 299 | + Use with caution, as these numbers are not guaranteed to be positive and, according to the |
| 300 | + ABBYY FineReader documentation, the only meaningful use of confidence is to compare different |
| 301 | + recognition variants of the same character. |
| 302 | + </description> |
285 | 303 | <rangeTypeName>uima.cas.Float</rangeTypeName> |
286 | 304 | </featureDescription> |
287 | 305 | </features> |
|
308 | 326 | </featureDescription> |
309 | 327 | <featureDescription> |
310 | 328 | <name>uri</name> |
311 | | - <description/> |
| 329 | + <description>URI of this page, i.e. a Visual Library link.</description> |
312 | 330 | <rangeTypeName>uima.cas.String</rangeTypeName> |
313 | 331 | </featureDescription> |
314 | 332 | <featureDescription> |
315 | 333 | <name>width</name> |
316 | | - <description/> |
| 334 | + <description>The image width in pixels.</description> |
317 | 335 | <rangeTypeName>uima.cas.Integer</rangeTypeName> |
318 | 336 | </featureDescription> |
319 | 337 | <featureDescription> |
320 | 338 | <name>height</name> |
321 | | - <description/> |
| 339 | + <description>The image height in pixels.</description> |
322 | 340 | <rangeTypeName>uima.cas.Integer</rangeTypeName> |
323 | 341 | </featureDescription> |
324 | 342 | <featureDescription> |
325 | 343 | <name>resolution</name> |
326 | | - <description/> |
| 344 | + <description>The image resolution in pixels per inch.</description> |
327 | 345 | <rangeTypeName>uima.cas.Integer</rangeTypeName> |
328 | 346 | </featureDescription> |
329 | 347 | <featureDescription> |
330 | 348 | <name>rotation</name> |
331 | | - <description/> |
| 349 | + <description> |
| 350 | + The type of rotation applied to original page image before processing (optional). |
| 351 | + It can be one of the following values: Normal, RotatedClockwise, RotatedUpsideDown, RotatedCounterclockwise |
| 352 | + </description> |
332 | 353 | <rangeTypeName>org.texttechnologylab.annotation.ocr.abbyy.Orientation</rangeTypeName> |
333 | 354 | </featureDescription> |
334 | 355 | </features> |
|
345 | 366 | </typeDescription> |
346 | 367 | <typeDescription> |
347 | 368 | <name>org.texttechnologylab.annotation.ocr.abbyy.Document</name> |
348 | | - <description/> |
| 369 | + <description>Document metadata.</description> |
349 | 370 | <supertypeName>de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Document</supertypeName> |
350 | 371 |
|
351 | 372 | <features> |
352 | 373 | <featureDescription> |
353 | 374 | <name>documentName</name> |
354 | | - <description/> |
| 375 | + <description>The name of this document in the XML files.</description> |
355 | 376 | <rangeTypeName>uima.cas.String</rangeTypeName> |
356 | 377 | </featureDescription> |
357 | 378 |
|
358 | 379 | <featureDescription> |
359 | 380 | <name>version</name> |
| 381 | + <description>The version of the source XML files.</description> |
360 | 382 | <rangeTypeName>uima.cas.String</rangeTypeName> |
361 | 383 | </featureDescription> |
362 | 384 |
|
363 | 385 | <featureDescription> |
364 | 386 | <name>producer</name> |
| 387 | + <description>The producer of the source XML files.</description> |
365 | 388 | <rangeTypeName>uima.cas.String</rangeTypeName> |
366 | 389 | </featureDescription> |
367 | 390 |
|
368 | 391 | <featureDescription> |
369 | 392 | <name>pagesCount</name> |
| 393 | + <description>The number of pages in this document (optional).</description> |
370 | 394 | <rangeTypeName>uima.cas.Integer</rangeTypeName> |
371 | 395 | </featureDescription> |
372 | 396 |
|
373 | 397 | <featureDescription> |
374 | 398 | <name>mainLanguage</name> |
| 399 | + <description>The main language of this document.</description> |
375 | 400 | <rangeTypeName>uima.cas.String</rangeTypeName> |
376 | 401 | </featureDescription> |
377 | 402 |
|
378 | 403 | <featureDescription> |
379 | 404 | <name>languages</name> |
| 405 | + <description>All languages used in this document (optional).</description> |
380 | 406 | <rangeTypeName>uima.cas.String</rangeTypeName> |
381 | 407 | </featureDescription> |
382 | 408 | </features> |
|
0 commit comments