Skip to content

Commit ff5fa50

Browse files
AnhelinaM$GITHUB_USERNAME
authored and committed
Improve text boxes taking into account arbitrary rotation
DEVSIX-9739
1 parent 209ace3 commit ff5fa50

165 files changed

Lines changed: 1514 additions & 1130 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrPdfCreator.java

Lines changed: 62 additions & 120 deletions
Large diffs are not rendered by default.

pdfocr-api/src/main/java/com/itextpdf/pdfocr/TextInfo.java

Lines changed: 152 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -22,32 +22,29 @@ This file is part of the iText (R) project.
2222
*/
2323
package com.itextpdf.pdfocr;
2424

25+
import com.itextpdf.kernel.geom.Point;
2526
import com.itextpdf.kernel.geom.Rectangle;
2627
import com.itextpdf.pdfocr.structuretree.LogicalStructureTreeItem;
2728

28-
import java.util.Objects;
29-
3029
/**
3130
* This class describes how recognized text is positioned on the image
3231
* providing bbox for each text item (could be a line or a word).
3332
*/
3433
public class TextInfo {
35-
3634
/**
37-
* Contains any text.
35+
* Image pixel to PDF point ratio.
3836
*/
39-
private String text;
37+
private static final float PX_TO_PT = 0.75F;
4038

4139
/**
42-
* {@link Rectangle} describing text bbox (lower-left based) expressed in points.
40+
* Contains any text.
4341
*/
44-
private Rectangle bboxRect;
42+
private String text;
4543

4644
/**
47-
* {@link TextOrientation} describing the orientation of the text (i.e. rotation). Text is
48-
* assumed to be horizontal without any rotation by default.
45+
* Array of 4 {@link Point}s describing text bbox (lower-left based relative to text) expressed in PDF points.
4946
*/
50-
private TextOrientation orientation = TextOrientation.HORIZONTAL;
47+
private Point[] textPoints;
5148

5249
/**
5350
* If LogicalStructureTreeItem is set, then {@link TextInfo}s are expected to be in logical order.
@@ -67,38 +64,41 @@ public TextInfo() {
6764
*/
6865
public TextInfo(final TextInfo textInfo) {
    this.text = textInfo.text;
    // textPoints has no field initializer, so the source instance may not have any points yet;
    // propagate that state instead of failing with a NullPointerException on clone().
    // Note: clone() copies the array only, both instances keep referencing the same Point objects.
    this.textPoints = textInfo.textPoints == null ? null : (Point[]) textInfo.textPoints.clone();
}
7369

7470
/**
75-
* Creates a new {@link TextInfo} instance.
71+
* Creates new {@link TextInfo} instance.
7672
*
77-
* @param text any text
78-
* @param bbox {@link Rectangle} describing text bbox
73+
* @param text text string
74+
* @param bbox array of 4 {@link Point}s describing text bbox (lower-left based relative to text)
75+
* expressed in points (0 - lower-left, 1 - upper-left, 2 - upper-right, 3 - lower-right point)
7976
*/
80-
public TextInfo(final String text, final Rectangle bbox) {
77+
public TextInfo(final String text, final Point[] bbox) {
    // The array is kept by reference (no copy is made), so the caller and this
    // instance share the same Point[] after construction.
    this.textPoints = bbox;
    this.text = text;
}
8481

8582
/**
86-
* Creates a new {@link TextInfo} instance.
83+
* Creates new {@link TextInfo} instance. Can be used for text chunks that are not rotated.
8784
*
88-
* @param text any text
89-
* @param bbox {@link Rectangle} describing text bbox
90-
* @param orientation orientation of the text
85+
* @param text text string
86+
* @param bbox {@link Rectangle} describing text bounding box expressed in PDF points
9187
*/
92-
public TextInfo(final String text, final Rectangle bbox, final TextOrientation orientation) {
88+
public TextInfo(final String text, final Rectangle bbox) {
9389
this.text = text;
94-
this.bboxRect = new Rectangle(bbox);
95-
this.orientation = Objects.requireNonNull(orientation);
90+
this.textPoints = new Point[]{
91+
new Point(bbox.getLeft(), bbox.getBottom()),
92+
new Point(bbox.getLeft(), bbox.getTop()),
93+
new Point(bbox.getRight(), bbox.getTop()),
94+
new Point(bbox.getRight(), bbox.getBottom())
95+
};
9696
}
9797

9898
/**
9999
* Gets text element.
100100
*
101-
* @return String
101+
* @return text string
102102
*/
103103
public String getText() {
104104
return text;
@@ -108,45 +108,153 @@ public String getText() {
108108
* Sets text element.
109109
*
110110
* @param newText retrieved text
111+
*
112+
* @return this instance
111113
*/
112-
public void setText(final String newText) {
114+
public TextInfo setText(final String newText) {
113115
text = newText;
116+
return this;
117+
}
118+
119+
/**
120+
* Gets array of 4 {@link Point}s describing text bbox (lower-left based relative to text) expressed in points.
121+
*
122+
* <p>
123+
* Point array stores text polygon in the following order relative to text:
124+
* 0 - lower-left, 1 - upper-left, 2 - upper-right, 3 - lower-right point.
125+
*
126+
* <p>
127+
* The following coordinate system is used for points coordinate:
128+
* the origin is located in left bottom corner of the page,
129+
* vertical (y) coordinates increase from the bottom of the page to the top,
130+
* horizontal (x) coordinates increase from the left side of the page to the right,
131+
* the axis unit is the user space unit, referred to here as a PDF point (1 PDF point = 1/72 inch = 4/3 pixel).
132+
*
133+
* @return array of 4 {@link Point}s describing text bbox (lower-left based relative to text) expressed in points
134+
*/
135+
public Point[] getTextPoints() {
136+
return textPoints;
137+
}
138+
139+
/**
140+
* Sets array of 4 {@link Point}s describing text bbox (lower-left based relative to text) expressed in points.
141+
*
142+
* <p>
143+
* Point array should store text polygon in the following order relative to text:
144+
* 0 - lower-left, 1 - upper-left, 2 - upper-right, 3 - lower-right point.
145+
*
146+
* <p>
147+
* The following coordinate system is used for points coordinate:
148+
* the origin is located in left bottom corner of the page,
149+
* vertical (y) coordinates increase from the bottom of the page to the top,
150+
* horizontal (x) coordinates increase from the left side of the page to the right,
151+
* the axis unit is the user space unit, referred to here as a PDF point (1 PDF point = 1/72 inch = 4/3 pixel).
152+
*
153+
* @param textPoints array of 4 {@link Point}s describing text bbox (lower-left based relative to text)
154+
* expressed in points
155+
*
156+
* @return this instance
157+
*/
158+
public TextInfo setTextPoints(Point[] textPoints) {
159+
this.textPoints = textPoints;
160+
return this;
114161
}
115162

116163
/**
117-
* Gets bbox coordinates.
164+
* Gets array of 4 {@link Point}s describing text bbox (lower-left based relative to text) expressed in pixels.
165+
*
166+
* <p>
167+
* Point array stores text polygon in the following order relative to text:
168+
* 0 - lower-left, 1 - upper-left, 2 - upper-right, 3 - lower-right point.
118169
*
119-
* @return {@link Rectangle} describing text bbox
170+
* <p>
171+
* The following coordinate system is used for text points coordinate:
172+
* the origin is located in left top corner of the page (image),
173+
* vertical (y) coordinates increase from the top of the page to the bottom,
174+
* horizontal (x) coordinates increase from the left side of the page to the right,
175+
* the axis unit is the pixel (1 pixel = 1/96 inch = 0.75 PDF point).
176+
*
177+
* @param imageHeight height of the image in pixels, used to convert the text PDF points to image pixel coordinates.
178+
* Used to change the {@code y} origin
179+
*
180+
* @return array of 4 {@link Point}s describing text bbox (lower-left based relative to text) expressed in pixels
120181
*/
121-
public Rectangle getBboxRect() {
122-
return bboxRect;
182+
public Point[] getPixelTextPoints(int imageHeight) {
183+
Point[] result = new Point[this.textPoints.length];
184+
for (int i = 0; i < result.length; ++i) {
185+
result[i] = new Point(this.textPoints[i].getX() / PX_TO_PT,
186+
imageHeight - this.textPoints[i].getY() / PX_TO_PT);
187+
}
188+
return result;
123189
}
124190

125191
/**
126-
* Sets text bbox.
192+
* Sets an array of 4 {@link Point}s describing text bbox (lower-left based relative to text) expressed in pixels.
193+
*
194+
* <p>
195+
* Point array should store text polygon in the following order relative to text:
196+
* 0 - lower-left, 1 - upper-left, 2 - upper-right, 3 - lower-right point.
127197
*
128-
* @param bbox {@link Rectangle} describing text bbox
198+
* <p>
199+
* The following coordinate system is used for text points coordinate:
200+
* the origin is located in left top corner of the page,
201+
* vertical (y) coordinates increase from the top of the page to the bottom,
202+
* horizontal (x) coordinates increase from the left side of the page to the right,
203+
* the axis unit is the pixel (1 pixel = 1/96 inch = 0.75 PDF point).
204+
*
205+
* @param textPoints array of 4 {@link Point}s describing text bbox (0 - lower-left, 1 - upper-left,
206+
* 2 - upper-right, 3 - lower-right relative to text) expressed in pixels
207+
* @param imageHeight height of the image in pixels, used to convert image pixel coordinates to text PDF points.
208+
* Used to change the {@code y} origin
209+
*
210+
* @return this instance
129211
*/
130-
public void setBboxRect(final Rectangle bbox) {
131-
this.bboxRect = new Rectangle(bbox);
212+
public TextInfo setPixelTextPoints(Point[] textPoints, int imageHeight) {
213+
Point[] result = new Point[textPoints.length];
214+
for (int i = 0; i < result.length; ++i) {
215+
result[i] = new Point(PX_TO_PT * textPoints[i].getX(),
216+
PX_TO_PT * (imageHeight - textPoints[i].getY()));
217+
}
218+
this.textPoints = result;
219+
return this;
132220
}
133221

134222
/**
135-
* Gets the text orientation.
223+
* Converts a text polygon to a bounding box.
136224
*
137-
* @return {@link TextOrientation} describing the orientation of the text (i.e. rotation)
225+
* @return {@link Rectangle} representing text bounding box
138226
*/
139-
public TextOrientation getOrientation() {
140-
return orientation;
227+
public Rectangle getBBoxRect() {
228+
float minX = (float) this.textPoints[0].getX();
229+
float maxX = minX;
230+
float minY = (float) this.textPoints[0].getY();
231+
float maxY = minY;
232+
for (int i = 1; i < this.textPoints.length; ++i) {
233+
final float x = (float) this.textPoints[i].getX();
234+
if (x < minX) {
235+
minX = x;
236+
} else if (x > maxX) {
237+
maxX = x;
238+
}
239+
final float y = (float) this.textPoints[i].getY();
240+
if (y < minY) {
241+
minY = y;
242+
} else if (y > maxY) {
243+
maxY = y;
244+
}
245+
}
246+
return new Rectangle(minX, minY, maxX - minX, maxY - minY);
141247
}
142248

143249
/**
144-
* Sets the text orientation.
250+
* Returns the text rotation angle in radians for this {@link TextInfo} in the range of -pi to pi.
145251
*
146-
* @param orientation {@link TextOrientation} describing the orientation of the text (i.e. rotation)
252+
* @return the text rotation angle in radians for the current {@link TextInfo}
147253
*/
148-
public void setOrientation(final TextOrientation orientation) {
149-
this.orientation = Objects.requireNonNull(orientation);
254+
public float getRotationAngle() {
255+
double dx = textPoints[3].getX() - textPoints[0].getX();
256+
double dy = textPoints[3].getY() - textPoints[0].getY();
257+
return (float) Math.atan2(dy, dx);
150258
}
151259

152260
/**
@@ -161,12 +269,12 @@ public LogicalStructureTreeItem getLogicalStructureTreeItem() {
161269
/**
162270
* Sets logical structure tree parent item for the text info. It allows to organize text chunks
163271
* into logical hierarchy, e.g. specify document paragraphs, tables, etc.
164-
* <p>
165272
*
273+
* <p>
166274
* If LogicalStructureTreeItem is set, then the list of {@link TextInfo}s in {@link IOcrEngine#doImageOcr}
167275
* return value is expected to be in logical order.
168276
*
169-
* @param logicalStructureTreeItem structure tree item.
277+
* @param logicalStructureTreeItem structure tree item
170278
*/
171279
public void setLogicalStructureTreeItem(LogicalStructureTreeItem logicalStructureTreeItem) {
172280
this.logicalStructureTreeItem = logicalStructureTreeItem;

pdfocr-api/src/main/java/com/itextpdf/pdfocr/TextOrientation.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ This file is part of the iText (R) project.
2424

2525
/**
2626
* Enumeration of supported text orientations.
27+
*
28+
* <p>
29+
* {@link TextOrientation} is used to determine text bounding box rectangle orientation.
30+
* Text rotation could be more precisely determined by the bounding points, see {@link TextInfo#getRotationAngle()}.
2731
*/
2832
public enum TextOrientation {
2933
/**

0 commit comments

Comments
 (0)