Skip to content

Commit 22632e3

Browse files
committed
Added code cleaning up the images extracted for OCR.
1 parent 2ffd3d2 commit 22632e3

6 files changed

Lines changed: 129 additions & 1 deletion

File tree

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#include "stdafx.h"
2+
#include "OcrImageNoiseCleaner.h"
3+
4+
using namespace cv;
5+
using namespace std;
6+
7+
//Creates a noise cleaner. All arguments are handed on unchanged to the
//BasicReaderData base class; the cleaner adds no state of its own here.
OcrImageNoiseCleaner::OcrImageNoiseCleaner(
    wstring imageFileName,
    Mat originalImageData,
    SystemMethods* systemMethods,
    bool runDebugging)
    : BasicReaderData(imageFileName, originalImageData, systemMethods, runDebugging)
{
    //Nothing to initialise beyond the base class.
}
11+
12+
//The cleaner owns no resources of its own, so the compiler-generated
//destructor body is sufficient.
OcrImageNoiseCleaner::~OcrImageNoiseCleaner() = default;
15+
16+
//Cleans a text image from noise and clutter.
//
//dirtyImage: the image to clean. It is modified in place: first it is
//            binarised, then every figure judged to be noise is painted over.
//
//Note: an earlier version made a deep copy of the incoming image that was
//never read afterwards; it has been dropped to avoid a needless full-image
//allocation and copy on every call.
void OcrImageNoiseCleaner::CleanImage(Mat& dirtyImage) {

    //Binarise in place: pixels above 130 become 255, all others become 0.
    threshold(dirtyImage, dirtyImage, 130, 255, THRESH_BINARY);

    //Find the figures (letters as well as noise) in the binarised image.
    Contours contours = ImageHelper::GetCannyContours(dirtyImage, 120);
    LetterAreas figures = ImageHelper::ToLetterAreas(contours);

    //Check every figure and erase the ones that are noise.
    for (const auto& figure : figures) {

        handleFigure(figure, dirtyImage);
    }
}
30+
31+
//Handles a single figure (noise or letter).
//
//figure:     the figure to inspect.
//dirtyImage: the image the figure was found in. If the figure is judged to
//            be noise, its outer contour is filled with white in this image.
void OcrImageNoiseCleaner::handleFigure(LetterArea figure, Mat& dirtyImage) {

    //A figure whose bounding box has no area cannot be judged; skip it.
    const bool hasNoArea = (figure.Box.size.area() == 0);
    if (hasNoArea) {
        return;
    }

    //NOTE(review): the returned image is never used below. It looks like
    //debugging residue, but the call is kept since it cannot be seen from
    //here whether DrawLimits has side effects - confirm before removing.
    Mat limitsImage = ImageHelper::DrawLimits(dirtyImage, { figure.TightContour }, Hierarchy(), false);

    //Letters are left untouched.
    if (!isNoise(figure, dirtyImage.size())) {
        return;
    }

    //Paint over the noise.
    Scalar fillColour(255, 255, 255);
    ImageHelper::FillContour(dirtyImage, figure.OuterContour, fillColour);
}
43+
44+
//Checks if a figure is noise.
//
//A figure counts as noise when it is oblong, or when it is small enough to
//be noise and is not located in the center of the image.
//
//figure:    the figure to judge.
//imageArea: the size of the image the figure was found in.
bool OcrImageNoiseCleaner::isNoise(LetterArea figure, Size imageArea) {

    //The checks are evaluated left to right and stop at the first decisive one.
    return isOblong(figure)
        || (isSmallEnoughToBeNoise(figure) && !isInCenter(figure, imageArea));
}
52+
53+
//Checks if a figure is too long and thin to be a letter.
//
//The bounding box is treated as orientation independent: the longer side is
//divided by the shorter side and the figure is oblong when that ratio
//exceeds 5.5.
bool OcrImageNoiseCleaner::isOblong(LetterArea figure) {

    const auto& boxSize = figure.Box.size;

    //Which of height/width is the longer one depends on the box rotation,
    //so sort them by length instead of trusting the names.
    double shortSide = min(boxSize.height, boxSize.width);
    double longSide = max(boxSize.height, boxSize.width);

    return (longSide / shortSide) > 5.5;
}
61+
62+
//Checks if a figure is so small it can be considered noise.
//
//The area is measured on the tight contour when that contour is convex,
//otherwise on the outer contour. The figure is small enough when the area
//does not exceed WORKING_CARD_HEIGHT / 8.5 (truncated to an integer).
bool OcrImageNoiseCleaner::isSmallEnoughToBeNoise(LetterArea figure) {

    const int noiseAreaLimit = static_cast<int>(WORKING_CARD_HEIGHT / 8.5); //80

    //A non-convex tight contour is treated as "open"; use the outer contour then.
    const bool tightContourIsConvex = isContourConvex(figure.TightContour);
    const double figureArea = tightContourIsConvex
        ? contourArea(figure.TightContour)
        : contourArea(figure.OuterContour);

    return figureArea <= noiseAreaLimit;
}
70+
71+
//Checks if a figure is in the middle of the text image.
//
//The "middle" is the image rectangle shrunk by a border margin of
//WORKING_CARD_HEIGHT / 27.2 (truncated to an integer) on every side. The
//figure is in the middle when the center of its bounding box lies inside
//that rectangle.
//
//figure:    the figure to locate.
//imageArea: the size of the image the figure was found in.
bool OcrImageNoiseCleaner::isInCenter(LetterArea figure, Size imageArea) {

    const int margin = static_cast<int>(WORKING_CARD_HEIGHT / 27.2); //25

    //Shrink the image rectangle by the margin on all four sides.
    const int innerWidth = imageArea.width - 2 * margin;
    const int innerHeight = imageArea.height - 2 * margin;
    Rect innerArea(margin, margin, innerWidth, innerHeight);

    RotatedRect rotatedInnerArea = ImageHelper::ToRotatedRectangle(innerArea);

    return ImageHelper::DoesRectangleContainPoint(rotatedInnerArea, figure.Box.center);
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#pragma once
2+
#include "BasicReaderData.h"
3+
class OcrImageNoiseCleaner :
4+
public BasicReaderData
5+
{
6+
public:
7+
OcrImageNoiseCleaner(std::wstring imageFileName, cv::Mat originalImageData, SystemMethods* systemMethods, bool runDebugging);
8+
~OcrImageNoiseCleaner();
9+
10+
////Cleans the text images from noise and clutter.
11+
void CleanImage(cv::Mat& dirtyImage);
12+
13+
private:
14+
15+
//Handles a single figure (noise or letter).
16+
void handleFigure(LetterArea figure, cv::Mat& dirtyImage);
17+
//Checks if a figure is noise.
18+
bool isNoise(LetterArea figure, cv::Size imageArea);
19+
//Checks if a figure is too long and thin to be a letter.
20+
bool isOblong(LetterArea figure);
21+
//Checks if a figure is so small it can be considered noise.
22+
bool isSmallEnoughToBeNoise(LetterArea figure);
23+
//Checks if a figure is in the middle of the text image.
24+
bool isInCenter(LetterArea figure, cv::Size imageArea);
25+
};
26+

ReadMagicCard/ReadMagicCard.vcxproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ xcopy /Y "$(ProjectDir)deployment\vcredist_x86.exe" "$(OutDir)"</Command>
189189
<ClInclude Include="LoadOcvImage.h" />
190190
<ClInclude Include="MtgCardInfoHelper.h" />
191191
<ClInclude Include="OcrDecodeResult.h" />
192+
<ClInclude Include="OcrImageNoiseCleaner.h" />
192193
<ClInclude Include="ReadingConfiguration.h" />
193194
<ClInclude Include="SaveOcvImage.h" />
194195
<ClInclude Include="SectionExtractor.h" />
@@ -224,6 +225,7 @@ xcopy /Y "$(ProjectDir)deployment\vcredist_x86.exe" "$(OutDir)"</Command>
224225
<ClCompile Include="LoadOcvImage.cpp" />
225226
<ClCompile Include="MtgCardInfoHelper.cpp" />
226227
<ClCompile Include="OcrDecodeResult.cpp" />
228+
<ClCompile Include="OcrImageNoiseCleaner.cpp" />
227229
<ClCompile Include="ReadingConfiguration.cpp" />
228230
<ClCompile Include="SaveOcvImage.cpp" />
229231
<ClCompile Include="SectionExtractor.cpp" />

ReadMagicCard/ReadMagicCard.vcxproj.filters

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,9 @@
168168
<ClInclude Include="StoreCardProcessingData.h">
169169
<Filter>Application</Filter>
170170
</ClInclude>
171+
<ClInclude Include="OcrImageNoiseCleaner.h">
172+
<Filter>Engine\Helpers</Filter>
173+
</ClInclude>
171174
</ItemGroup>
172175
<ItemGroup>
173176
<ClCompile Include="CardNameInfo.cpp">
@@ -266,6 +269,9 @@
266269
<ClCompile Include="StoreCardProcessingData.cpp">
267270
<Filter>Application</Filter>
268271
</ClCompile>
272+
<ClCompile Include="OcrImageNoiseCleaner.cpp">
273+
<Filter>Engine\Helpers</Filter>
274+
</ClCompile>
269275
</ItemGroup>
270276
<ItemGroup>
271277
<None Include="tessdata\eng.traineddata">

ReadMagicCard/TitleExtractor.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "AlgorithmHelper.h"
66
#include "SaveOcvImage.h"
77
#include "LetterFilter.h"
8+
#include "OcrImageNoiseCleaner.h"
89

910
using namespace cv;
1011
using namespace std;
@@ -38,6 +39,9 @@ bool TitleExtractor::ExtractTitle(vector<Mat>& outImages, int binaryThreshold, i
3839
//Extract a clean image containing the title.
3940
bool success = getTitleText(outImage, outImages, numberOfTries);
4041

42+
//Clean the text images from noise and clutter.
43+
cleanOcrImages(outImages);
44+
4145
//Store that the extraction has been run, indicating that we can
4246
//extract extra data (such as if the title has black background colour).
4347
hasRunTitleExtraction = true;
@@ -133,7 +137,6 @@ bool TitleExtractor::getTitleText(const Mat titleImage, vector<Mat>& textImages,
133137
//Cut out the title text.
134138
int borderThickness = 10;
135139
ImageHelper::CropImageWithSolidBorder(straightenTitleImage, straightenTitleImage, straightTextArea, borderThickness);
136-
137140
textImages.push_back(straightenTitleImage);
138141

139142
if (letters.size() < 7) {
@@ -212,3 +215,13 @@ RotatedRect TitleExtractor::getTextArea(Contour letters, TrendLine centerLine, T
212215

213216
return textArea;
214217
}
218+
219+
//Cleans the text images from noise and clutter.
//
//outImages: the images to clean. Every image is cleaned in place.
void TitleExtractor::cleanOcrImages(vector<Mat>& outImages) {

    OcrImageNoiseCleaner cleaner(imageFileName, originalImageData, systemMethods, runDebugging);

    //Iterate by reference so the cleaner works on the vector's own elements.
    //Iterating by value would clean a copied cv::Mat header, which only
    //reaches the caller's image for as long as the copy happens to share
    //its pixel buffer with the original.
    for (Mat& dirtyImage : outImages) {

        cleaner.CleanImage(dirtyImage);
    }
}

ReadMagicCard/TitleExtractor.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ class TitleExtractor :
2626
bool getTitleText(const cv::Mat titleImage, std::vector<cv::Mat>& textImages, int& numberOfTries);
2727
//Gets the rectangle bounding the text area.
2828
cv::RotatedRect getTextArea(Contour letters, TrendLine centerLine, TrendLine baseLine, cv::Mat titleImage, int numberOfTries);
29+
//Cleans the text images from noise and clutter.
30+
void cleanOcrImages(std::vector<cv::Mat>& outImages);
2931

3032
//Tells if the original title image has a black background.
3133
bool hasOriginalTitleBlackBackground;

0 commit comments

Comments
 (0)