Skip to content

Commit

Permalink
Rework to make it PAGE-XML-Schema valid.
Browse files Browse the repository at this point in the history
  • Loading branch information
JKamlah committed Mar 20, 2024
1 parent 4cbd60a commit 6f3c0eb
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 16 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,7 @@ set(TESSERACT_SRC
src/api/capi.cpp
src/api/renderer.cpp
src/api/altorenderer.cpp
src/api/pagerenderer.cpp
src/api/hocrrenderer.cpp
src/api/lstmboxrenderer.cpp
src/api/pdfrenderer.cpp
Expand All @@ -784,6 +785,8 @@ set(TESSERACT_CONFIGS
tessdata/configs/lstmbox
tessdata/configs/lstmdebug
tessdata/configs/makebox
tessdata/configs/page
tessdata/configs/page_poly
tessdata/configs/pdf
tessdata/configs/quiet
tessdata/configs/rebox
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ Tesseract has **unicode (UTF-8) support**, and can **recognize [more than 100 la

Tesseract supports **[various image formats](https://tesseract-ocr.github.io/tessdoc/InputFormats)** including PNG, JPEG and TIFF.

Tesseract supports **various output formats**: plain text, hOCR (HTML), PDF, invisible-text-only PDF, TSV and ALTO.
Tesseract supports **various output formats**: plain text, hOCR (HTML), PDF, invisible-text-only PDF, TSV, ALTO and PAGE.

You should note that in many cases, in order to get better OCR results, you'll need to **[improve the quality](https://tesseract-ocr.github.io/tessdoc/ImproveQuality.html) of the image** you are giving Tesseract.

Expand Down
26 changes: 13 additions & 13 deletions src/api/pagerenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -700,7 +700,7 @@ char
return nullptr;
}

int lcnt = 0, tcnt = 0, bcnt = 0, wcnt = 0;
int rcnt = 0, lcnt = 0, wcnt = 0;

if (input_file_.empty()) {
SetInputName(nullptr);
Expand Down Expand Up @@ -769,7 +769,7 @@ char
// TODO: Do we need to create a random number here?
std::size_t ro_id = std::hash<std::string>{}(GetInputName());
reading_order_str << "\t\t<ReadingOrder>\n"
<< "\t\t\t<OrderedGroup id=\"ro_"<< ro_id
<< "\t\t\t<OrderedGroup id=\"ro"<< ro_id
<< "\" caption=\"Regions reading order\">\n";

ResultIterator *res_it = GetIterator();
Expand All @@ -787,20 +787,20 @@ char
case PT_HEADING_IMAGE:
case PT_PULLOUT_IMAGE: {
// Handle all kinds of images.
page_str << "\t\t<Graphic id=\"r" << bcnt++ << "\">\n";
page_str << "\t\t<GraphicRegion id=\"r" << rcnt++ << "\">\n";
page_str << "\t\t\t";
AddBoxToPAGE(res_it, RIL_BLOCK, page_str);
page_str << "\t\t</Graphic>\n";
page_str << "\t\t</GraphicRegion>\n";
res_it->Next(RIL_BLOCK);
continue;
}
case PT_HORZ_LINE:
case PT_VERT_LINE:
// Handle horizontal and vertical lines.
page_str << "\t\t<Separator id=\"r_" << bcnt++ << "\">\n";
page_str << "\t\t<SeparatorRegion id=\"r" << rcnt++ << "\">\n";
page_str << "\t\t\t";
AddBoxToPAGE(res_it, RIL_BLOCK, page_str);
page_str << "\t\t</Separator>\n";
page_str << "\t\t</SeparatorRegion>\n";
res_it->Next(RIL_BLOCK);
continue;
case PT_NOISE:
Expand All @@ -814,15 +814,15 @@ char
if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
// Add Block to reading order
reading_order_str << "\t\t\t\t<RegionRefIndexed "
<< "index=\"" << tcnt << "\" "
<< "regionRef=\"r_" << tcnt << "\"/>\n";
<< "index=\"" << rcnt << "\" "
<< "regionRef=\"r" << rcnt << "\"/>\n";

float deskew_angle;
res_it->Orientation(&orientation_block, &writing_direction_block, &textline_order_block,
&deskew_angle);
block_conf = ((res_it-> Confidence(RIL_BLOCK))/100.);
page_str << "\t\t<TextRegion id=\"r_" << tcnt << "\" "
<< "custom=\""<< "readingOrder {index:"<< tcnt <<";} "
page_str << "\t\t<TextRegion id=\"r" << rcnt << "\" "
<< "custom=\""<< "readingOrder {index:"<< rcnt <<";} "
<< "readingDirection {"<< WritingDirectionToStr(writing_direction_block)<<";} "
<< "orientation {"<< orientation_block <<";}\">\n";
page_str << "\t\t\t";
Expand All @@ -849,7 +849,7 @@ char

if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
line_conf = ((res_it-> Confidence(RIL_TEXTLINE))/100.);
line_str << "\t\t\t<TextLine id=\"r_" << tcnt << "_tl_" << lcnt <<"\" readingDirection=\""
line_str << "\t\t\t<TextLine id=\"r" << rcnt << "l" << lcnt <<"\" readingDirection=\""
<< WritingDirectionToStr(writing_direction) << "\" "
<< "custom=\""<< "readingOrder {index:"<< lcnt <<";}\">\n";
// If wordlevel is not set, get the line polygon and baseline
Expand All @@ -868,7 +868,7 @@ char

// Create word stream if word level output is active
if (WORDLEVELFLAG) {
word_str << "\t\t\t\t<Word id=\"r_" << tcnt << "_tl_" << lcnt << "_w_" << wcnt << "\" readingDirection=\""
word_str << "\t\t\t\t<Word id=\"r" << rcnt << "l" << lcnt << "w" << wcnt << "\" readingDirection=\""
<< WritingDirectionToStr(writing_direction) << "\" "
<< "custom=\""<< "readingOrder {index:"<< wcnt <<";}\">\n";
if (!POLYGONFLAG || ttb_flag){
Expand Down Expand Up @@ -1040,7 +1040,7 @@ char
<< "\t\t\t</TextEquiv>\n";
page_str << "\t\t</TextRegion>\n";
region_content.str("");
tcnt++;
rcnt++;
lcnt = 0;
}
}
Expand Down
4 changes: 2 additions & 2 deletions tessdata/configs/page
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
tessedit_create_page 1
tessedit_create_page_polygon 1
tessedit_create_page_wordlevel 0
tessedit_create_page_polygon 0
#tessedit_create_page_wordlevel 0
3 changes: 3 additions & 0 deletions tessdata/configs/page-poly
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
tessedit_create_page 1
tessedit_create_page_polygon 1
#tessedit_create_page_wordlevel 0

0 comments on commit 6f3c0eb

Please sign in to comment.