Skip to content

Commit

Permalink
Rewrite how OCR is performed and results are returned. I found out ab…
Browse files Browse the repository at this point in the history
…out the method Tesseract#getWords which returns both text and bounding boxes at the same time. My previous method was erroneously calling methods to calculate those independently of each other, so it was performing OCR twice; this should theoretically halve the execution time.

I also simplified the convoluted mess of transforming results between arrays and lists. Grid2D is now OCRArray2D and is the universal container for OCR results. It contains OCRArrayNodes which hold the array position, value, and bounding box of each OCR word.
  • Loading branch information
Phanabani committed Jan 15, 2021
1 parent 8fcda50 commit de23548
Show file tree
Hide file tree
Showing 6 changed files with 246 additions and 288 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import net.sourceforge.tess4j.ITessAPI;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import net.sourceforge.tess4j.Word;
import org.apache.commons.text.similarity.LevenshteinDistance;

import java.awt.*;
Expand All @@ -12,21 +12,11 @@
import java.util.ArrayList;
import java.util.Arrays;

class OCRResult {
ArrayList<ArrayList<Integer>> values;
ArrayList<Rectangle> regions;

public OCRResult(ArrayList<ArrayList<Integer>> values, ArrayList<Rectangle> regions) {
this.values = values;
this.regions = regions;
}
}

class DetectionResult {
OCRResult matrix, sequences;
OCRArray2D matrix, sequences;
int bufferSize;

public DetectionResult(OCRResult matrix, OCRResult sequences, int bufferSize) {
public DetectionResult(OCRArray2D matrix, OCRArray2D sequences, int bufferSize) {
this.matrix = matrix;
this.sequences = sequences;
this.bufferSize = bufferSize;
Expand Down Expand Up @@ -121,12 +111,6 @@ private void initTesseract() {
tess.setTessVariable("user_defined_dpi", "300");
}

private static void offsetRegions(ArrayList<Rectangle> regions, Point origin) {
for (Rectangle r : regions) {
r.translate(origin.x, origin.y);
}
}

/**
* Find a black outlined rectangle in the image. The algorithm first searches
* left until it finds a black pixel (leftmost bounds of the box), searches
Expand Down Expand Up @@ -189,38 +173,28 @@ private int calcBufferSize(Rectangle bufferBoundingBox) {
return (int)(innerWidth * 18/23 / innerHeight);
}

private OCRResult doOCR(BufferedImage img, Rectangle boundingBox) {
private OCRArray2D doOCR(BufferedImage img, Rectangle boundingBox) {
try {
img = img.getSubimage(
boundingBox.x, boundingBox.y, boundingBox.width, boundingBox.height
);
} catch (RasterFormatException e) {
return null;
}
try {
String text = tess.doOCR(img);
ArrayList<Rectangle> regions = (ArrayList<Rectangle>) tess.getSegmentedRegions(
img, ITessAPI.TessPageIteratorLevel.RIL_WORD
);
offsetRegions(regions, boundingBox.getLocation());
return new OCRResult(parseText(text), regions);
} catch (NullPointerException | TesseractException e) {
return null;
}
}

private static ArrayList<ArrayList<Integer>> parseText(String text) {
ArrayList<ArrayList<Integer>> rows = new ArrayList<>();
for (String rowText : text.split("\n")) {
// Split up lines
ArrayList<Integer> row = new ArrayList<>();
for (String word : rowText.split(" ")) {
// Parse each word in the line as a hex value
row.add(parseWord(word));
}
rows.add(row);
OCRArray2D array = new OCRArray2D();
Rectangle lastBounds = null;
for (Word word : tess.getWords(img, ITessAPI.TessPageIteratorLevel.RIL_WORD)) {
Rectangle bounds = word.getBoundingBox();
bounds.translate(boundingBox.x, boundingBox.y);
if (lastBounds == null || bounds.y > lastBounds.y + lastBounds.height)
// The bounding box is below the last one, so we're on a new row
array.addRow();
array.add(parseWord(word.getText()), bounds);
lastBounds = bounds;
}
return rows;

return array;
}

private static Integer parseWord(String word) {
Expand Down Expand Up @@ -288,15 +262,17 @@ public DetectionResult detect() {

// Detect the matrix
BufferedImage captureMatrix;
OCRResult matrix = null;
OCRArray2D matrix = null;
for (int thresh=MATRIX_THRESHOLD; thresh<=MATRIX_THRESHOLD_MAX;
thresh+=MATRIX_THRESHOLD_DELTA) {
// TODO it'd probably be nice to optimize the image processing by
// cropping rather than full copies ;)
captureMatrix = ImageProcessing.copy(captureMaster);
ImageProcessing.threshold(captureMatrix, thresh);
ImageProcessing.invert(captureMatrix);

matrix = doOCR(captureMatrix, matrixBox);
if (matrix != null && Utils.isGridUniform(matrix.values))
if (matrix != null && matrix.isGrid())
// the OCR successfully found a well-formed grid
break;
}
Expand All @@ -308,7 +284,7 @@ public DetectionResult detect() {
ImageProcessing.threshold(captureSequences, SEQUENCES_THRESHOLD);
ImageProcessing.invert(captureSequences);

OCRResult sequences = doOCR(captureSequences, sequencesBox);
OCRArray2D sequences = doOCR(captureSequences, sequencesBox);
if (sequences == null)
return null;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
import java.awt.AWTException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.logging.LogManager;

public class Main implements NativeKeyListener {
Expand Down Expand Up @@ -72,8 +72,6 @@ public void nativeKeyReleased(NativeKeyEvent e) {
}

public void runSuite() {
StringBuilder sb;

overlay.clearSolution();
try {
Thread.sleep(100);
Expand All @@ -88,54 +86,27 @@ public void runSuite() {
return;
}

if (logger.isDebugEnabled()) {
sb = new StringBuilder();
for (ArrayList<Integer> row : detection.matrix.values) {
for (Integer cell : row) {
sb.append(String.format("%02X ", cell));
}
sb.append("\n");
}
logger.debug("Matrix:\n{}", sb);

sb = new StringBuilder();
for (ArrayList<Integer> row : detection.sequences.values) {
for (Integer i : row) {
sb.append(String.format("%02X ", i));
}
sb.append("\n");
}
logger.debug("Sequences:\n{}", sb);

logger.debug("Buffer size: {}", detection.bufferSize);
}

Integer[][] matrixArr = Utils.tryGet2DSubarray(detection.matrix.values);
if (matrixArr == null) {
overlay.clearSolution();
return;
}
logger.debug("Matrix:\n{}", detection.matrix);
logger.debug("Sequences:\n{}", detection.sequences);
logger.debug("Buffer size: {}", detection.bufferSize);

solver.setAll(matrixArr, detection.sequences.values, detection.bufferSize);
solver.setAll(detection);
solver.solve();
ArrayList<GridNode> solution = solver.getSolution();
List<OCRArrayNode> solution = solver.getSolution();
if (solution == null) {
overlay.clearSolution();
return;
}

if (logger.isDebugEnabled()) {
sb = new StringBuilder();
for (GridNode s : solver.getSolution()) {
sb.append(String.format("%02X (%d, %d)\n", s.value, s.x, s.y));
StringBuilder sb = new StringBuilder();
for (OCRArrayNode node : solution) {
sb.append(String.format("\n(%d, %d) %02X", node.x, node.y, node.value));
}
sb.append("\n");
logger.debug("Solution:\n{}", sb);
logger.debug("Solution:{}", sb);
}

int matrixWidth = matrixArr[0].length;
overlay.setRegions(detection.matrix.regions);
overlay.setSolution(solution, matrixWidth);
overlay.setSolution(solution);

System.gc();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
package com.github.hawkpath.cyberpunk_breach_protocol_solver;

import java.awt.Rectangle;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * A single OCR word stored in an {@link OCRArray2D}: its position in the
 * array, its parsed value, and the bounding box it was read from.
 */
class OCRArrayNode {
  /** Column index of this node within its row. */
  public int x;
  /** Index of the row that contains this node. */
  public int y;
  /**
   * Parsed value of the word. This is a boxed {@code Integer} and may be
   * {@code null} (presumably when the word could not be parsed -- confirm
   * against the parser that produces it).
   */
  public Integer value;
  /** Bounding box of the word as reported by OCR. */
  public Rectangle boundingBox;

  public OCRArrayNode(int x, int y, Integer value, Rectangle boundingBox) {
    this.x = x;
    this.y = y;
    this.value = value;
    this.boundingBox = boundingBox;
  }

  /**
   * Compares this node's value with a raw value.
   *
   * <p>Note: this is an overload, not an override of
   * {@link Object#equals(Object)}; collections will not use it.
   *
   * @param other the value to compare against
   * @return {@code true} if this node has a value and it equals {@code other};
   *     {@code false} if the value differs or this node has no value
   */
  public boolean equals(int other) {
    // A node without a value can never match a concrete number; guarding here
    // avoids the NullPointerException a bare value.equals(...) would throw.
    return value != null && value.intValue() == other;
  }

  /**
   * Compares this node's value with another node's value. Position and
   * bounding box are ignored.
   *
   * <p>Note: this is an overload, not an override of
   * {@link Object#equals(Object)}; collections will not use it.
   *
   * @param other the node to compare against, may be {@code null}
   * @return {@code true} if both values are equal or both are {@code null};
   *     {@code false} otherwise, including when {@code other} is {@code null}
   */
  public boolean equals(OCRArrayNode other) {
    if (other == null) {
      return false;
    }
    if (value == null) {
      return other.value == null;
    }
    return value.equals(other.value);
  }

  /** Returns a debug representation: the value as two-digit hex, then (x, y). */
  @Override
  public String toString() {
    return String.format("<OCRArrayNode %02X (%d, %d)>", value, x, y);
  }
}

/**
 * A row-oriented 2D container of {@link OCRArrayNode}s. Rows are appended with
 * {@link #addRow()} and filled left to right with {@link #add}. Rows may have
 * different lengths; {@link #isGrid()} reports whether they are all the same
 * length.
 */
public class OCRArray2D implements Iterable<List<OCRArrayNode>> {

  private final List<List<OCRArrayNode>> rows;
  /** The most recently added row, or {@code null} before the first row exists. */
  private List<OCRArrayNode> lastRow;
  /** Cached result of {@link #isGrid()}; {@code null} means "needs recomputing". */
  private Boolean isGrid = true;

  public OCRArray2D() {
    rows = new ArrayList<>();
  }

  /**
   * Iterates over the rows from first to last.
   *
   * <p>NOTE(review): this hands out the live internal row lists; structural
   * changes made through them are not seen by the {@link #isGrid()} cache.
   */
  @Override
  public Iterator<List<OCRArrayNode>> iterator() {
    return rows.iterator();
  }

  /**
   * Formats the array one row per line, each cell as two-digit uppercase hex
   * followed by a space.
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    for (int y = 0; y < rows.size(); y++) {
      if (y != 0) {
        sb.append("\n");
      }
      for (OCRArrayNode cell : getRow(y)) {
        sb.append(String.format("%02X ", cell.value));
      }
    }
    return sb.toString();
  }

  /** Appends a new, empty row; subsequent {@link #add} calls fill it. */
  public void addRow() {
    // A new (empty) row changes the row lengths, so the cached grid flag may
    // no longer hold.
    makeFieldsDirty();
    lastRow = new ArrayList<>();
    rows.add(lastRow);
  }

  /**
   * Appends a node to the end of the last row. If no row exists yet, the first
   * row is created.
   *
   * @param value the node's value, may be {@code null}
   * @param boundingBox the node's bounding box
   */
  public void add(Integer value, Rectangle boundingBox) {
    if (lastRow == null) {
      addRow();
    }
    makeFieldsDirty();
    lastRow.add(new OCRArrayNode(lastRow.size(), rows.size() - 1, value, boundingBox));
  }

  /** Invalidates cached values derived from the contents. */
  private void makeFieldsDirty() {
    isGrid = null;
  }

  /**
   * @param x column index
   * @param y row index
   * @return the node at column {@code x} of row {@code y}
   * @throws IndexOutOfBoundsException if either index is out of range
   */
  public OCRArrayNode get(int x, int y) throws IndexOutOfBoundsException {
    return rows.get(y).get(x);
  }

  /**
   * @param y row index
   * @return the live list backing row {@code y}
   * @throws IndexOutOfBoundsException if {@code y} is out of range
   */
  public List<OCRArrayNode> getRow(int y) throws IndexOutOfBoundsException {
    return rows.get(y);
  }

  /**
   * Reports whether every row has the same number of cells. An array with zero
   * or one row counts as a grid. The result is cached until the next
   * {@link #addRow()} or {@link #add}.
   */
  public boolean isGrid() {
    if (isGrid != null) {
      return isGrid;
    }

    boolean uniform = true;
    if (rows.size() > 1) {
      int width = rows.get(0).size();
      for (int i = 1; i < rows.size(); i++) {
        if (rows.get(i).size() != width) {
          uniform = false;
          break;
        }
      }
    }

    isGrid = uniform;
    return uniform;
  }

  /**
   * @return the common row length if this array is a grid (0 when there are no
   *     rows), or -1 if the rows have differing lengths
   */
  public int getWidth() {
    if (!isGrid()) {
      return -1;
    }
    // In a grid every row has the same length, so the first row is as good as
    // the last and, unlike lastRow, is safe to query via an emptiness check.
    return rows.isEmpty() ? 0 : rows.get(0).size();
  }

  /** @return the number of rows */
  public int getHeight() {
    return rows.size();
  }

  /**
   * Same as {@link #findInRow(int, int, int)}, taking the value to search for
   * from {@code nodeWithValue}.
   *
   * @return the first match, or {@code null} if there is none or if
   *     {@code nodeWithValue} is {@code null} or has no value
   */
  public OCRArrayNode findInRow(int row, OCRArrayNode nodeWithValue, int start) {
    if (nodeWithValue == null || nodeWithValue.value == null) {
      return null;
    }
    return findInRow(row, nodeWithValue.value.intValue(), start);
  }

  /**
   * Searches row {@code row} from column {@code start} (inclusive) to the end
   * for the first node whose value equals {@code value}.
   *
   * @return the first matching node, or {@code null} if none is found or this
   *     array is not a grid
   * @throws IndexOutOfBoundsException if a visited position is out of range
   */
  public OCRArrayNode findInRow(int row, int value, int start) {
    if (!isGrid()) {
      return null;
    }

    int width = getWidth();
    for (int i = start; i < width; i++) {
      OCRArrayNode node = get(i, row);
      if (hasValue(node, value)) {
        return node;
      }
    }

    return null;
  }

  /**
   * Same as {@link #findInColumn(int, int, int)}, taking the value to search
   * for from {@code nodeWithValue}.
   *
   * @return the first match, or {@code null} if there is none or if
   *     {@code nodeWithValue} is {@code null} or has no value
   */
  public OCRArrayNode findInColumn(int col, OCRArrayNode nodeWithValue, int start) {
    if (nodeWithValue == null || nodeWithValue.value == null) {
      return null;
    }
    return findInColumn(col, nodeWithValue.value.intValue(), start);
  }

  /**
   * Searches column {@code col} from row {@code start} (inclusive) to the
   * bottom for the first node whose value equals {@code value}.
   *
   * @return the first matching node, or {@code null} if none is found or this
   *     array is not a grid
   * @throws IndexOutOfBoundsException if a visited position is out of range
   */
  public OCRArrayNode findInColumn(int col, int value, int start) {
    if (!isGrid()) {
      return null;
    }

    int height = getHeight();
    for (int i = start; i < height; i++) {
      OCRArrayNode node = get(col, i);
      if (hasValue(node, value)) {
        return node;
      }
    }

    return null;
  }

  /** Null-safe check that {@code node} carries exactly {@code value}. */
  private static boolean hasValue(OCRArrayNode node, int value) {
    Integer nodeValue = node.value;
    return nodeValue != null && nodeValue.intValue() == value;
  }

}
Loading

0 comments on commit de23548

Please sign in to comment.