Skip to content

Commit

Permalink
Software Decoding
Browse files Browse the repository at this point in the history
- Software decoder support
- Command line argument for saving frames as bitmaps
  • Loading branch information
yowidin committed Dec 26, 2022
1 parent 8f8117a commit 6fe5e7e
Show file tree
Hide file tree
Showing 9 changed files with 163 additions and 55 deletions.
15 changes: 1 addition & 14 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,6 @@ project(ocr_suite VERSION 0.0.1 LANGUAGES CXX)
include(${CMAKE_BINARY_DIR}/conanbuildinfo.cmake)
conan_basic_setup(NO_OUTPUT_DIRS)

# find_package(ffmpeg REQUIRED CONFIG)
# find_package(tesseract REQUIRED CONFIG)
# find_package(sqlite3 REQUIRED CONFIG)
# find_package(lyra CONFIG REQUIRED)
# find_package(spdlog CONFIG REQUIRED)
# find_package(indicators CONFIG REQUIRED)
# find_package(Boost CONFIG REQUIRED COMPONENTS filesystem)


add_executable(ocr_suite
src/main.cpp
src/video.cpp
Expand All @@ -24,11 +15,7 @@ add_executable(ocr_suite
src/speed_meter.cpp
src/options.cpp)

target_link_libraries(ocr_suite PRIVATE ${CONAN_LIBS}
# ffmpeg::avcodec ffmpeg::avformat ffmpeg::avutil ffmpeg::swscale
# tesseract::libtesseract
# SQLite::SQLite3 bfg::lyra spdlog::spdlog Boost::filesystem indicators::indicators
)
target_link_libraries(ocr_suite PRIVATE ${CONAN_LIBS})

target_include_directories(ocr_suite
PUBLIC include
Expand Down
7 changes: 6 additions & 1 deletion include/ocs/ocr.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
#ifndef OCR_SUITE_OCR_H
#define OCR_SUITE_OCR_H

#include <ocs/options.h>
#include <ocs/video.h>

#include <functional>
#include <string>
#include <vector>
Expand Down Expand Up @@ -35,7 +37,7 @@ class ocr {
using ocr_filter_cb_t = std::function<bool(std::int64_t)>;

public:
ocr(const std::string &tess_data_path, const std::string &languages, ocr_result_cb_t cb);
ocr(const ocs::options &opts, ocr_result_cb_t cb);
ocr(const ocr &) = delete;

ocr &operator=(const ocr &) = delete;
Expand All @@ -47,6 +49,9 @@ class ocr {
void do_ocr(const frame_t &frame);

private:
const ocs::options *opts_;
std::string bitmap_directory_{};

tesseract::TessBaseAPI ocr_api_{};
ocr_result_cb_t cb_;
int min_letters_threshold_{3};
Expand Down
3 changes: 3 additions & 0 deletions include/ocs/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ struct options {

//! Video frame filter
std::uint16_t frame_filter{};

//! Save bitmaps to disk
bool save_bitmaps{false};
};

} // namespace ocs
Expand Down
31 changes: 31 additions & 0 deletions include/ocs/util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
//
// Created by Dennis Sitelew on 26.12.22.
//

#ifndef OCR_SUITE_UTIL_H
#define OCR_SUITE_UTIL_H

#include <algorithm>
#include <cctype>
#include <string>

namespace ocs {

//! Strip leading whitespace from \p s, modifying the string in place.
//!
//! Whitespace is whatever std::isspace reports for the current C locale.
//! A string that is empty or all whitespace ends up empty.
inline void ltrim(std::string &s) {
    // The predicate takes unsigned char: handing std::isspace a negative
    // value (other than EOF) is undefined behaviour.
    const auto is_not_space = [](unsigned char ch) { return !std::isspace(ch); };

    const auto first_kept = std::find_if(s.begin(), s.end(), is_not_space);
    s.erase(s.begin(), first_kept);
}

//! Strip trailing whitespace from \p s, modifying the string in place.
//!
//! Whitespace is whatever std::isspace reports for the current C locale.
//! A string that is empty or all whitespace ends up empty.
inline void rtrim(std::string &s) {
    // The predicate takes unsigned char: handing std::isspace a negative
    // value (other than EOF) is undefined behaviour.
    const auto is_not_space = [](unsigned char ch) { return !std::isspace(ch); };

    // Search backwards for the last character to keep; base() converts the
    // reverse iterator into the forward position one past that character.
    const auto last_kept = std::find_if(s.rbegin(), s.rend(), is_not_space);
    s.erase(last_kept.base(), s.end());
}

//! Strip whitespace from both ends of \p s, modifying the string in place.
//!
//! Interior whitespace is left untouched.
inline void trim(std::string &s) {
    rtrim(s); // drop the tail first ...
    ltrim(s); // ... then the head
}

} // namespace ocs

#endif // OCR_SUITE_UTIL_H
6 changes: 2 additions & 4 deletions include/ocs/video.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#define OCR_SUITE_VIDEO_H

#include <ocs/value_queue.h>
#include <ocs/options.h>

#include <cstdint>
#include <memory>
Expand Down Expand Up @@ -60,10 +61,7 @@ class video {
using queue_ptr_t = std::shared_ptr<queue_t>;

public:
video(const std::string &filename,
queue_ptr_t queue,
std::int64_t starting_frame = 0,
frame_filter filter = frame_filter::I_and_P);
video(const options &opts, queue_ptr_t queue, std::int64_t starting_frame = 0);
~video();

public:
Expand Down
8 changes: 4 additions & 4 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
#include <thread>

#include <spdlog/spdlog.h>
#include <boost/asio.hpp>
#include <boost/asio/io_context.hpp>
#include <boost/asio/signal_set.hpp>
#include <indicators/progress_spinner.hpp>

int main(int argc, const char **argv) {
Expand All @@ -26,8 +27,7 @@ int main(int argc, const char **argv) {
/// --- Setup the progress reporters ---

const auto starting_frame_number = db.get_starting_frame_number();
const auto frame_filter = static_cast<ocs::video::frame_filter>(options.frame_filter);
ocs::video video_file{options.video_file, queue, starting_frame_number, frame_filter};
ocs::video video_file{options, queue, starting_frame_number};

std::string postfix = "Processing ...";

Expand Down Expand Up @@ -116,7 +116,7 @@ int main(int argc, const char **argv) {
return !processed;
};

ocs::ocr ocr{options.tess_data_path, options.language, ocr_callback};
ocs::ocr ocr{options, ocr_callback};
ocr.start(queue, filter_callback);
} catch (const std::exception &ex) {
SPDLOG_ERROR("Consumer thread exception: {}", ex.what());
Expand Down
48 changes: 28 additions & 20 deletions src/ocr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,39 @@
// Created by Dennis Sitelew on 21.12.22.
//

#include <ocs/bmp.h>
#include <ocs/ocr.h>
#include <spdlog/spdlog.h>
#include <ocs/util.h>

#include <algorithm>
#include <functional>

#include <spdlog/spdlog.h>
#include <tesseract/baseapi.h>
#include <boost/filesystem.hpp>

using namespace ocs;

namespace {

// trim from start (in place)
inline void ltrim(std::string &s) {
s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) { return !std::isspace(ch); }));
std::string get_bitmap_directory(const std::string &db_path) {
boost::filesystem::path path{db_path};
path.remove_filename();
path /= "out";
return path.string();
}

// trim from end (in place)
inline void rtrim(std::string &s) {
s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(), s.end());
}

// trim from both ends (in place)
inline void trim(std::string &s) {
rtrim(s);
ltrim(s);
std::string get_frame_path(const std::string &bitmap_dir, std::int64_t frame_number) {
boost::filesystem::path path{bitmap_dir};
path /= fmt::format("out-{}.bmp", frame_number);
return path.string();
}

} // namespace

ocr::ocr(const std::string &tess_data_path, const std::string &languages, ocr_result_cb_t cb)
: cb_{std::move(cb)} {
int res = ocr_api_.Init(tess_data_path.c_str(), languages.c_str(), tesseract::OEM_LSTM_ONLY);
ocr::ocr(const ocs::options &opts, ocr_result_cb_t cb)
: opts_{&opts}
, cb_{std::move(cb)} {
int res = ocr_api_.Init(opts_->tess_data_path.c_str(), opts_->language.c_str(), tesseract::OEM_LSTM_ONLY);
if (res) {
throw std::runtime_error("Could not initialize tesseract");
}
Expand All @@ -48,6 +48,11 @@ ocr::ocr(const std::string &tess_data_path, const std::string &languages, ocr_re
#endif

ocr_api_.SetVariable("debug_file", null_device);

bitmap_directory_ = get_bitmap_directory(opts_->database_file);
if (opts_->save_bitmaps) {
boost::filesystem::create_directories(bitmap_directory_);
}
}

void ocr::start(const value_queue_ptr_t &queue, const ocr_filter_cb_t &filter) {
Expand All @@ -58,13 +63,16 @@ void ocr::start(const value_queue_ptr_t &queue, const ocr_filter_cb_t &filter) {
}

auto frame = opt_frame.value();

if (opts_->save_bitmaps) {
const auto file_name = get_frame_path(bitmap_directory_, frame->frame_number);
ocs::bmp::save_image(frame->data, frame->width, frame->height, file_name);
}

if (filter(frame->frame_number)) {
do_ocr(frame);
}

// auto file_name = std::string("out/out-") + std::to_string(frame->frame_number) + ".bmp";
// ocs::bmp::save_image(frame->data, frame->width, frame->height, file_name);

queue->add_producer_value(frame);
}
}
Expand Down
2 changes: 2 additions & 0 deletions src/options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ std::optional<options> options::parse(int argc, const char **argv) {
"frame types. 3 is default (I+P frames)") |
lyra::opt(result.video_file, "video_file")["-i"]["--video-file"]("Video file to process").required() |
lyra::opt(result.database_file, "database_file")["-o"]["--database-file"]("Resulting OCR database").required() |
lyra::opt([&](bool) { result.save_bitmaps = true; })["-b"]["--save-bitmaps"](
"Save video bitmaps in the out/ subdirectory") |
lyra::help(show_help);

auto parse_result = cli.parse({argc, argv});
Expand Down
Loading

0 comments on commit 6fe5e7e

Please sign in to comment.