diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp
index 72503636c0..bae30ab8bb 100644
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@@ -64,6 +64,7 @@
 #include <cmath>    // for round, M_PI
 #include <cstdint>  // for int32_t
 #include <cstring>  // for strcmp, strcpy
+#include <filesystem> // for std::filesystem
 #include <fstream>  // for size_t
 #include <iostream> // for std::cin
 #include <locale>   // for std::locale::classic
@@ -82,15 +83,9 @@
 #endif
 
 #if defined(_WIN32)
-#  include <fcntl.h>
-#  include <io.h>
-#else
-#  include <dirent.h> // for closedir, opendir, readdir, DIR, dirent
-#  include <libgen.h>
-#  include <sys/stat.h> // for stat, S_IFDIR
-#  include <sys/types.h>
-#  include <unistd.h>
-#endif // _WIN32
+#  include <fcntl.h> // for _O_BINARY
+#  include <io.h>    // for _setmode
+#endif
 
 namespace tesseract {
 
@@ -149,61 +144,51 @@ static void ExtractFontName(const char* filename, std::string* fontname) {
 
-/* Add all available languages recursively.
- */
-static void addAvailableLanguages(const std::string &datadir, const std::string &base,
+/* Add all available languages recursively.
+ *
+ * Walks datadir and all of its subdirectories and appends one entry to
+ * langs for every non-directory entry whose file name ends in
+ * "." + kTrainedDataSuffix. Each entry is the path relative to datadir,
+ * always with '/' separators and with the suffix removed
+ * (e.g. "script/Latin" for "<datadir>/script/Latin.traineddata").
+ *
+ * Files and directories whose name starts with '.' are skipped, and
+ * directories are never reported as languages. File system errors such as
+ * a missing or unreadable datadir end the scan quietly instead of throwing.
+ */
+static void addAvailableLanguages(const std::string &datadir,
                                   std::vector<std::string> *langs) {
-  auto base2 = base;
-  if (!base2.empty()) {
-    base2 += "/";
-  }
-  const size_t extlen = sizeof(kTrainedDataSuffix);
-#ifdef _WIN32
-  WIN32_FIND_DATA data;
-  HANDLE handle = FindFirstFile((datadir + base2 + "*").c_str(), &data);
-  if (handle != INVALID_HANDLE_VALUE) {
-    BOOL result = TRUE;
-    for (; result;) {
-      char *name = data.cFileName;
-      // Skip '.', '..', and hidden files
-      if (name[0] != '.') {
-        if ((data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == FILE_ATTRIBUTE_DIRECTORY) {
-          addAvailableLanguages(datadir, base2 + name, langs);
-        } else {
-          size_t len = strlen(name);
-          if (len > extlen && name[len - extlen] == '.' &&
-              strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) {
-            name[len - extlen] = '\0';
-            langs->push_back(base2 + name);
-          }
-        }
-      }
-      result = FindNextFile(handle, &data);
-    }
-    FindClose(handle);
-  }
-#else // _WIN32
-  DIR *dir = opendir((datadir + base).c_str());
-  if (dir != nullptr) {
-    dirent *de;
-    while ((de = readdir(dir))) {
-      char *name = de->d_name;
-      // Skip '.', '..', and hidden files
-      if (name[0] != '.') {
-        struct stat st;
-        if (stat((datadir + base2 + name).c_str(), &st) == 0 && (st.st_mode & S_IFDIR) == S_IFDIR) {
-          addAvailableLanguages(datadir, base2 + name, langs);
-        } else {
-          size_t len = strlen(name);
-          if (len > extlen && name[len - extlen] == '.' &&
-              strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) {
-            name[len - extlen] = '\0';
-            langs->push_back(base2 + name);
-          }
-        }
-      }
+  namespace fs = std::filesystem;
+  const std::string suffix = std::string(".") + kTrainedDataSuffix;
+  const fs::path root(datadir);
+  // The error_code overloads keep a missing or unreadable datadir from
+  // throwing std::filesystem::filesystem_error at the caller.
+  std::error_code ec;
+  fs::recursive_directory_iterator it(
+      root,
+      fs::directory_options::follow_directory_symlink |
+          fs::directory_options::skip_permission_denied,
+      ec);
+  const fs::recursive_directory_iterator end;
+  for (; !ec && it != end; it.increment(ec)) {
+    const std::string name = it->path().filename().string();
+    std::error_code status_ec; // an unreadable entry must not stop the scan
+    const bool is_dir = it->is_directory(status_ec);
+    if (!name.empty() && name[0] == '.') {
+      // Skip hidden entries and do not descend into hidden directories.
+      if (is_dir) {
+        it.disable_recursion_pending();
+      }
+      continue;
+    }
+    // Require the suffix at the very end of a non-directory name, so that
+    // "eng.traineddata.bak" or a directory "x.traineddata" is not listed.
+    if (!is_dir && name.size() > suffix.size() &&
+        name.compare(name.size() - suffix.size(), suffix.size(), suffix) == 0) {
+      // generic_string() yields '/' separators on every platform, like the
+      // names built by the previous implementation.
+      const std::string lang = it->path().lexically_relative(root).generic_string();
+      langs->push_back(lang.substr(0, lang.size() - suffix.size()));
     }
-    closedir(dir);
   }
-#endif
 }
 
 TessBaseAPI::TessBaseAPI()
@@ -444,7 +429,7 @@ void TessBaseAPI::GetLoadedLanguagesAsVector(std::vector<std::string> *langs) co
 void TessBaseAPI::GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const {
   langs->clear();
   if (tesseract_ != nullptr) {
-    addAvailableLanguages(tesseract_->datadir, "", langs);
+    addAvailableLanguages(tesseract_->datadir, langs);
     std::sort(langs->begin(), langs->end());
   }
 }