ScreenTranslator/src/ocr/tesseract.cpp

213 lines
4.8 KiB
C++
Raw Normal View History

2020-02-21 00:45:53 +07:00
#include "tesseract.h"
#include "debug.h"
#include "languagecodes.h"
#include "task.h"

#include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>

#include <QBuffer>
#include <QDir>

#include <algorithm>
#include <cmath>
#include <limits>
2020-03-09 18:47:01 +07:00
2020-02-21 00:45:53 +07:00
#if defined(Q_OS_LINUX)
#include <fstream>
// Estimates the amount of reclaimable memory (in bytes) on Linux by summing
// the "MemFree:", "Buffers:" and "Cached:" entries of /proc/meminfo.
// The values in that file are read as kilobyte counts and converted to bytes.
// Returns 0 when the file cannot be read or none of the entries is present.
static qint64 getFreeMemory()
{
  std::ifstream meminfo("/proc/meminfo");
  qint64 totalKb = 0;

  for (std::string field; meminfo >> field;) {
    const bool counted =
        field == "MemFree:" || field == "Buffers:" || field == "Cached:";
    if (!counted)
      continue;

    // The number directly following the label is the entry's value.
    unsigned long kb = 0;
    if (meminfo >> kb)
      totalKb += kb;
  }

  return totalKb * 1024;
}
#elif defined(Q_OS_WIN)
#include <windows.h>
#undef min
#undef max
// Returns the amount of physical memory currently available (in bytes) as
// reported by GlobalMemoryStatusEx, or -1 when the query fails.
static qint64 getFreeMemory()
{
  MEMORYSTATUSEX status;
  // The API requires the structure size to be filled in before the call.
  status.dwLength = sizeof(status);

  if (!GlobalMemoryStatusEx(&status))
    return -1;

  return status.ullAvailPhys;
}
#endif
// Converts a QImage into a Leptonica Pix by serializing it to an in-memory
// BMP and letting Leptonica parse that buffer.
// Returns whatever pixReadMemBmp returns (callers in this file treat a null
// result as failure); the caller owns the returned Pix.
static Pix *convertImage(const QImage &image)
{
  QBuffer bmpStream;
  bmpStream.open(QIODevice::WriteOnly);
  image.save(&bmpStream, "BMP");

  const QByteArray &bytes = bmpStream.data();
  const auto *raw = reinterpret_cast<const l_uint8 *>(bytes.constData());
  return pixReadMemBmp(raw, bytes.size());
}
// Converts a Leptonica Pix into a QImage by serializing it to an in-memory
// BMP and loading that buffer with QImage.
// Returns a null QImage when serialization or decoding fails.
static QImage convertImage(Pix &image)
{
  l_uint8 *buffer = nullptr;
  size_t len = 0;

  // pixWriteMemBmp reports success with 0 and hands over a buffer that the
  // caller must release.
  const auto status = pixWriteMemBmp(&buffer, &len, &image);
  if (status != 0 || !buffer) {
    if (buffer)
      lept_free(buffer);
    return {};
  }

  QImage result;
  result.loadFromData(buffer, static_cast<int>(len));

  // The serialized BMP is no longer needed once loadFromData has returned;
  // without this the buffer leaked on every call.
  lept_free(buffer);
  return result;
}
2020-03-09 17:36:32 +07:00
// Computes the factor by which `source` should be enlarged (the same factor
// is meant for both axes) before recognition.
//
// The preferred factor brings the lower of the two image resolutions up to
// 500. It is then limited so that
//  - the scaled width and height still fit into an `int`, and
//  - the scaled bitmap fits into 95% of the memory reported by
//    getFreeMemory().
//
// Returns -1.0 when no scaling should happen: null image, unknown
// resolution, resolution already >= 500, degenerate dimensions, or unknown /
// exhausted free memory. The returned factor may be <= 1.0 when memory is
// tight; callers are expected to scale only when the result is > 1.0.
static double getScale(Pix *source)
{
  SOFT_ASSERT(source, return -1.0);

  const auto xRes = pixGetXRes(source);
  const auto yRes = pixGetYRes(source);
  // Non-positive resolution means "unknown"; nothing sensible to aim for.
  if (xRes <= 0 || yRes <= 0)
    return -1.0;

  const auto preferredScale = std::max(500.0 / std::min(xRes, yRes), 1.0);
  if (preferredScale <= 1.0)
    return -1.0;

  // Use the accessor functions instead of touching Pix fields directly, in
  // line with pixGetXRes/pixGetYRes above.
  const double width = pixGetWidth(source);
  const double height = pixGetHeight(source);
  const double depth = pixGetDepth(source);
  if (width <= 0 || height <= 0 || depth <= 0)
    return -1.0;

  // Scaled dimensions must stay representable as int.
  const auto maxDimension = double(std::numeric_limits<int>::max());
  const auto scaleX = std::min(preferredScale, maxDimension / width);
  const auto scaleY = std::min(preferredScale, maxDimension / height);
  const auto scale = std::min(scaleX, scaleY);

  const auto availableMemory = getFreeMemory() * 0.95;
  if (availableMemory < 1)
    return -1.0;

  // Computed in floating point so that large images cannot overflow the
  // intermediate product.
  const auto actualSize = width * height * depth / 8.0;

  // The factor is applied to both axes, so the bitmap grows with scale^2;
  // the largest affordable factor is therefore the square root of the
  // memory ratio, not the ratio itself.
  const auto maxScaleMemory = std::sqrt(availableMemory / actualSize);

  return std::min(scale, maxScaleMemory);
}
2020-02-21 00:45:53 +07:00
// Builds the Pix that is handed to the OCR engine: converts the QImage,
// reduces it to grayscale and, when getScale() asks for it, enlarges it.
//
// Returns nullptr when conversion fails. On success the caller owns the
// returned Pix and must destroy it (see cleanupImage()).
static Pix *prepareImage(const QImage &image)
{
  auto pix = convertImage(image);
  SOFT_ASSERT(pix, return nullptr);

  // All-zero weights: per the Leptonica documentation this selects the
  // library's default RGB weights.
  auto gray = pixConvertRGBToGray(pix, 0.0, 0.0, 0.0);
  // Release the color image before checking the result so that it is not
  // leaked when the grayscale conversion fails.
  pixDestroy(&pix);
  SOFT_ASSERT(gray, return nullptr);

  const auto scale = getScale(gray);
  if (scale <= 1.0)
    return gray;

  auto scaled = pixScale(gray, scale, scale);
  // Fall back to the unscaled image when scaling fails.
  if (!scaled)
    return gray;

  pixDestroy(&gray);
  return scaled;
}
// Releases a Pix by forwarding the pointer-to-pointer to Leptonica's
// pixDestroy.
static void cleanupImage(Pix **image)
{
pixDestroy(image);
}
// Creates a recognizer for `language` using the trained data found in
// `tessdataPath`. Both arguments are checked for emptiness before init() is
// called.
// NOTE(review): SOFT_ASSERT is defined outside this file; judging by its use
// here it runs its second argument (an early return) when the condition
// fails - confirm against debug.h.
Tesseract::Tesseract(const LanguageId &language, const QString &tessdataPath)
{
SOFT_ASSERT(!tessdataPath.isEmpty(), return );
SOFT_ASSERT(!language.isEmpty(), return );
init(language, tessdataPath);
}
// Defaulted destructor; defined in this translation unit, where
// tesseract::TessBaseAPI (the type created for engine_ in init()) is a
// complete type.
Tesseract::~Tesseract() = default;
void Tesseract::init(const LanguageId &language, const QString &tessdataPath)
{
SOFT_ASSERT(!engine_, return );
LanguageCodes languages;
auto langCodes = languages.findById(language);
if (!langCodes) {
error_ = QObject::tr("unknown recognition language: %1").arg(language);
return;
}
engine_ = std::make_unique<tesseract::TessBaseAPI>();
auto result =
engine_->Init(qPrintable(tessdataPath), qPrintable(langCodes->tesseract),
tesseract::OEM_DEFAULT);
if (result == 0)
return;
error_ = QObject::tr("troubles with tessdata");
engine_.reset();
}
// Returns the stored error message (error_). It is set by init() on failure
// and by recognize(), which clears it first and sets it when no text was
// produced.
const QString &Tesseract::error() const
{
return error_;
}
// Lists the languages for which a "*.traineddata" file exists in `path`.
//
// The part of each file name before the first '.' is looked up via
// LanguageCodes::findByTesseract; when found, the translated language name
// is reported, otherwise that file-name part is reported as is.
// Returns an empty list when `path` is empty, does not exist, or contains no
// matching files.
QStringList Tesseract::availableLanguageNames(const QString &path)
{
  // An empty path is rejected explicitly rather than being handed to QDir.
  if (path.isEmpty())
    return {};

  const QDir tessdataDir(path);
  if (!tessdataDir.exists())
    return {};

  LanguageCodes knownLanguages;
  LanguageIds result;

  const auto trainedFiles =
      tessdataDir.entryList({"*.traineddata"}, QDir::Files);
  for (const auto &fileName : trainedFiles) {
    const auto code = fileName.left(fileName.indexOf(QLatin1Char('.')));
    const auto bundle = knownLanguages.findByTesseract(code);
    if (bundle) {
      result.append(QObject::tr(bundle->name));
      continue;
    }
    result.append(code);
  }

  if (result.isEmpty())
    return {};
  return result;
}
2020-02-21 00:45:53 +07:00
// Runs OCR on `source` and returns the recognized text with surrounding
// whitespace trimmed.
//
// Returns an empty string when the engine is missing, the pixmap is null,
// the image cannot be prepared, or nothing was recognized; in the last case
// error_ is set to a translated message.
QString Tesseract::recognize(const QPixmap &source)
{
  SOFT_ASSERT(engine_, return {});
  SOFT_ASSERT(!source.isNull(), return {});

  error_.clear();

  Pix *image = prepareImage(source.toImage());
  SOFT_ASSERT(image != NULL, return {});

  engine_->SetImage(image);
  // The engine returns a heap array that has to be released with delete[];
  // the array form of unique_ptr does exactly that.
  const std::unique_ptr<char[]> utf8Text(engine_->GetUTF8Text());
  engine_->Clear();
  cleanupImage(&image);

  QString text = QString::fromUtf8(utf8Text.get()).trimmed();
  if (text.isEmpty())
    error_ = QObject::tr("Failed to recognize text");
  return text;
}
// Reports whether an engine is currently held, i.e. whether init() succeeded.
bool Tesseract::isValid() const
{
  return engine_ != nullptr;
}