2020-02-21 00:45:53 +07:00
|
|
|
#include "recognizerworker.h"
|
|
|
|
|
#include "debug.h"
|
|
|
|
|
#include "task.h"
|
|
|
|
|
#include "tesseract.h"
|
|
|
|
|
|
|
|
|
|
// Destructor is defaulted here, in the implementation file, rather than
// implicitly in the header.
// NOTE(review): presumably this is so that the members owning Tesseract
// objects are destroyed where "tesseract.h" is included and the type is
// complete - confirm against the class declaration.
RecognizeWorker::~RecognizeWorker() = default;
|
|
|
|
|
|
|
|
|
|
void RecognizeWorker::handle(const TaskPtr &task)
|
|
|
|
|
{
|
|
|
|
|
SOFT_ASSERT(task, return );
|
|
|
|
|
SOFT_ASSERT(task->isValid(), return );
|
|
|
|
|
SOFT_ASSERT(!tessdataPath_.isEmpty(), return );
|
|
|
|
|
|
2020-04-24 02:06:08 +07:00
|
|
|
LTRACE() << "Start recognize" << task->captured;
|
2020-02-21 00:45:53 +07:00
|
|
|
auto result = task;
|
|
|
|
|
|
|
|
|
|
if (!engines_.count(task->sourceLanguage)) {
|
2020-04-24 02:06:08 +07:00
|
|
|
LTRACE() << "Create OCR engine" << task->sourceLanguage;
|
|
|
|
|
|
2020-07-18 16:26:59 +07:00
|
|
|
auto engine = std::make_unique<Tesseract>(task->sourceLanguage,
|
|
|
|
|
tessdataPath_, tesseractLibrary_);
|
2020-02-21 00:45:53 +07:00
|
|
|
|
|
|
|
|
if (!engine->isValid()) {
|
|
|
|
|
result->error = tr("Failed to init OCR engine: %1").arg(engine->error());
|
|
|
|
|
emit finished(result);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
engines_.emplace(task->sourceLanguage, std::move(engine));
|
2020-04-24 02:06:08 +07:00
|
|
|
LTRACE() << "Added OCR engine" << task->sourceLanguage;
|
2020-02-21 00:45:53 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto &engine = engines_[task->sourceLanguage];
|
|
|
|
|
SOFT_ASSERT(engine->isValid(), return );
|
|
|
|
|
|
|
|
|
|
result->recognized = engine->recognize(task->captured);
|
|
|
|
|
if (result->recognized.isEmpty())
|
|
|
|
|
result->error = engine->error();
|
|
|
|
|
|
2020-04-08 00:35:32 +07:00
|
|
|
lastGenerations_[task->sourceLanguage] = task->generation;
|
|
|
|
|
removeUnused(task->generation);
|
|
|
|
|
|
2020-02-21 00:45:53 +07:00
|
|
|
emit finished(result);
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-18 16:26:59 +07:00
|
|
|
void RecognizeWorker::reset(const QString &tessdataPath,
|
|
|
|
|
const QString &tesseractLibrary)
|
2020-02-21 00:45:53 +07:00
|
|
|
{
|
2020-07-18 16:26:59 +07:00
|
|
|
if (tessdataPath_ == tessdataPath && tesseractLibrary_ == tesseractLibrary)
|
2020-02-21 00:45:53 +07:00
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
tessdataPath_ = tessdataPath;
|
2020-07-18 16:26:59 +07:00
|
|
|
tesseractLibrary_ = tesseractLibrary;
|
2020-02-21 00:45:53 +07:00
|
|
|
engines_.clear();
|
2020-04-24 02:06:08 +07:00
|
|
|
LTRACE() << "Cleared OCR engines";
|
2020-02-21 00:45:53 +07:00
|
|
|
}
|
2020-04-08 00:35:32 +07:00
|
|
|
|
|
|
|
|
// Drops cached OCR engines that have not been used recently.
//
// current - generation of the task being handled right now.
//
// Every language whose last recorded generation is keepGenerations or more
// behind `current` loses both its engine and its generation record. Languages
// used more recently are left untouched.
void RecognizeWorker::removeUnused(Generation current)
{
  const auto keepGenerations = 10;

  auto record = lastGenerations_.begin();
  while (record != lastGenerations_.end()) {
    const bool expired = current - record->second >= keepGenerations;
    if (!expired) {
      ++record;
      continue;
    }

    const auto &language = record->first;
    engines_.erase(language);
    LTRACE() << "Removed unused OCR engine" << language;

    // erase() hands back the iterator following the removed record.
    record = lastGenerations_.erase(record);
  }
}
|