I have this code. It takes 6 minutes per page to run! Is there any way to
speed it up?
// Works but is slow
char byt;
char *text, *versionStrP;
l_int32 w, h, d, wpl, format;
PIX *pixd = NULL;
size_t size = iarch_->size();
l_uint8 *data;
if ( (data = (l_uint8 *) malloc( size )) != NULL )
{
memcpy(data, iarch_->raw(), size);
}
else
{
throw behavior_ocr_tesseract_exception("Memory allocation error!");
}
// pixReadMem;
pixd = pixReadMem(data, size);
if (pixd == NULL)
{
throw behavior_ocr_tesseract_exception("Could not create PIX from
memory!");
}
// tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
TessBaseAPI api;
trace("Tess OCR Version: " + std::string(api.Version()));
versionStrP = getLeptonicaVersion();
trace("Leptonica Version: " + std::string(versionStrP));
lept_free(versionStrP);
versionStrP = getImagelibVersions();
trace("Imagelib Version: " + std::string(versionStrP));
lept_free(versionStrP);
if (api.Init(NULL, iarch_->meta_data("engine-language").c_str()))
// por.traineddata need stay on:
// /usr/local/share/tessdata
// if (api.Init("./behavior/tesseract",
iarch_->meta_data("engine-language").c_str()))
{
throw behavior_ocr_tesseract_exception("Could not initialize
tesseract.");
}
api.SetPageSegMode(PSM_AUTO);
api.SetImage(pixd);
char* out_text = api.GetUTF8Text();
//trace(out_tex);
oarch_->load((const char*) out_text, strlen( out_text ));
api.Clear();
api.End();
delete [] out_text;
pixDestroy(&pixd);
--
--
You received this message because you are subscribed to the Google
Groups "tesseract-ocr" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to
[email protected]
For more options, visit this group at
http://groups.google.com/group/tesseract-ocr?hl=en
---
You received this message because you are subscribed to the Google Groups
"tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.