/// Runs Tesseract layout analysis and OCR on a single image file.
///
/// The image is first loaded with Leptonica and handed to Tesseract only to
/// query and print the page orientation, writing direction, text-line order
/// and deskew angle. The file is then processed a second time through
/// ProcessPages(), using the first renderer produced by PreloadRenderers()
/// for the output base name "wynik".
///
/// The process is terminated with exit code 2 if the image cannot be read,
/// and with exit code 1 if Tesseract fails to initialise or the page
/// processing fails.
///
/// @param imagepath Path of the image file to recognise.
void TextWindow::recognize(const char *imagepath)
{
    Pix* pixs = pixRead(imagepath);
    if (!pixs)
    {
        fprintf(stderr, "Cannot open input file: %s\n", imagepath);
        exit(2);
    }

    tesseract::TessBaseAPI api;
    const char* lang = "pol";
    const char* datapath = "/usr/share/tesseract-ocr";
    // NOTE(review): pagesegmode is only forwarded to PreloadRenderers(); it is
    // never applied to `api` itself (e.g. via SetPageSegMode) — confirm that
    // this is intended.
    tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
    tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT;

    int init_failed = api.Init(datapath, lang, enginemode);
    if (init_failed)
    {
        fprintf(stderr, "Could not initialize tesseract.\n");
        pixDestroy(&pixs);
        exit(1);
    }

    api.SetImage(pixs);

    // Layout analysis is best-effort: when no iterator is returned the
    // orientation report is simply skipped and OCR still runs below.
    tesseract::PageIterator* it = api.AnalyseLayout();
    if (it) {
        tesseract::Orientation orientation;
        tesseract::WritingDirection direction;
        tesseract::TextlineOrder order;
        float deskew_angle;

        it->Orientation(&orientation, &direction, &order, &deskew_angle);
        // The format string must be a single literal per line; the enums are
        // cast explicitly so the arguments match the %d conversions.
        printf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
               "Deskew angle: %.4f\n",
               static_cast<int>(orientation),
               static_cast<int>(direction),
               static_cast<int>(order),
               deskew_angle);
    }

    delete it;  // deleting a null pointer is a no-op

    // ProcessPages() below is given the file path again, so the in-memory
    // image is no longer needed.
    pixDestroy(&pixs);

    tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
    PreloadRenderers(&api, &renderers, pagesegmode, "wynik");
    if (!renderers.empty()) {
        bool succeed = api.ProcessPages(imagepath, NULL, 0, renderers[0]);
        if (!succeed) {
            fprintf(stderr, "Error during processing.\n");
            exit(1);
        }
    }
}
The above procedure recognizes text from an image and saves it to the file 
"wynik.txt". However, my images are postal addresses from letters: first comes 
the first and last name (WITHOUT digits), then the address, and finally the 
post code, e.g. 12-345 (ONLY digits and a hyphen). How can I use this 
information to make the text recognition more accurate?

-- 
You received this message because you are subscribed to the Google Groups 
"tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at https://groups.google.com/group/tesseract-ocr.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/tesseract-ocr/d09945bf-81c9-47f0-aca7-0629af6151e0%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.

Reply via email to