You can try this:
$ convert -rotate 10 phototest.tif phototest-r.png
$ g++ -o test test.cpp -I/usr/local/include/tesseract/ \
-I/usr/local/include/leptonica/ -L/usr/local/libs -ltesseract
$ ./test
where:
*phototest.tif* is from tesseract source
*convert* is part of ImageMagick. The first line is not needed ;-)
Tested on Mandriva Linux with tesseract 3.01
---------------- test.cpp ----------------
#include <cstdio>
#include <baseapi.h>
#include <allheaders.h>
// Image handle assigned from pixRead() in main() and released there with
// pixDestroy().
Pix *pix;
// Outputs of api.GetTextDirection() in main(); printed as "Offset" and "Slope".
int offset;
float slope;
// Runs OCR on "phototest-r.png" using the "eng" language data and prints the
// mean text confidence, the text direction (offset and slope) and the
// recognized text to stdout.
//
// Returns 0 on success, or 1 if the image cannot be read, the engine cannot
// be initialized, or no text could be produced. Command-line arguments are
// not used.
int main(int argc, char *argv[]) {
  tesseract::TessBaseAPI api;

  // pixRead() yields NULL when the file is missing or unreadable; handing
  // that to SetImage() would fail much less clearly later on.
  pix = pixRead("phototest-r.png");
  if (pix == NULL) {
    fprintf(stderr, "Could not read image phototest-r.png\n");
    return 1;
  }

  // Init() reports failure (e.g. language data not found) with a non-zero
  // return value; recognition must not be attempted in that case.
  if (api.Init(NULL, "eng") != 0) {
    fprintf(stderr, "Could not initialize tesseract\n");
    pixDestroy(&pix);
    return 1;
  }
  api.SetPageSegMode(tesseract::PSM_AUTO);
  api.SetImage(pix);

  // GetUTF8Text() triggers recognition, so it has to run before the
  // confidence and text direction are queried.
  char* outText = api.GetUTF8Text();
  int conf = api.MeanTextConf();
  api.GetTextDirection(&offset, &slope);

  printf("Confidence=%d\n", conf);
  printf("Offset: %d Slope: %.2f\n", offset, slope);
  printf("OCR output:\n\n");

  int status = 0;
  if (outText != NULL) {
    // Print through "%s": the recognized text is data, not a format string,
    // and may legitimately contain '%' characters.
    printf("%s", outText);
  } else {
    fprintf(stderr, "No text was recognized\n");
    status = 1;
  }

  api.Clear();
  api.End();
  // The text buffer is owned by the caller and released with delete[];
  // delete[] on NULL is a no-op.
  delete [] outText;
  pixDestroy(&pix);
  return status;
}
---------------- test.cpp ----------------
I hope this helps a little bit.
Zdenko
On Thu, Dec 15, 2011 at 11:03 PM, braza <
[email protected]> wrote:
> Hi,
>
> The world of open source welcomes me with insufficient info/examples/
> documentation but with opened doors to ask ;)
>
> I`m trying just to recognize really clear and simple line of text in
> English like "Tess TEST 123.4 $15"
>
> now I have:
>
> //Tesseract block start
> CTessOCR *tess = CTessOCR::Instance();
> tess->api->SetVariable ("tessedit_char_whitelist",
> "0123456789");
> tess->api->SetVariable ("classify_bln_numeric_mode", "1");
> tess->api->Init ("./../../tessdata", "eng");
> #ifdef DEBUG_MODE
> tess->api->SetVariable ("tessedit_adaption_debug", "T");
> tess->api->SetVariable ("tessedit_draw_outwords", "T");
> tess->api->SetVariable ("tessedit_dump_choices", "T");
> tess->api->SetVariable ("tessedit_dump_choices", "T");
> tess->api->SetVariable ("interactive_mode", "T");
> tess->api->SetVariable ("tessedit_create_hocr", "T");
> #endif
> tess->api->SetVariable ("tessedit_single_match", "0");
> tess->api->SetVariable ("tessedit_zero_rejection", "T");
> tess->api->SetVariable ("tessedit_minimal_rejection", "F");
> tess->api->SetVariable ("tessedit_write_rep_codes", "F");
> tess->api->SetVariable ("tessedit_resegment_from_boxes",
> "T");
> tess->api->SetVariable ("tessedit_train_from_boxes", "T");
> tess->api->SetVariable ("textord_fast_pitch_test", "T");
> tess->api->SetVariable ("textord_no_rejects", "T");
> tess->api->SetVariable ("edges_children_fix", "F");
> tess->api->SetVariable ("edges_childarea", "0.65");
> tess->api->SetVariable ("edges_boxarea", "0.9");
> tess->api->SetVariable ("il1_adaption_test", "1");
> tess->api->SetPageSegMode (tesseract::PSM_SINGLE_LINE);
>
> Mat img = imread( "../../tess.jpg", CV_LOAD_IMAGE_GRAYSCALE ); //err
> now
> tess->api->SetImage(convert_mat_to_pix(img));
> std::string text = tess->api->GetUTF8Text();
>
> It all fails in
>
> > match.exe!OpenBoxFile(const STRING & fname)
> match.exe!tesseract::Tesseract::ApplyBoxes(const STRING & fname,
> bool find_segmentation, BLOCK_LIST * block_list)
> match.exe!tesseract::TessBaseAPI::Recognize(ETEXT_DESC * monitor)
> match.exe!tesseract::TessBaseAPI::GetUTF8Text()
>
> Obviously it fails because I`ve never set input file name with boxes.
> But why would I need it? I already have trained data downloaded and
> put in tessdata: eng.traineddata, eng.cube.size etc
>
> --
> You received this message because you are subscribed to the Google
> Groups "tesseract-ocr" group.
> To post to this group, send email to [email protected]
> To unsubscribe from this group, send email to
> [email protected]
> For more options, visit this group at
> http://groups.google.com/group/tesseract-ocr?hl=en
>
--
You received this message because you are subscribed to the Google
Groups "tesseract-ocr" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to
[email protected]
For more options, visit this group at
http://groups.google.com/group/tesseract-ocr?hl=en