If you did not binarize the input image and you get strange output, ALWAYS
check the thresholded image (GetThresholdedImage()) - see your adapted code
and thresholdedImage.png

Zdenko


On Mon, Oct 7, 2013 at 2:06 PM, Veerendra Jonnalagadda <[email protected]
> wrote:

> Hi
> I have written simple code in C to extract text from Image.
> But it extracts only one line, and I need all four lines to be
> extracted....
> Please Help me....
> Regards
> Veerendra
>
> #include<stdio.h>
> #include<tesseract/baseapi.h>
> #include<tesseract/strngs.h>
> #include<leptonica/allheaders.h>
> int main(int count, char** string)
> {
> int confidence;
> char* text;
> const char* lan="eng";
>  const char* img_path;
> if (count < 2)
> {
>   printf("Empty path");
>  return -1;
>   }
> img_path=string[1];
>   tesseract::TessBaseAPI* ocr = new tesseract::TessBaseAPI();
>  printf("Tesseract-ocr version is  %s\n", ocr->Version());
> printf("Leptonica version is %s\n", getLeptonicaVersion());
>   if(ocr->Init(NULL, lan, tesseract::OEM_DEFAULT))
> {
> printf("tesseract-ocr initialize error\n");
>   return -1;
>   }
>   FILE* in = fopen(img_path, "rb");
>   if (in == NULL) {
> printf("can not open path ...\n");
>   return -1;
>   }
>   fclose(in);
> system("convert Playing.bmp Playing.tiff");
>   Pix *img = pixRead("Playing.tiff");
>   ocr->SetImage(img);
> Boxa* box=ocr->GetComponentImages(tesseract::RIL_TEXTLINE,true,NULL,NULL);
>  printf("Number of Text Line is %d\n",box->n);
>   for (int z = 0; z < box->n; z++)
> {
>   BOX* b = boxaGetBox(box, z, L_CLONE);
>   ocr->SetRectangle(b->x, b->y, b->w, b->h);
>   text = ocr->GetUTF8Text();
>   confidence = ocr->MeanTextConf();
>   printf("TEXT is %s\n",text);
>   }
>   pixDestroy(&img);
>   ocr->End();
>   return 0;
> }
>
> --
> --
> You received this message because you are subscribed to the Google
> Groups "tesseract-ocr" group.
> To post to this group, send email to [email protected]
> To unsubscribe from this group, send email to
> [email protected]
> For more options, visit this group at
> http://groups.google.com/group/tesseract-ocr?hl=en
>
> ---
> You received this message because you are subscribed to the Google Groups
> "tesseract-ocr" group.
> To unsubscribe from this group and stop receiving emails from it, send an
> email to [email protected].
> For more options, visit https://groups.google.com/groups/opt_out.
>

-- 
-- 
You received this message because you are subscribed to the Google
Groups "tesseract-ocr" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to
[email protected]
For more options, visit this group at
http://groups.google.com/group/tesseract-ocr?hl=en

--- 
You received this message because you are subscribed to the Google Groups 
"tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.
/*
 * g++ test-2013-10-07.cpp -o test-2013-10-07 -llept -ltesseract
 * ./test-2013-10-07 .
 */

#include<stdio.h>
#include<tesseract/baseapi.h>
#include<tesseract/strngs.h>
#include<leptonica/allheaders.h>

int main(int count, char** string)
{
    int confidence;
    char* text;
    const char* lan="eng";
    const char* img_path;
    if (count < 2)
    {
        printf("Empty path\n");
        return -1;
    }
    img_path=string[1];
    tesseract::TessBaseAPI* ocr = new tesseract::TessBaseAPI();
    printf("Tesseract-ocr version is  %s\n", ocr->Version());
    printf("Leptonica version is %s\n", getLeptonicaVersion());
    if(ocr->Init(NULL, lan, tesseract::OEM_DEFAULT))
    {
        printf("tesseract-ocr initialize error\n");
        return -1;
    }
    FILE* in = fopen(img_path, "rb");
    if (in == NULL) {
        printf("can not open path...\n");
        return -1;
    }
    fclose(in);
    //system("convert Playing.bmp Playing.tiff");
    Pix *img = pixRead("Playing.tiff");
    ocr->SetImage(img);

    Pix *pix2 = ocr->GetThresholdedImage();
    pixWrite("thresholdedImage.png", pix2, IFF_PNG);
    pixDestroy(&pix2);

    Boxa* box=ocr->GetComponentImages(tesseract::RIL_TEXTLINE,true,NULL,NULL);
    printf("Number of Text Line is %d\n",box->n);
    for (int z = 0; z < box->n; z++)
    {
        BOX* b = boxaGetBox(box, z, L_CLONE);
        ocr->SetRectangle(b->x, b->y, b->w, b->h);
        text = ocr->GetUTF8Text();
        confidence = ocr->MeanTextConf();
        printf("TEXT is %s\n",text);
    }
    pixDestroy(&img);
    ocr->End();
    return 0;
}

<<attachment: thresholdedImage.png>>

Reply via email to