>
> Hello, This is my target_image.tif  
>
<https://i.stack.imgur.com/BNXbE.png>
 

> I have modified your code slightly; here is my version:
>
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
//#include<opencv.hpp>
//using namespace cv;
using namespace std;


int main()
{
 tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();


 if (api->Init(".\\tessdata", "eng")) {
 fprintf(stderr, "Could not initialize tesseract.\n");
 exit(1);
 }
 api->SetPageSegMode(tesseract::PSM_SINGLE_BLOCK);


 //Mat  mat_image = imread("target_image.tif", 0);
 Pix *image = pixRead("target_image.tif");
 //cvtColor(mat_image, mat_image, CV_GRAY2BGR);
 api->SetImage(image);
 api->SetSourceResolution(300);
 //Important
 api->Recognize(NULL);


 tesseract::ResultIterator* ri = api->GetIterator();
 tesseract::PageIteratorLevel level = tesseract::RIL_SYMBOL;


 //Mat image_rect_bin(mat_image.size(), CV_8UC1, Scalar(0));
 int line = 0;
 if (ri != 0) {
 do {
 const char* symbol = ri->GetUTF8Text(level);
 if (ri->IsAtBeginningOf(tesseract::RIL_TEXTLINE))
 line++;


 if (symbol != 0) {
 int x1, y1, x2, y2;
 ri->BoundingBox(level, &x1, &y1, &x2, &y2);
 tesseract::ChoiceIterator ci(*ri);
 do {
 const char* choice = ci.GetUTF8Text();
 //rectangle(image_rect_bin, Point(x1, y1), Point(x2, y2), Scalar(255), -1);
 } while (ci.Next());
 }
 delete[] symbol;
 } while (ri->Next(level));
 }
 api->End();


 return 0;
} 

With this I think I can get all the rectangles (the rectangles are important 
for me). But when I use OpenCV <https://opencv.org/> to draw the rectangles 
I got, the result looks like this:

<https://i.stack.imgur.com/415JA.png>


As you can see, I get many small, disconnected rectangles from 
target_image.tif. Could you help me again, please?


I think there is a threshold that controls this behavior, but I cannot find 
anything about it in the documentation.

-- 
You received this message because you are subscribed to the Google Groups 
"tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at https://groups.google.com/group/tesseract-ocr.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/tesseract-ocr/3961bba3-ab3f-4e7b-a723-f7f452ddecc5%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.

Reply via email to