>
> Hello, this is my target_image.tif:
>
<https://i.stack.imgur.com/BNXbE.png>
> I have changed your code a little, to this:
>
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
//#include<opencv.hpp>
//using namespace cv;
using namespace std;
int main()
{
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
if (api->Init(".\\tessdata", "eng")) {
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
}
api->SetPageSegMode(tesseract::PSM_SINGLE_BLOCK);
//Mat mat_image = imread("target_image.tif", 0);
Pix *image = pixRead("target_image.tif");
//cvtColor(mat_image, mat_image, CV_GRAY2BGR);
api->SetImage(image);
api->SetSourceResolution(300);
//Important
api->Recognize(NULL);
tesseract::ResultIterator* ri = api->GetIterator();
tesseract::PageIteratorLevel level = tesseract::RIL_SYMBOL;
//Mat image_rect_bin(mat_image.size(), CV_8UC1, Scalar(0));
int line = 0;
if (ri != 0) {
do {
const char* symbol = ri->GetUTF8Text(level);
if (ri->IsAtBeginningOf(tesseract::RIL_TEXTLINE))
line++;
if (symbol != 0) {
int x1, y1, x2, y2;
ri->BoundingBox(level, &x1, &y1, &x2, &y2);
tesseract::ChoiceIterator ci(*ri);
do {
const char* choice = ci.GetUTF8Text();
//rectangle(image_rect_bin, Point(x1, y1), Point(x2, y2), Scalar(255), -1);
} while (ci.Next());
}
delete[] symbol;
} while (ri->Next(level));
}
api->End();
return 0;
}
With this, I think I can get all the rectangles (the rectangles are what matter to me).
But if I use OpenCV <https://opencv.org/> to draw the rectangles I got,
the result looks like this:
<https://i.stack.imgur.com/415JA.png>
As you can see, I get many separate, disconnected rectangles from
target_image.tif. Could you help me again, please?
I think there is a threshold that controls this behavior, but I cannot find
anything about it in the documentation.
--
You received this message because you are subscribed to the Google Groups
"tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To post to this group, send email to [email protected].
Visit this group at https://groups.google.com/group/tesseract-ocr.
To view this discussion on the web visit
https://groups.google.com/d/msgid/tesseract-ocr/3961bba3-ab3f-4e7b-a723-f7f452ddecc5%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.