I can use the code below to draw the bounding box of every word and every symbol.
Now I want to know: once I have a word, how can I find out how many symbols
that word contains?
Thanks for any info!
=================================
#include <iostream>
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
#include <stdio.h>
using namespace std;
int main() {
std::cout << "Hello, World!" << std::endl;
tesseract::TessBaseAPI api ;
api.InitForAnalysePage();
api.SetPageSegMode(tesseract::PSM_SPARSE_TEXT);
Pix *image = pixRead("/tmp/ytmp/en4.png");
//process gray color to white
l_uint32 pixel_color;
l_int32 r,g,b;
l_int32 width,height,depth;
width=0;
height=0;
pixGetDimensions(image,&width,&height,&depth);
printf("w=%d h=%d dep=%d\n",width,height,depth);
api.SetImage(image);
tesseract::PageIterator *iter = api.AnalyseLayout(true);
int word_count=0;
while (iter->Next(tesseract::RIL_WORD)) {
int left, top, right, bottom;
++word_count;
iter->BoundingBox(
tesseract::RIL_WORD,
&left, &top, &right, &bottom
);
*
//=============================================================== //i
got the word bounding box, but i want to know how may symbol in this
word?
//===============================================================*
pixRenderLine(image,left,top,left,bottom,3,L_CLEAR_PIXELS);
pixRenderLine(image,left,top,right,top,3,L_CLEAR_PIXELS);
pixRenderLine(image,left,bottom,right,bottom,3,L_CLEAR_PIXELS);
pixRenderLine(image,right,top,right,bottom,3,L_CLEAR_PIXELS);
}
iter->Begin();
while (iter->Next(tesseract::RIL_SYMBOL)) {
int left, top, right, bottom;
++word_count;
iter->BoundingBox(
tesseract::RIL_SYMBOL,
&left, &top, &right, &bottom
);
pixRenderLine(image,left,top,left,bottom,1,L_CLEAR_PIXELS);
pixRenderLine(image,left,top,right,top,1,L_CLEAR_PIXELS);
pixRenderLine(image,left,bottom,right,bottom,1,L_CLEAR_PIXELS);
pixRenderLine(image,right,top,right,bottom,1,L_CLEAR_PIXELS);
}
pixWrite("/tmp/ytmp/entt.png",image,IFF_PNG);
return 0;
}
--
You received this message because you are subscribed to the Google Groups
"tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To post to this group, send email to [email protected].
Visit this group at https://groups.google.com/group/tesseract-ocr.
To view this discussion on the web visit
https://groups.google.com/d/msgid/tesseract-ocr/1da35024-16f1-404a-aa2a-e06e1377aacf%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.