I do not understand the problem. When I run the attached C++ test of the API, I get:
$ ./test_font_features
OSD: Weak margin (1.42) for 27 blob text block, but using orientation
anyway: 0
SİSTEK  => fontname: Trebuchet_MS_Bold, size: 36, font_id: 296, bold: 1,
italic: 0, underlined: 0, monospace: 0, serif: 0, smallcap: 0
ELEKTRİK        => fontname: Trebuchet_MS_Bold, size: 25, font_id: 296,
bold: 1, italic: 0, underlined: 0, monospace: 0, serif: 0, smallcap: 1
MÜHENDİSLİK     => fontname: Trebuchet_MS_Bold, size: 25, font_id: 296,
bold: 1, italic: 0, underlined: 0, monospace: 0, serif: 0, smallcap: 1
MÜŞAVİRLİK      => fontname: Trebuchet_MS_Bold, size: 25, font_id: 296,
bold: 1, italic: 0, underlined: 0, monospace: 0, serif: 0, smallcap: 1

So "SİSTEK" is read. If you do not want to display it, then just add a
condition that skips text with size > 30...

Zdenko


On Sun, Dec 8, 2013 at 8:39 AM, Hsrt <[email protected]> wrote:

>
> I wanted to say their font size... "px" was not true keyword....
>
> --
> --
> You received this message because you are subscribed to the Google
> Groups "tesseract-ocr" group.
> To post to this group, send email to [email protected]
> To unsubscribe from this group, send email to
> [email protected]
> For more options, visit this group at
> http://groups.google.com/group/tesseract-ocr?hl=en
>
> ---
> You received this message because you are subscribed to the Google Groups
> "tesseract-ocr" group.
> To unsubscribe from this group and stop receiving emails from it, send an
> email to [email protected].
> For more options, visit https://groups.google.com/groups/opt_out.
>

-- 
-- 
You received this message because you are subscribed to the Google
Groups "tesseract-ocr" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to
[email protected]
For more options, visit this group at
http://groups.google.com/group/tesseract-ocr?hl=en

--- 
You received this message because you are subscribed to the Google Groups 
"tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.
/*
  g++ -o test_font_features test_font_features-3.cpp -ltesseract -llept
*/

#include <cstdio>

#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

/*
 * Recognizes the image "s.png" with the "tur" language data and prints, for
 * every word the result iterator yields, the recognized text followed by the
 * font attributes reported by WordFontAttributes().
 *
 * Returns 0 on success and 1 if Tesseract cannot be initialized, the image
 * cannot be read, or recognition reports a failure.
 */
int main() {
    tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();

    if (api->Init(NULL, "tur")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        delete api;
        return 1;
    }

    Pix *pix = pixRead("s.png");
    if (pix == NULL) {
        // Do not hand a NULL image to SetImage(); report the problem instead.
        fprintf(stderr, "Could not read image s.png.\n");
        api->End();
        delete api;
        return 1;
    }

    api->SetPageSegMode(tesseract::PSM_AUTO_OSD);
    api->SetImage(pix);

    int status = 0;
    if (api->Recognize(0) != 0) {
        fprintf(stderr, "Recognition failed.\n");
        status = 1;
    } else {
        tesseract::ResultIterator *ri = api->GetIterator();
        if (ri != NULL) {
            do {
                // GetUTF8Text() returns a new[]-allocated string owned by us.
                const char *word = ri->GetUTF8Text(tesseract::RIL_WORD);
                if (word != NULL) {
                    // Pre-initialize the out-parameters so that defined values
                    // are printed even if WordFontAttributes() returns without
                    // filling them in.
                    bool bold = false, italic = false, underlined = false;
                    bool monospace = false, serif = false, smallcaps = false;
                    int pointsize = 0, font_id = 0;
                    const char *font_name =
                        ri->WordFontAttributes(&bold, &italic, &underlined,
                                               &monospace, &serif,
                                               &smallcaps, &pointsize,
                                               &font_id);
                    // Passing NULL to "%s" is undefined behaviour, so
                    // substitute a placeholder when no font name is reported.
                    if (font_name == NULL) {
                        font_name = "(unknown)";
                    }
                    printf("%s \t=> fontname: %s, size: %d, font_id: %d, bold: %d,"
                           " italic: %d, underlined: %d, monospace: %d, serif: %d,"
                           " smallcap: %d\n", word, font_name, pointsize, font_id,
                           bold, italic, underlined, monospace, serif, smallcaps);
                    delete[] word;
                }
            } while (ri->Next(tesseract::RIL_WORD));
            // Release the iterator before shutting the API down.
            delete ri;
        }
    }

    api->End();
    pixDestroy(&pix);
    delete api;
    return status;
}

Reply via email to