I have found the solution, if anyone else is interested. Here is a sample 
application:

int main()
{
PIX* image = pixRead("R:/P12_0.jpg");

string outText = "";
tesseract::TessBaseAPI* api = new tesseract::TessBaseAPI();
api->Init(NULL, "deu", tesseract::OcrEngineMode::OEM_LSTM_ONLY);
api->SetVariable("tessedit_char_whitelist", "0123456789 .-LBME");
api->SetImage(image);
api->SetSourceResolution(300);

api->SetPageSegMode(tesseract::PSM_AUTO);
api->SetVariable("lstm_choice_mode", "2");

string text = api->GetUTF8Text();
cout << "text: " << text << endl;

tesseract::ResultIterator* res_it = api->GetIterator();
tesseract::PageIteratorLevel level = tesseract::RIL_SYMBOL;

int i = 0;
if (res_it != 0) {
do { 
string word = res_it->GetUTF8Text(tesseract::RIL_WORD);
tesseract::ChoiceIterator ci(*res_it);
do {
if (ci.Confidence() > 60) {
const char* ch = ci.GetUTF8Text();

cout << ch;
}


} while (ci.Next());

i++;
if (i == word.length()) {
cout << " ";
i = 0;
}
} while (res_it->Next(level));

}
}

[email protected] schrieb am Donnerstag, 3. September 2020 um 08:18:09 
UTC+2:

> forgot to mention that I am using tesseract C++ API:
>
>         tesseract::ResultIterator* res_it = api->GetIterator();
> tesseract::PageIteratorLevel level = tesseract::RIL_SYMBOL;                
>         
>         tesseract::ChoiceIterator ci(*res_it);
>         do {
>   if (ci.Confidence() >= 0) {
>     Choice* c = new Choice();
>     const char* ch = ci.GetUTF8Text();  
>   }
>           } while (ci.Next());
> [email protected] schrieb am Donnerstag, 3. September 2020 um 08:10:53 
> UTC+2:
>
>> Hi all,
>> I am using the new choice iterator in Tesseract 5 to get the confidences 
>> for all choices for each symbol of my text. But spaces (word boundaries) 
>> are not shown, so I have no way to know when a space occurs between symbols. 
>> Is there a way, for example, to combine the word iterator with the choice 
>> iterator, or any other way to know when a new word starts?
>>
>

-- 
You received this message because you are subscribed to the Google Groups 
"tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/tesseract-ocr/525e4eb6-43b4-4d8f-b68f-b9b82e8b0937n%40googlegroups.com.

Reply via email to