Dňa 14.03.2012 19:49, Curtis wrote / napísal(a):
> I am using the vs 3 .net wrapper.
> When I run the function Recognize it ocrs the image fine and I can get
> the string.
> I need the confidence level of each character, but it is always 0.
> What am I doing wrong?
>
>
>
> Dim image As New Bitmap("C:\MyImage.tif")
> Dim ocr As New TesseractProcessor
>
> ocr.Init(Nothing, "eng", False)
> Console.WriteLine(ocr.Recognize(image))
>
>
> ocr.InitForAnalysePage()
> ocr.SetVariable("tessedit_thresholding_method", "1")
> ocr.SetVariable("save_best_choices", "T")
>
>
> Dim doc As DocumentLayout = ocr.AnalyseLayout(image)
> For Each blk As OCR.TesseractWrapper.Block In doc.Blocks
> Console.WriteLine("Block Confidence: " & blk.Confidence)
>
>
> For Each para As Paragraph In blk.Paragraphs
> Console.WriteLine("para Confidence: " &
> para.Confidence)
>
> For Each ln As TextLine In para.Lines
> Console.WriteLine("ln Confidence: " &
> ln.Confidence)
>
> For Each wrd As Word In ln.Words
> Console.WriteLine("wrd Confidence: " &
> wrd.Confidence)
> Console.WriteLine("wrd Text: " & wrd.Text)
>
> For Each ch As Character In wrd.CharList
> Console.WriteLine("V:" & ch.Value)
> Console.WriteLine("C:" & ch.Confidence)
> Next
>
> Next
>
> Next
> Next
> Next
>
Hi,
I am not familiar with .NET, so I cannot help you directly.
It looks like the .NET wrapper has not been updated for quite a long time
(revision 590, without the 3.01 code)...
Anyway, if somebody is interested in character confidence, they can try (in
C++) GetComponentImages with tesseract::RIL_SYMBOL, followed by
SetPageSegMode with tesseract::PSM_SINGLE_CHAR. A simple test file is attached.
Tested with the 3.02 code (from svn).
Zdenko
--
You received this message because you are subscribed to the Google
Groups "tesseract-ocr" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to
[email protected]
For more options, visit this group at
http://groups.google.com/group/tesseract-ocr?hl=en
/*
compile:
$ g++ test_confidence.cpp -I/usr/local/include/tesseract/ -I/usr/include/leptonica/ \
-ltesseract -llept -o test_confidence
run:
$ ./test_confidence
*/
#include <cstdio>
#include <cstring>
#include <baseapi.h>
#include <allheaders.h>
int main() {
Pix *image;
BOX *box;
l_int32 i, nwords;
char* outText;
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
if (api->Init(NULL, "eng")) {
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
}
image = pixRead("/usr/src/tesseract-3.02/phototest.tif");
api->SetImage(image);
// split image to symbols
Boxa* boxes = api->GetComponentImages(tesseract::RIL_SYMBOL, true,
NULL, NULL);
api->SetPageSegMode(tesseract::PSM_SINGLE_CHAR);
nwords = boxaGetCount(boxes);
printf("Boxa count: %d\n", nwords);
for (i = 0; i < nwords; i++) {
box = boxaGetBox(boxes, i, L_CLONE);
api->SetRectangle(box->x, box->y, box->w, box->h);
outText = api->GetUTF8Text();
// remove "\n" from outText
outText[strcspn(outText, "\n")] = '\0';
int conf = api->MeanTextConf();
printf("Box[%d]: x=%d, y=%d, string='%s', confidence: %d\n",
i, box->x, box->y, outText, conf);
}
api->Clear();
api->End();
delete [] outText;
pixDestroy(&image);
return 0;
}