Could anyone explain why Tesseract returns the "conf" values as strings?
1. import pytesseract
2. import cv2
3. from pytesseract import Output
4.
5. pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
6. img =
cv2.imread(r"C:\Users\Documents\Python\OCR\Programa\teste_manuscrito_01.jpg")
7. rgb = cv2.cvtColor (img, cv2.COLOR_BGR2RGB)
8.
9. resultado = pytesseract.image_to_data(rgb, lang='por',
output_type=Output.DICT)
10.
11. print(resultado)
12.
13. {'level': [1, 2, 3, 4, 5, 4, 5, 5],
14. 'page_num': [1, 1, 1, 1, 1, 1, 1, 1],
15. 'block_num': [0, 1, 1, 1, 1, 1, 1, 1],
16. 'par_num': [0, 0, 1, 1, 1, 1, 1, 1],
17. 'line_num': [0, 0, 0, 1, 1, 2, 2, 2],
18. 'word_num': [0, 0, 0, 0, 1, 0, 1, 2],
19. 'left': [0, 38, 38, 38, 38, 102, 102, 307],
20. 'top': [0, 79, 79, 79, 79, 228, 233, 228],
21. 'width': [700, 607, 607, 607, 607, 532, 77, 327],
22. 'height': [400, 236, 236, 92, 92, 87, 76, 87],
23. 'conf': ['-1', '-1', '-1', '-1', '90.214363', '-1', '77.749153', '61.677670'],
24. 'text': ['', '', '', '', 'TESTANDO', '', 'O', 'OCR...']}
///////////////////////////////////////////
confianca = int(resultado['conf'] [i])
ValueError: invalid literal for int() with base 10: '90.214363'
--
You received this message because you are subscribed to the Google Groups
"tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To view this discussion on the web visit
https://groups.google.com/d/msgid/tesseract-ocr/6339b18a-95a6-4085-abff-7a0e25af4cd4n%40googlegroups.com.