I get a null PageIterator ("Empty page!!") via the C API from Python on some rotated images (the CLI executable works fine); see issue #326 <https://github.com/tesseract-ocr/tesseract/issues/326>.
image: eurotext.tif <https://github.com/tesseract-ocr/tesseract/blob/master/testing/eurotext.tif>

setup

```
reubano@tokpro [~]⚡ convert eurotext.tif -rotate 3 +repage eurotext_03.tif
reubano@tokpro [~]⚡ convert eurotext.tif -rotate 5 +repage eurotext_05.tif
```

`bug.py`

```python
#!/usr/bin/python
# -*- coding: utf-8 -*-

from __future__ import print_function, division

from os import path as p, environ
from ctypes import (
    CDLL, POINTER, Structure, c_char_p, c_bool, c_int, c_float, byref)
from ctypes.util import find_library

LIBTESS = find_library('libtesseract.dylib')
LIBLEPT = find_library('liblept.dylib')
TESSDATA_PREFIX = environ.get('TESSDATA_PREFIX', '/opt/local/share')


class TessBaseAPI(Structure):
    pass


class Pix(Structure):
    pass


class TessPageIterator(Structure):
    pass


def create_tess_api(prefix=TESSDATA_PREFIX, lang='eng'):
    tesseract = CDLL(LIBTESS)
    leptonica = CDLL(LIBLEPT)

    base_api = POINTER(TessBaseAPI)
    argtypes = [base_api, c_char_p, c_char_p]

    tesseract.TessBaseAPICreate.restype = base_api
    tesseract.TessBaseAPIInit3.argtypes = argtypes
    tesseract.TessBaseAPIInit3.restype = c_bool
    tesseract.TessBaseAPISetImage2.restype = None
    tesseract.TessBaseAPISetImage2.argtypes = [base_api, POINTER(Pix)]
    tesseract.TessBaseAPIAnalyseLayout.argtypes = [base_api]
    tesseract.TessBaseAPIAnalyseLayout.restype = POINTER(TessPageIterator)
    tesseract.TessPageIteratorOrientation.argtypes = [
        POINTER(TessPageIterator), POINTER(c_int), POINTER(c_int),
        POINTER(c_int), POINTER(c_float)]
    tesseract.TessPageIteratorOrientation.restype = None

    api = tesseract.TessBaseAPICreate()
    tesseract.TessBaseAPIInit3(api, prefix, lang)
    leptonica.pixRead.argtypes = [c_char_p]
    leptonica.pixRead.restype = POINTER(Pix)
    return tesseract, leptonica, api


def get_orientation(tesseract, leptonica, api, path, mode=1):
    tesseract.TessBaseAPISetPageSegMode(api, mode)
    pix = leptonica.pixRead(path)
    tesseract.TessBaseAPISetImage2(api, pix)
    it = tesseract.TessBaseAPIAnalyseLayout(api)

    if it:
        orientation, direction, line_order = c_int(), c_int(), c_int()
        skew = c_float()
        tesseract.TessPageIteratorOrientation(
            it, byref(orientation), byref(direction), byref(line_order),
            byref(skew))
        print('%s: %s' % (path, orientation.value))


if __name__ == '__main__':
    for path in ['eurotext.tif', 'eurotext_03.tif', 'eurotext_05.tif']:
        tesseract, leptonica, api = create_tess_api()
        orientation = get_orientation(tesseract, leptonica, api, path)
```

output

```
reubano@tokpro [~]⚡ python bug.py
eurotext.tif: 0
eurotext_03.tif: 0
Empty page!!
reubano@tokpro [~]⚡ tesseract eurotext_05.tif - -psm 0
Orientation: 0
Orientation in degrees: 0
Orientation confidence: 18.72
Script: 1
Script confidence: 19.08
```

mac osx 10.9.5 system

```
reubano@tokpro [~]⚡ tesseract --version
tesseract 3.04.00
 leptonica-1.71
  libgif 4.2.3 : libjpeg 9a : libpng 1.6.21 : libtiff 4.0.6 : zlib 1.2.8 : libwebp 0.5.0 : libopenjp2 2.1.0
```

--
You received this message because you are subscribed to the Google Groups "tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email to [email protected].
To post to this group, send email to [email protected].
Visit this group at https://groups.google.com/group/tesseract-ocr.
To view this discussion on the web visit https://groups.google.com/d/msgid/tesseract-ocr/b9b66578-3003-46a1-93de-40b590868c6b%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.

