I get a null PageIterator ("Empty page!!") via the C API from Python on some 
rotated images (the CLI executable works fine); see issue #326 
<https://github.com/tesseract-ocr/tesseract/issues/326>

image: eurotext.tif 
<https://github.com/tesseract-ocr/tesseract/blob/master/testing/eurotext.tif>

setup

reubano@tokpro [~]⚡ convert eurotext.tif -rotate 3 +repage eurotext_03.tif
reubano@tokpro [~]⚡ convert eurotext.tif -rotate 5 +repage eurotext_05.tif

`bug.py`

#!/usr/bin/python
# -*- coding: utf-8 -*-

from __future__ import print_function, division

from os import path as p, environ
from ctypes import (
    CDLL, POINTER, Structure, c_char_p, c_bool, c_int, c_float, byref)

from ctypes.util import find_library

# find_library() expects the bare library name (no "lib" prefix, no file
# suffix); the full macOS file names are kept only as a fallback.
LIBTESS = find_library('tesseract') or find_library('libtesseract.dylib')
LIBLEPT = find_library('lept') or find_library('liblept.dylib')
# Data path handed to Tesseract; the TESSDATA_PREFIX environment variable
# overrides the built-in default.
TESSDATA_PREFIX = environ.get('TESSDATA_PREFIX', '/opt/local/share')


class TessBaseAPI(Structure):
    """Opaque stand-in for Tesseract's base API object; declares no fields."""


class Pix(Structure):
    """Opaque stand-in for a Leptonica image object; declares no fields."""


class TessPageIterator(Structure):
    """Opaque stand-in for Tesseract's page iterator; declares no fields."""


def create_tess_api(prefix=TESSDATA_PREFIX, lang='eng'):
    """Load Tesseract and Leptonica and return an initialised base API.

    Declares the ctypes signatures of the C functions this script calls,
    creates a ``TessBaseAPI`` handle and initialises it with
    ``TessBaseAPIInit3``.

    Args:
        prefix: Data path handed to ``TessBaseAPIInit3`` (text or bytes).
        lang: Language code handed to ``TessBaseAPIInit3`` (text or bytes).

    Returns:
        A ``(tesseract, leptonica, api)`` tuple: the two loaded ``CDLL``
        objects and the ``POINTER(TessBaseAPI)`` handle.

    Raises:
        OSError: If either shared library could not be located.
    """
    import warnings

    if not LIBTESS or not LIBLEPT:
        # On POSIX, CDLL(None) opens the running process itself instead of
        # failing, which would only surface later as a confusing
        # AttributeError on the first symbol lookup.
        raise OSError(
            'could not locate shared libraries (tesseract=%r, leptonica=%r)'
            % (LIBTESS, LIBLEPT))

    def as_bytes(value):
        # c_char_p parameters only accept bytes on Python 3; on Python 2 a
        # plain str already is bytes and passes through untouched.
        return value if isinstance(value, bytes) else value.encode('utf-8')

    tesseract = CDLL(LIBTESS)
    leptonica = CDLL(LIBLEPT)
    base_api = POINTER(TessBaseAPI)
    page_it = POINTER(TessPageIterator)

    tesseract.TessBaseAPICreate.restype = base_api
    tesseract.TessBaseAPIInit3.argtypes = [base_api, c_char_p, c_char_p]
    # Init3 reports an int status (0 on success), not a boolean.
    tesseract.TessBaseAPIInit3.restype = c_int
    tesseract.TessBaseAPISetPageSegMode.argtypes = [base_api, c_int]
    tesseract.TessBaseAPISetPageSegMode.restype = None
    tesseract.TessBaseAPISetImage2.argtypes = [base_api, POINTER(Pix)]
    tesseract.TessBaseAPISetImage2.restype = None
    tesseract.TessBaseAPIAnalyseLayout.argtypes = [base_api]
    tesseract.TessBaseAPIAnalyseLayout.restype = page_it
    tesseract.TessPageIteratorOrientation.argtypes = [
        page_it, POINTER(c_int), POINTER(c_int), POINTER(c_int),
        POINTER(c_float)]
    tesseract.TessPageIteratorOrientation.restype = None

    # Destructors, declared so callers can release what this API hands out.
    tesseract.TessPageIteratorDelete.argtypes = [page_it]
    tesseract.TessPageIteratorDelete.restype = None
    tesseract.TessBaseAPIDelete.argtypes = [base_api]
    tesseract.TessBaseAPIDelete.restype = None

    leptonica.pixRead.argtypes = [c_char_p]
    leptonica.pixRead.restype = POINTER(Pix)
    leptonica.pixDestroy.argtypes = [POINTER(POINTER(Pix))]
    leptonica.pixDestroy.restype = None

    api = tesseract.TessBaseAPICreate()
    status = tesseract.TessBaseAPIInit3(api, as_bytes(prefix), as_bytes(lang))

    if status != 0:
        # Warn instead of raising so callers that previously carried on
        # after a failed initialisation keep working.
        warnings.warn(
            'TessBaseAPIInit3 failed with status %s (prefix=%r, lang=%r)'
            % (status, prefix, lang))

    return tesseract, leptonica, api

def get_orientation(tesseract, leptonica, api, path, mode=1):
    """Run layout analysis on the image at *path* and report its orientation.

    Prints ``"<path>: <orientation>"`` when layout analysis yields a page
    iterator, exactly as before, and now also returns the value.

    Args:
        tesseract: Loaded Tesseract library object.
        leptonica: Loaded Leptonica library object.
        api: Base API handle to run the analysis with.
        path: Image file path (text or bytes).
        mode: Page segmentation mode passed to
            ``TessBaseAPISetPageSegMode``.

    Returns:
        The integer orientation written by ``TessPageIteratorOrientation``,
        or ``None`` when the image could not be read or layout analysis
        returned a null page iterator.
    """
    import sys

    # pixRead takes a C string: bytes are required on Python 3, while a
    # Python 2 str already is bytes. The caller's path is kept for printing.
    raw_path = path
    if not isinstance(raw_path, bytes):
        raw_path = raw_path.encode(sys.getfilesystemencoding() or 'utf-8')

    tesseract.TessBaseAPISetPageSegMode(api, mode)
    pix = leptonica.pixRead(raw_path)

    if not pix:
        # Unreadable image: nothing to analyse and nothing to release.
        return None

    try:
        tesseract.TessBaseAPISetImage2(api, pix)
        it = tesseract.TessBaseAPIAnalyseLayout(api)

        if not it:
            return None

        try:
            orientation, direction, line_order = c_int(), c_int(), c_int()
            skew = c_float()

            tesseract.TessPageIteratorOrientation(
                it, byref(orientation), byref(direction), byref(line_order),
                byref(skew))
        finally:
            # The iterator is owned by the caller and must be deleted.
            tesseract.TessPageIteratorDelete(it)
    finally:
        # pixDestroy takes a Pix** and drops our reference to the image.
        leptonica.pixDestroy(byref(pix))

    print('%s: %s' % (path, orientation.value))
    return orientation.value

if __name__ == '__main__':
    for path in ['eurotext.tif', 'eurotext_03.tif', 'eurotext_05.tif']:
        # Each image is analysed with its own freshly created API handle.
        tesseract, leptonica, api = create_tess_api()

        try:
            get_orientation(tesseract, leptonica, api, path)
        finally:
            # Release the handle from TessBaseAPICreate so repeated runs do
            # not accumulate one live API instance per image.
            tesseract.TessBaseAPIDelete(api)


output

reubano@tokpro [~]⚡ python bug.py 
eurotext.tif: 0
eurotext_03.tif: 0
Empty page!!

reubano@tokpro [~]⚡ tesseract eurotext_05.tif - -psm 0 
Orientation: 0
Orientation in degrees: 0
Orientation confidence: 18.72
Script: 1
Script confidence: 19.08



Mac OS X 10.9.5
system

reubano@tokpro [~]⚡ tesseract --version
tesseract 3.04.00
 leptonica-1.71
  libgif 4.2.3 : libjpeg 9a : libpng 1.6.21 : libtiff 4.0.6 : zlib 1.2.8 : 
libwebp 0.5.0 : libopenjp2 2.1.0





-- 
You received this message because you are subscribed to the Google Groups 
"tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at https://groups.google.com/group/tesseract-ocr.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/tesseract-ocr/b9b66578-3003-46a1-93de-40b590868c6b%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.

Reply via email to