Hello community,
here is the log from the commit of package python-tesserocr for
openSUSE:Factory checked in at 2019-08-24 18:47:27
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-tesserocr (Old)
and /work/SRC/openSUSE:Factory/.python-tesserocr.new.7948 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-tesserocr"
Sat Aug 24 18:47:27 2019 rev:7 rq:725668 version:2.4.1
Changes:
--------
--- /work/SRC/openSUSE:Factory/python-tesserocr/python-tesserocr.changes
2018-12-07 14:35:05.147094570 +0100
+++
/work/SRC/openSUSE:Factory/.python-tesserocr.new.7948/python-tesserocr.changes
2019-08-24 18:47:34.657750513 +0200
@@ -1,0 +2,7 @@
+Fri Aug 23 18:14:51 UTC 2019 - Martin Herkt <[email protected]>
+
+- Update to version 2.4.1
+ * fix pixa_to_list python3 segfault
+ * fix BlockPolygon python3 segfault
+
+-------------------------------------------------------------------
Old:
----
tesserocr-2.4.0.tar.gz
New:
----
tesserocr-2.4.1.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ python-tesserocr.spec ++++++
--- /var/tmp/diff_new_pack.4kMJVM/_old 2019-08-24 18:47:37.113750276 +0200
+++ /var/tmp/diff_new_pack.4kMJVM/_new 2019-08-24 18:47:37.113750276 +0200
@@ -1,7 +1,7 @@
#
# spec file for package python-tesserocr
#
-# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany.
+# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany.
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@@ -18,7 +18,7 @@
%{?!python_module:%define python_module() python-%{**} python3-%{**}}
Name: python-tesserocr
-Version: 2.4.0
+Version: 2.4.1
Release: 0
Summary: A Python wrapper around tesseract-ocr
License: MIT
++++++ tesserocr-2.4.0.tar.gz -> tesserocr-2.4.1.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/tesserocr-2.4.0/PKG-INFO new/tesserocr-2.4.1/PKG-INFO
--- old/tesserocr-2.4.0/PKG-INFO 2018-12-05 15:37:32.000000000 +0100
+++ new/tesserocr-2.4.1/PKG-INFO 2019-08-23 18:03:12.000000000 +0200
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: tesserocr
-Version: 2.4.0
+Version: 2.4.1
Summary: A simple, Pillow-friendly, Python wrapper around tesseract-ocr API
using Cython
Home-page: https://github.com/sirfz/tesserocr
Author: Fayez Zouheiry
@@ -45,7 +45,7 @@
::
- $ apt-get install tesseract-ocr libtesseract-dev libleptonica-dev
+ $ apt-get install tesseract-ocr libtesseract-dev libleptonica-dev
pkg-config
You may need to `manually compile tesseract`_ for a more recent
version. Note that you may need
to update your ``LD_LIBRARY_PATH`` environment variable to point to
the right library versions in
@@ -127,8 +127,8 @@
with PyTessBaseAPI() as api:
for img in images:
api.SetImageFile(img)
- print api.GetUTF8Text()
- print api.AllWordConfidences()
+ print(api.GetUTF8Text())
+ print(api.AllWordConfidences())
# api is automatically finalized when used in a with-statement
(context manager).
# otherwise api.End() should be explicitly called when it's no
longer needed.
@@ -142,13 +142,13 @@
import tesserocr
from PIL import Image
- print tesserocr.tesseract_version() # print tesseract-ocr version
- print tesserocr.get_languages() # prints tessdata path and list
of available languages
+ print(tesserocr.tesseract_version()) # print tesseract-ocr version
+ print(tesserocr.get_languages()) # prints tessdata path and list
of available languages
image = Image.open('sample.jpg')
- print tesserocr.image_to_text(image) # print ocr text from image
+ print(tesserocr.image_to_text(image)) # print ocr text from image
# or
- print tesserocr.file_to_text('sample.jpg')
+ print(tesserocr.file_to_text('sample.jpg'))
``image_to_text`` and ``file_to_text`` can be used with ``threading``
to
concurrently process multiple images which is highly efficient.
@@ -168,15 +168,15 @@
with PyTessBaseAPI() as api:
api.SetImage(image)
boxes = api.GetComponentImages(RIL.TEXTLINE, True)
- print 'Found {} textline image components.'.format(len(boxes))
+ print('Found {} textline image components.'.format(len(boxes)))
for i, (im, box, _, _) in enumerate(boxes):
# im is a PIL image object
# box is a dict with x, y, w and h keys
api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
ocrResult = api.GetUTF8Text()
conf = api.MeanTextConf()
- print (u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
- "confidence: {1}, text: {2}").format(i, conf,
ocrResult, **box)
+ print(u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
+ "confidence: {1}, text: {2}".format(i, conf,
ocrResult, **box))
Orientation and script detection (OSD):
```````````````````````````````````````
@@ -193,10 +193,10 @@
it = api.AnalyseLayout()
orientation, direction, order, deskew_angle = it.Orientation()
- print "Orientation: {:d}".format(orientation)
- print "WritingDirection: {:d}".format(direction)
- print "TextlineOrder: {:d}".format(order)
- print "Deskew angle: {:.4f}".format(deskew_angle)
+ print("Orientation: {:d}".format(orientation))
+ print("WritingDirection: {:d}".format(direction))
+ print("TextlineOrder: {:d}".format(order))
+ print("Deskew angle: {:.4f}".format(deskew_angle))
or more simply with ``OSD_ONLY`` page segmentation mode:
@@ -208,8 +208,8 @@
api.SetImageFile("/usr/src/tesseract/testing/eurotext.tif")
os = api.DetectOS()
- print ("Orientation: {orientation}\nOrientation confidence:
{oconfidence}\n"
- "Script: {script}\nScript confidence:
{sconfidence}").format(**os)
+ print("Orientation: {orientation}\nOrientation confidence:
{oconfidence}\n"
+ "Script: {script}\nScript confidence:
{sconfidence}".format(**os))
more human-readable info with tesseract 4+ (demonstrates LSTM engine
usage):
@@ -221,14 +221,16 @@
api.SetImageFile("/usr/src/tesseract/testing/eurotext.tif")
os = api.DetectOrientationScript()
- print ("Orientation: {orient_deg}\nOrientation confidence:
{orient_conf}\n"
- "Script: {script_name}\nScript confidence:
{script_conf}").format(**os)
+ print("Orientation: {orient_deg}\nOrientation confidence:
{orient_conf}\n"
+ "Script: {script_name}\nScript confidence:
{script_conf}".format(**os))
Iterator over the classifier choices for a single symbol:
`````````````````````````````````````````````````````````
.. code:: python
+ from __future__ import print_function
+
from tesserocr import PyTessBaseAPI, RIL, iterate_level
with PyTessBaseAPI() as api:
@@ -243,17 +245,17 @@
symbol = r.GetUTF8Text(level) # r == ri
conf = r.Confidence(level)
if symbol:
- print u'symbol {}, conf: {}'.format(symbol, conf),
+ print(u'symbol {}, conf: {}'.format(symbol, conf),
end='')
indent = False
ci = r.GetChoiceIterator()
for c in ci:
if indent:
- print '\t\t ',
- print '\t- ',
+ print('\t\t ', end='')
+ print('\t- ', end='')
choice = c.GetUTF8Text() # c == ci
- print u'{} conf: {}'.format(choice, c.Confidence())
+ print(u'{} conf: {}'.format(choice, c.Confidence()))
indent = True
- print '---------------------------------------------'
+ print('---------------------------------------------')
Keywords: Tesseract,tesseract-ocr,OCR,optical character
recognition,PIL,Pillow,Cython
Platform: UNKNOWN
@@ -266,7 +268,6 @@
Classifier: Operating System :: POSIX
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.3
Classifier: Programming Language :: Python :: 3.4
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/tesserocr-2.4.0/README.rst
new/tesserocr-2.4.1/README.rst
--- old/tesserocr-2.4.0/README.rst 2018-08-13 19:32:31.000000000 +0200
+++ new/tesserocr-2.4.1/README.rst 2019-08-23 18:01:21.000000000 +0200
@@ -37,7 +37,7 @@
::
- $ apt-get install tesseract-ocr libtesseract-dev libleptonica-dev
+ $ apt-get install tesseract-ocr libtesseract-dev libleptonica-dev
pkg-config
You may need to `manually compile tesseract`_ for a more recent version. Note
that you may need
to update your ``LD_LIBRARY_PATH`` environment variable to point to the right
library versions in
@@ -119,8 +119,8 @@
with PyTessBaseAPI() as api:
for img in images:
api.SetImageFile(img)
- print api.GetUTF8Text()
- print api.AllWordConfidences()
+ print(api.GetUTF8Text())
+ print(api.AllWordConfidences())
# api is automatically finalized when used in a with-statement (context
manager).
# otherwise api.End() should be explicitly called when it's no longer
needed.
@@ -134,13 +134,13 @@
import tesserocr
from PIL import Image
- print tesserocr.tesseract_version() # print tesseract-ocr version
- print tesserocr.get_languages() # prints tessdata path and list of
available languages
+ print(tesserocr.tesseract_version()) # print tesseract-ocr version
+ print(tesserocr.get_languages()) # prints tessdata path and list of
available languages
image = Image.open('sample.jpg')
- print tesserocr.image_to_text(image) # print ocr text from image
+ print(tesserocr.image_to_text(image)) # print ocr text from image
# or
- print tesserocr.file_to_text('sample.jpg')
+ print(tesserocr.file_to_text('sample.jpg'))
``image_to_text`` and ``file_to_text`` can be used with ``threading`` to
concurrently process multiple images which is highly efficient.
@@ -160,15 +160,15 @@
with PyTessBaseAPI() as api:
api.SetImage(image)
boxes = api.GetComponentImages(RIL.TEXTLINE, True)
- print 'Found {} textline image components.'.format(len(boxes))
+ print('Found {} textline image components.'.format(len(boxes)))
for i, (im, box, _, _) in enumerate(boxes):
# im is a PIL image object
# box is a dict with x, y, w and h keys
api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
ocrResult = api.GetUTF8Text()
conf = api.MeanTextConf()
- print (u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
- "confidence: {1}, text: {2}").format(i, conf, ocrResult,
**box)
+ print(u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
+ "confidence: {1}, text: {2}".format(i, conf, ocrResult,
**box))
Orientation and script detection (OSD):
```````````````````````````````````````
@@ -185,10 +185,10 @@
it = api.AnalyseLayout()
orientation, direction, order, deskew_angle = it.Orientation()
- print "Orientation: {:d}".format(orientation)
- print "WritingDirection: {:d}".format(direction)
- print "TextlineOrder: {:d}".format(order)
- print "Deskew angle: {:.4f}".format(deskew_angle)
+ print("Orientation: {:d}".format(orientation))
+ print("WritingDirection: {:d}".format(direction))
+ print("TextlineOrder: {:d}".format(order))
+ print("Deskew angle: {:.4f}".format(deskew_angle))
or more simply with ``OSD_ONLY`` page segmentation mode:
@@ -200,8 +200,8 @@
api.SetImageFile("/usr/src/tesseract/testing/eurotext.tif")
os = api.DetectOS()
- print ("Orientation: {orientation}\nOrientation confidence:
{oconfidence}\n"
- "Script: {script}\nScript confidence:
{sconfidence}").format(**os)
+ print("Orientation: {orientation}\nOrientation confidence:
{oconfidence}\n"
+ "Script: {script}\nScript confidence:
{sconfidence}".format(**os))
more human-readable info with tesseract 4+ (demonstrates LSTM engine usage):
@@ -213,14 +213,16 @@
api.SetImageFile("/usr/src/tesseract/testing/eurotext.tif")
os = api.DetectOrientationScript()
- print ("Orientation: {orient_deg}\nOrientation confidence:
{orient_conf}\n"
- "Script: {script_name}\nScript confidence:
{script_conf}").format(**os)
+ print("Orientation: {orient_deg}\nOrientation confidence:
{orient_conf}\n"
+ "Script: {script_name}\nScript confidence:
{script_conf}".format(**os))
Iterator over the classifier choices for a single symbol:
`````````````````````````````````````````````````````````
.. code:: python
+ from __future__ import print_function
+
from tesserocr import PyTessBaseAPI, RIL, iterate_level
with PyTessBaseAPI() as api:
@@ -235,14 +237,14 @@
symbol = r.GetUTF8Text(level) # r == ri
conf = r.Confidence(level)
if symbol:
- print u'symbol {}, conf: {}'.format(symbol, conf),
+ print(u'symbol {}, conf: {}'.format(symbol, conf), end='')
indent = False
ci = r.GetChoiceIterator()
for c in ci:
if indent:
- print '\t\t ',
- print '\t- ',
+ print('\t\t ', end='')
+ print('\t- ', end='')
choice = c.GetUTF8Text() # c == ci
- print u'{} conf: {}'.format(choice, c.Confidence())
+ print(u'{} conf: {}'.format(choice, c.Confidence()))
indent = True
- print '---------------------------------------------'
+ print('---------------------------------------------')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/tesserocr-2.4.0/setup.py new/tesserocr-2.4.1/setup.py
--- old/tesserocr-2.4.0/setup.py 2018-11-30 15:43:23.000000000 +0100
+++ new/tesserocr-2.4.1/setup.py 2019-08-23 18:01:21.000000000 +0200
@@ -194,7 +194,6 @@
'Operating System :: POSIX',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
- 'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/tesserocr-2.4.0/tesseract.pxd
new/tesserocr-2.4.1/tesseract.pxd
--- old/tesserocr-2.4.0/tesseract.pxd 2018-11-30 16:01:37.000000000 +0100
+++ new/tesserocr-2.4.1/tesseract.pxd 2019-08-23 18:01:21.000000000 +0200
@@ -36,6 +36,7 @@
int pixWriteMemJpeg(unsigned char **, size_t *, Pix *, int, int)
int pixWriteMem(unsigned char **, size_t *, Pix *, int)
void pixDestroy(Pix **)
+ void ptaDestroy(Pta **)
int setMsgSeverity(int)
void pixaDestroy(Pixa **)
void boxaDestroy(Boxa **)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/tesserocr-2.4.0/tesserocr.egg-info/PKG-INFO
new/tesserocr-2.4.1/tesserocr.egg-info/PKG-INFO
--- old/tesserocr-2.4.0/tesserocr.egg-info/PKG-INFO 2018-12-05
15:37:31.000000000 +0100
+++ new/tesserocr-2.4.1/tesserocr.egg-info/PKG-INFO 2019-08-23
18:03:11.000000000 +0200
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: tesserocr
-Version: 2.4.0
+Version: 2.4.1
Summary: A simple, Pillow-friendly, Python wrapper around tesseract-ocr API
using Cython
Home-page: https://github.com/sirfz/tesserocr
Author: Fayez Zouheiry
@@ -45,7 +45,7 @@
::
- $ apt-get install tesseract-ocr libtesseract-dev libleptonica-dev
+ $ apt-get install tesseract-ocr libtesseract-dev libleptonica-dev
pkg-config
You may need to `manually compile tesseract`_ for a more recent
version. Note that you may need
to update your ``LD_LIBRARY_PATH`` environment variable to point to
the right library versions in
@@ -127,8 +127,8 @@
with PyTessBaseAPI() as api:
for img in images:
api.SetImageFile(img)
- print api.GetUTF8Text()
- print api.AllWordConfidences()
+ print(api.GetUTF8Text())
+ print(api.AllWordConfidences())
# api is automatically finalized when used in a with-statement
(context manager).
# otherwise api.End() should be explicitly called when it's no
longer needed.
@@ -142,13 +142,13 @@
import tesserocr
from PIL import Image
- print tesserocr.tesseract_version() # print tesseract-ocr version
- print tesserocr.get_languages() # prints tessdata path and list
of available languages
+ print(tesserocr.tesseract_version()) # print tesseract-ocr version
+ print(tesserocr.get_languages()) # prints tessdata path and list
of available languages
image = Image.open('sample.jpg')
- print tesserocr.image_to_text(image) # print ocr text from image
+ print(tesserocr.image_to_text(image)) # print ocr text from image
# or
- print tesserocr.file_to_text('sample.jpg')
+ print(tesserocr.file_to_text('sample.jpg'))
``image_to_text`` and ``file_to_text`` can be used with ``threading``
to
concurrently process multiple images which is highly efficient.
@@ -168,15 +168,15 @@
with PyTessBaseAPI() as api:
api.SetImage(image)
boxes = api.GetComponentImages(RIL.TEXTLINE, True)
- print 'Found {} textline image components.'.format(len(boxes))
+ print('Found {} textline image components.'.format(len(boxes)))
for i, (im, box, _, _) in enumerate(boxes):
# im is a PIL image object
# box is a dict with x, y, w and h keys
api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
ocrResult = api.GetUTF8Text()
conf = api.MeanTextConf()
- print (u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
- "confidence: {1}, text: {2}").format(i, conf,
ocrResult, **box)
+ print(u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
+ "confidence: {1}, text: {2}".format(i, conf,
ocrResult, **box))
Orientation and script detection (OSD):
```````````````````````````````````````
@@ -193,10 +193,10 @@
it = api.AnalyseLayout()
orientation, direction, order, deskew_angle = it.Orientation()
- print "Orientation: {:d}".format(orientation)
- print "WritingDirection: {:d}".format(direction)
- print "TextlineOrder: {:d}".format(order)
- print "Deskew angle: {:.4f}".format(deskew_angle)
+ print("Orientation: {:d}".format(orientation))
+ print("WritingDirection: {:d}".format(direction))
+ print("TextlineOrder: {:d}".format(order))
+ print("Deskew angle: {:.4f}".format(deskew_angle))
or more simply with ``OSD_ONLY`` page segmentation mode:
@@ -208,8 +208,8 @@
api.SetImageFile("/usr/src/tesseract/testing/eurotext.tif")
os = api.DetectOS()
- print ("Orientation: {orientation}\nOrientation confidence:
{oconfidence}\n"
- "Script: {script}\nScript confidence:
{sconfidence}").format(**os)
+ print("Orientation: {orientation}\nOrientation confidence:
{oconfidence}\n"
+ "Script: {script}\nScript confidence:
{sconfidence}".format(**os))
more human-readable info with tesseract 4+ (demonstrates LSTM engine
usage):
@@ -221,14 +221,16 @@
api.SetImageFile("/usr/src/tesseract/testing/eurotext.tif")
os = api.DetectOrientationScript()
- print ("Orientation: {orient_deg}\nOrientation confidence:
{orient_conf}\n"
- "Script: {script_name}\nScript confidence:
{script_conf}").format(**os)
+ print("Orientation: {orient_deg}\nOrientation confidence:
{orient_conf}\n"
+ "Script: {script_name}\nScript confidence:
{script_conf}".format(**os))
Iterator over the classifier choices for a single symbol:
`````````````````````````````````````````````````````````
.. code:: python
+ from __future__ import print_function
+
from tesserocr import PyTessBaseAPI, RIL, iterate_level
with PyTessBaseAPI() as api:
@@ -243,17 +245,17 @@
symbol = r.GetUTF8Text(level) # r == ri
conf = r.Confidence(level)
if symbol:
- print u'symbol {}, conf: {}'.format(symbol, conf),
+ print(u'symbol {}, conf: {}'.format(symbol, conf),
end='')
indent = False
ci = r.GetChoiceIterator()
for c in ci:
if indent:
- print '\t\t ',
- print '\t- ',
+ print('\t\t ', end='')
+ print('\t- ', end='')
choice = c.GetUTF8Text() # c == ci
- print u'{} conf: {}'.format(choice, c.Confidence())
+ print(u'{} conf: {}'.format(choice, c.Confidence()))
indent = True
- print '---------------------------------------------'
+ print('---------------------------------------------')
Keywords: Tesseract,tesseract-ocr,OCR,optical character
recognition,PIL,Pillow,Cython
Platform: UNKNOWN
@@ -266,7 +268,6 @@
Classifier: Operating System :: POSIX
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.3
Classifier: Programming Language :: Python :: 3.4
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/tesserocr-2.4.0/tesserocr.pyx
new/tesserocr-2.4.1/tesserocr.pyx
--- old/tesserocr-2.4.0/tesserocr.pyx 2018-11-30 16:04:36.000000000 +0100
+++ new/tesserocr-2.4.1/tesserocr.pyx 2019-08-23 18:01:38.000000000 +0200
@@ -18,7 +18,7 @@
['eng', 'osd', 'equ'])
"""
-__version__ = '2.4.0'
+__version__ = '2.4.1'
import os
from io import BytesIO
@@ -362,7 +362,7 @@
cdef pixa_to_list(Pixa *pixa):
"""Convert Pixa (Array of pixes and boxes) to list of pix, box tuples."""
- return zip((_pix_to_image(pix) for pix in pixa.pix[:pixa.n]), boxa_to_list(pixa.boxa))
+ return list(zip((_pix_to_image(pix) for pix in pixa.pix[:pixa.n]), boxa_to_list(pixa.boxa)))
cdef class PyPageIterator:
@@ -612,13 +612,16 @@
if pta == NULL:
return None
try:
- return zip((x for x in pta.x[:pta.n]), (y for y in pta.y[:pta.n]))
+ return list(zip((x for x in pta.x[:pta.n]), (y for y in pta.y[:pta.n])))
finally:
- free(pta)
+ ptaDestroy(&pta)
def GetBinaryImage(self, PageIteratorLevel level):
"""Return a binary image of the current object at the given level.
+ The image is masked along the polygon outline of the current block, as given
+ by :meth:`BlockPolygon`. (Pixels outside the mask will be white.)
+
The position and size match the return from
:meth:`BoundingBoxInternal`, and so
this could be upscaled with respect to the original input image.
@@ -640,6 +643,9 @@
"""Return an image of the current object at the given level in
greyscale
if available in the input.
+ The image is masked along the polygon outline of the current block, as given
+ by :meth:`BlockPolygon`. (Pixels outside the mask will be white.)
+
To guarantee a binary image use :meth:`BinaryImage`.
Args:
@@ -2338,6 +2344,7 @@
rotation to be applied to the page for the text to be
upright and readable.
- oconfidence: Orientation confidence.
- script: Index of the script with the highest score for this
orientation.
+ (This is _not_ the index of :meth:`get_languages`, which is in alphabetical order.)
- sconfidence: script confidence.
"""
cdef OSResults results
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/tesserocr-2.4.0/tests/test_api.py
new/tesserocr-2.4.1/tests/test_api.py
--- old/tesserocr-2.4.0/tests/test_api.py 2018-11-30 16:23:48.000000000 +0100
+++ new/tesserocr-2.4.1/tests/test_api.py 2019-08-23 18:01:21.000000000 +0200
@@ -225,7 +225,10 @@
orientation = self._api.DetectOS()
all(self.assertIn(k, orientation) for k in ['sconfidence',
'oconfidence', 'script', 'orientation'])
self.assertEqual(orientation['orientation'], 0)
- self.assertEqual(orientation['script'], 1)
+ languages = tesserocr.get_languages()[1] # this is sorted alphabetically!
+ self.assertLess(orientation['script'], len(languages))
+ script_name = languages[orientation['script']] # therefore does not work
+ #self.assertEqual(script_name, 'Latin') # cannot test: not reliable
if _TESSERACT_VERSION >= 0x3999800:
orientation = self._api.DetectOrientationScript()
all(self.assertIn(k, orientation) for k in ['orient_deg',
'orient_conf', 'script_name', 'script_conf'])
@@ -263,6 +266,51 @@
# Test if empty
self.assertFalse(result)
+ def test_layout_getcomponents(self):
+ self._api.Init()
+ self._api.SetImageFile(self._image_file)
+ result = self._api.GetComponentImages(tesserocr.RIL.BLOCK, True)
+ # Test if not empty
+ self.assertTrue(result)
+ _, xywh, _, _ = result[0] # bbox of largest
+ self.assertIn('w', xywh)
+ self.assertIn('h', xywh)
+ area = xywh['w'] * xywh['h']
+ # Test if the largest block is quite large
+ self.assertGreater(area, 400000)
+
+ def test_layout_boundingbox(self):
+ self._api.Init()
+ self._api.SetImageFile(self._image_file)
+ layout = self._api.AnalyseLayout()
+ # Test if not empty
+ self.assertTrue(layout)
+ self.assertFalse(layout.Empty(tesserocr.RIL.BLOCK))
+ result = layout.BoundingBox(tesserocr.RIL.BLOCK) # bbox of largest
+ self.assertIsNot(result, None)
+ x0, y0, x1, y1 = result
+ area = (x1 - x0) * (y1 - y0)
+ # Test if the largest block is quite large
+ self.assertGreater(area, 400000)
+
+ def test_layout_blockpolygon(self):
+ self._api.Init()
+ self._api.SetImageFile(self._image_file)
+ layout = self._api.AnalyseLayout()
+ # Test if not empty
+ self.assertTrue(layout)
+ self.assertFalse(layout.Empty(tesserocr.RIL.BLOCK))
+ result = layout.BlockPolygon() # polygon of largest
+ # Test if not empty
+ self.assertIsNot(result, None)
+ # Test there are at least 4 contour points
+ self.assertGreaterEqual(len(result), 4)
+ xs, ys = zip(*result)
+ x0, y0, x1, y1 = min(xs), min(ys), max(xs), max(ys)
+ area = (x1 - x0) * (y1 - y0)
+ # Test if the largest block is quite large
+ self.assertGreater(area, 400000)
+
def test_recognize(self):
"""Test Recognize with and without timeout."""
self._api.SetImageFile(self._image_file)