commit python-tesserocr for openSUSE:Factory

root Sat, 24 Aug 2019 09:47:42 -0700

Hello community,

here is the log from the commit of package python-tesserocr for 
openSUSE:Factory checked in at 2019-08-24 18:47:27
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-tesserocr (Old)
 and      /work/SRC/openSUSE:Factory/.python-tesserocr.new.7948 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "python-tesserocr"

Sat Aug 24 18:47:27 2019 rev:7 rq:725668 version:2.4.1

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-tesserocr/python-tesserocr.changes        2018-12-07 14:35:05.147094570 +0100
+++ /work/SRC/openSUSE:Factory/.python-tesserocr.new.7948/python-tesserocr.changes      2019-08-24 18:47:34.657750513 +0200
@@ -1,0 +2,7 @@
+Fri Aug 23 18:14:51 UTC 2019 - Martin Herkt <9+suse@cirno.systems>
+
+- Update to version 2.4.1
+  * fix pixa_to_list python3 segfault
+  * fix BlockPolygon python3 segfault
+
+-------------------------------------------------------------------

Old:
----
  tesserocr-2.4.0.tar.gz

New:
----
  tesserocr-2.4.1.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-tesserocr.spec ++++++
--- /var/tmp/diff_new_pack.4kMJVM/_old  2019-08-24 18:47:37.113750276 +0200
+++ /var/tmp/diff_new_pack.4kMJVM/_new  2019-08-24 18:47:37.113750276 +0200
@@ -1,7 +1,7 @@
 #
 # spec file for package python-tesserocr
 #
-# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany.
+# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany.
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -18,7 +18,7 @@
 
 %{?!python_module:%define python_module() python-%{**} python3-%{**}}
 Name:           python-tesserocr
-Version:        2.4.0
+Version:        2.4.1
 Release:        0
 Summary:        A Python wrapper around tesseract-ocr
 License:        MIT

++++++ tesserocr-2.4.0.tar.gz -> tesserocr-2.4.1.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.0/PKG-INFO new/tesserocr-2.4.1/PKG-INFO
--- old/tesserocr-2.4.0/PKG-INFO        2018-12-05 15:37:32.000000000 +0100
+++ new/tesserocr-2.4.1/PKG-INFO        2019-08-23 18:03:12.000000000 +0200
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tesserocr
-Version: 2.4.0
+Version: 2.4.1
 Summary: A simple, Pillow-friendly, Python wrapper around tesseract-ocr API 
using Cython
 Home-page: https://github.com/sirfz/tesserocr
 Author: Fayez Zouheiry
@@ -45,7 +45,7 @@
         
         ::
         
-            $ apt-get install tesseract-ocr libtesseract-dev libleptonica-dev
+            $ apt-get install tesseract-ocr libtesseract-dev libleptonica-dev 
pkg-config
         
         You may need to `manually compile tesseract`_ for a more recent 
version. Note that you may need
         to update your ``LD_LIBRARY_PATH`` environment variable to point to 
the right library versions in
@@ -127,8 +127,8 @@
             with PyTessBaseAPI() as api:
                 for img in images:
                     api.SetImageFile(img)
-                    print api.GetUTF8Text()
-                    print api.AllWordConfidences()
+                    print(api.GetUTF8Text())
+                    print(api.AllWordConfidences())
             # api is automatically finalized when used in a with-statement 
(context manager).
             # otherwise api.End() should be explicitly called when it's no 
longer needed.
         
@@ -142,13 +142,13 @@
             import tesserocr
             from PIL import Image
         
-            print tesserocr.tesseract_version()  # print tesseract-ocr version
-            print tesserocr.get_languages()  # prints tessdata path and list 
of available languages
+            print(tesserocr.tesseract_version())  # print tesseract-ocr version
+            print(tesserocr.get_languages())  # prints tessdata path and list 
of available languages
         
             image = Image.open('sample.jpg')
-            print tesserocr.image_to_text(image)  # print ocr text from image
+            print(tesserocr.image_to_text(image))  # print ocr text from image
             # or
-            print tesserocr.file_to_text('sample.jpg')
+            print(tesserocr.file_to_text('sample.jpg'))
         
         ``image_to_text`` and ``file_to_text`` can be used with ``threading`` 
to
         concurrently process multiple images which is highly efficient.
@@ -168,15 +168,15 @@
             with PyTessBaseAPI() as api:
                 api.SetImage(image)
                 boxes = api.GetComponentImages(RIL.TEXTLINE, True)
-                print 'Found {} textline image components.'.format(len(boxes))
+                print('Found {} textline image components.'.format(len(boxes)))
                 for i, (im, box, _, _) in enumerate(boxes):
                     # im is a PIL image object
                     # box is a dict with x, y, w and h keys
                     api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
                     ocrResult = api.GetUTF8Text()
                     conf = api.MeanTextConf()
-                    print (u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
-                           "confidence: {1}, text: {2}").format(i, conf, 
ocrResult, **box)
+                    print(u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
+                          "confidence: {1}, text: {2}".format(i, conf, 
ocrResult, **box))
         
         Orientation and script detection (OSD):
         ```````````````````````````````````````
@@ -193,10 +193,10 @@
         
                 it = api.AnalyseLayout()
                 orientation, direction, order, deskew_angle = it.Orientation()
-                print "Orientation: {:d}".format(orientation)
-                print "WritingDirection: {:d}".format(direction)
-                print "TextlineOrder: {:d}".format(order)
-                print "Deskew angle: {:.4f}".format(deskew_angle)
+                print("Orientation: {:d}".format(orientation))
+                print("WritingDirection: {:d}".format(direction))
+                print("TextlineOrder: {:d}".format(order))
+                print("Deskew angle: {:.4f}".format(deskew_angle))
         
         or more simply with ``OSD_ONLY`` page segmentation mode:
         
@@ -208,8 +208,8 @@
                 api.SetImageFile("/usr/src/tesseract/testing/eurotext.tif")
         
                 os = api.DetectOS()
-                print ("Orientation: {orientation}\nOrientation confidence: 
{oconfidence}\n"
-                       "Script: {script}\nScript confidence: 
{sconfidence}").format(**os)
+                print("Orientation: {orientation}\nOrientation confidence: 
{oconfidence}\n"
+                      "Script: {script}\nScript confidence: 
{sconfidence}".format(**os))
         
         more human-readable info with tesseract 4+ (demonstrates LSTM engine 
usage):
         
@@ -221,14 +221,16 @@
                 api.SetImageFile("/usr/src/tesseract/testing/eurotext.tif")
         
                 os = api.DetectOrientationScript()
-                print ("Orientation: {orient_deg}\nOrientation confidence: 
{orient_conf}\n"
-                       "Script: {script_name}\nScript confidence: 
{script_conf}").format(**os)
+                print("Orientation: {orient_deg}\nOrientation confidence: 
{orient_conf}\n"
+                      "Script: {script_name}\nScript confidence: 
{script_conf}".format(**os))
         
         Iterator over the classifier choices for a single symbol:
         `````````````````````````````````````````````````````````
         
         .. code:: python
         
+            from __future__ import print_function
+        
             from tesserocr import PyTessBaseAPI, RIL, iterate_level
         
             with PyTessBaseAPI() as api:
@@ -243,17 +245,17 @@
                     symbol = r.GetUTF8Text(level)  # r == ri
                     conf = r.Confidence(level)
                     if symbol:
-                        print u'symbol {}, conf: {}'.format(symbol, conf),
+                        print(u'symbol {}, conf: {}'.format(symbol, conf), 
end='')
                     indent = False
                     ci = r.GetChoiceIterator()
                     for c in ci:
                         if indent:
-                            print '\t\t ',
-                        print '\t- ',
+                            print('\t\t ', end='')
+                        print('\t- ', end='')
                         choice = c.GetUTF8Text()  # c == ci
-                        print u'{} conf: {}'.format(choice, c.Confidence())
+                        print(u'{} conf: {}'.format(choice, c.Confidence()))
                         indent = True
-                    print '---------------------------------------------'
+                    print('---------------------------------------------')
         
 Keywords: Tesseract,tesseract-ocr,OCR,optical character 
recognition,PIL,Pillow,Cython
 Platform: UNKNOWN
@@ -266,7 +268,6 @@
 Classifier: Operating System :: POSIX
 Classifier: Programming Language :: Python :: 2.7
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.3
 Classifier: Programming Language :: Python :: 3.4
 Classifier: Programming Language :: Python :: 3.5
 Classifier: Programming Language :: Python :: 3.6
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.0/README.rst new/tesserocr-2.4.1/README.rst
--- old/tesserocr-2.4.0/README.rst      2018-08-13 19:32:31.000000000 +0200
+++ new/tesserocr-2.4.1/README.rst      2019-08-23 18:01:21.000000000 +0200
@@ -37,7 +37,7 @@
 
 ::
 
-    $ apt-get install tesseract-ocr libtesseract-dev libleptonica-dev
+    $ apt-get install tesseract-ocr libtesseract-dev libleptonica-dev 
pkg-config
 
 You may need to `manually compile tesseract`_ for a more recent version. Note 
that you may need
 to update your ``LD_LIBRARY_PATH`` environment variable to point to the right 
library versions in
@@ -119,8 +119,8 @@
     with PyTessBaseAPI() as api:
         for img in images:
             api.SetImageFile(img)
-            print api.GetUTF8Text()
-            print api.AllWordConfidences()
+            print(api.GetUTF8Text())
+            print(api.AllWordConfidences())
     # api is automatically finalized when used in a with-statement (context 
manager).
     # otherwise api.End() should be explicitly called when it's no longer 
needed.
 
@@ -134,13 +134,13 @@
     import tesserocr
     from PIL import Image
 
-    print tesserocr.tesseract_version()  # print tesseract-ocr version
-    print tesserocr.get_languages()  # prints tessdata path and list of 
available languages
+    print(tesserocr.tesseract_version())  # print tesseract-ocr version
+    print(tesserocr.get_languages())  # prints tessdata path and list of 
available languages
 
     image = Image.open('sample.jpg')
-    print tesserocr.image_to_text(image)  # print ocr text from image
+    print(tesserocr.image_to_text(image))  # print ocr text from image
     # or
-    print tesserocr.file_to_text('sample.jpg')
+    print(tesserocr.file_to_text('sample.jpg'))
 
 ``image_to_text`` and ``file_to_text`` can be used with ``threading`` to
 concurrently process multiple images which is highly efficient.
@@ -160,15 +160,15 @@
     with PyTessBaseAPI() as api:
         api.SetImage(image)
         boxes = api.GetComponentImages(RIL.TEXTLINE, True)
-        print 'Found {} textline image components.'.format(len(boxes))
+        print('Found {} textline image components.'.format(len(boxes)))
         for i, (im, box, _, _) in enumerate(boxes):
             # im is a PIL image object
             # box is a dict with x, y, w and h keys
             api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
             ocrResult = api.GetUTF8Text()
             conf = api.MeanTextConf()
-            print (u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
-                   "confidence: {1}, text: {2}").format(i, conf, ocrResult, 
**box)
+            print(u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
+                  "confidence: {1}, text: {2}".format(i, conf, ocrResult, 
**box))
 
 Orientation and script detection (OSD):
 ```````````````````````````````````````
@@ -185,10 +185,10 @@
 
         it = api.AnalyseLayout()
         orientation, direction, order, deskew_angle = it.Orientation()
-        print "Orientation: {:d}".format(orientation)
-        print "WritingDirection: {:d}".format(direction)
-        print "TextlineOrder: {:d}".format(order)
-        print "Deskew angle: {:.4f}".format(deskew_angle)
+        print("Orientation: {:d}".format(orientation))
+        print("WritingDirection: {:d}".format(direction))
+        print("TextlineOrder: {:d}".format(order))
+        print("Deskew angle: {:.4f}".format(deskew_angle))
 
 or more simply with ``OSD_ONLY`` page segmentation mode:
 
@@ -200,8 +200,8 @@
         api.SetImageFile("/usr/src/tesseract/testing/eurotext.tif")
 
         os = api.DetectOS()
-        print ("Orientation: {orientation}\nOrientation confidence: 
{oconfidence}\n"
-               "Script: {script}\nScript confidence: 
{sconfidence}").format(**os)
+        print("Orientation: {orientation}\nOrientation confidence: 
{oconfidence}\n"
+              "Script: {script}\nScript confidence: 
{sconfidence}".format(**os))
 
 more human-readable info with tesseract 4+ (demonstrates LSTM engine usage):
 
@@ -213,14 +213,16 @@
         api.SetImageFile("/usr/src/tesseract/testing/eurotext.tif")
 
         os = api.DetectOrientationScript()
-        print ("Orientation: {orient_deg}\nOrientation confidence: 
{orient_conf}\n"
-               "Script: {script_name}\nScript confidence: 
{script_conf}").format(**os)
+        print("Orientation: {orient_deg}\nOrientation confidence: 
{orient_conf}\n"
+              "Script: {script_name}\nScript confidence: 
{script_conf}".format(**os))
 
 Iterator over the classifier choices for a single symbol:
 `````````````````````````````````````````````````````````
 
 .. code:: python
 
+    from __future__ import print_function
+
     from tesserocr import PyTessBaseAPI, RIL, iterate_level
 
     with PyTessBaseAPI() as api:
@@ -235,14 +237,14 @@
             symbol = r.GetUTF8Text(level)  # r == ri
             conf = r.Confidence(level)
             if symbol:
-                print u'symbol {}, conf: {}'.format(symbol, conf),
+                print(u'symbol {}, conf: {}'.format(symbol, conf), end='')
             indent = False
             ci = r.GetChoiceIterator()
             for c in ci:
                 if indent:
-                    print '\t\t ',
-                print '\t- ',
+                    print('\t\t ', end='')
+                print('\t- ', end='')
                 choice = c.GetUTF8Text()  # c == ci
-                print u'{} conf: {}'.format(choice, c.Confidence())
+                print(u'{} conf: {}'.format(choice, c.Confidence()))
                 indent = True
-            print '---------------------------------------------'
+            print('---------------------------------------------')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.0/setup.py new/tesserocr-2.4.1/setup.py
--- old/tesserocr-2.4.0/setup.py        2018-11-30 15:43:23.000000000 +0100
+++ new/tesserocr-2.4.1/setup.py        2019-08-23 18:01:21.000000000 +0200
@@ -194,7 +194,6 @@
           'Operating System :: POSIX',
           'Programming Language :: Python :: 2.7',
           'Programming Language :: Python :: 3',
-          'Programming Language :: Python :: 3.3',
           'Programming Language :: Python :: 3.4',
           'Programming Language :: Python :: 3.5',
           'Programming Language :: Python :: 3.6',
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.0/tesseract.pxd new/tesserocr-2.4.1/tesseract.pxd
--- old/tesserocr-2.4.0/tesseract.pxd   2018-11-30 16:01:37.000000000 +0100
+++ new/tesserocr-2.4.1/tesseract.pxd   2019-08-23 18:01:21.000000000 +0200
@@ -36,6 +36,7 @@
     int pixWriteMemJpeg(unsigned char **, size_t *, Pix *, int, int)
     int pixWriteMem(unsigned char **, size_t *, Pix *, int)
     void pixDestroy(Pix **)
+    void ptaDestroy(Pta **)
     int setMsgSeverity(int)
     void pixaDestroy(Pixa **)
     void boxaDestroy(Boxa **)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.0/tesserocr.egg-info/PKG-INFO new/tesserocr-2.4.1/tesserocr.egg-info/PKG-INFO
--- old/tesserocr-2.4.0/tesserocr.egg-info/PKG-INFO     2018-12-05 15:37:31.000000000 +0100
+++ new/tesserocr-2.4.1/tesserocr.egg-info/PKG-INFO     2019-08-23 18:03:11.000000000 +0200
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tesserocr
-Version: 2.4.0
+Version: 2.4.1
 Summary: A simple, Pillow-friendly, Python wrapper around tesseract-ocr API 
using Cython
 Home-page: https://github.com/sirfz/tesserocr
 Author: Fayez Zouheiry
@@ -45,7 +45,7 @@
         
         ::
         
-            $ apt-get install tesseract-ocr libtesseract-dev libleptonica-dev
+            $ apt-get install tesseract-ocr libtesseract-dev libleptonica-dev 
pkg-config
         
         You may need to `manually compile tesseract`_ for a more recent 
version. Note that you may need
         to update your ``LD_LIBRARY_PATH`` environment variable to point to 
the right library versions in
@@ -127,8 +127,8 @@
             with PyTessBaseAPI() as api:
                 for img in images:
                     api.SetImageFile(img)
-                    print api.GetUTF8Text()
-                    print api.AllWordConfidences()
+                    print(api.GetUTF8Text())
+                    print(api.AllWordConfidences())
             # api is automatically finalized when used in a with-statement 
(context manager).
             # otherwise api.End() should be explicitly called when it's no 
longer needed.
         
@@ -142,13 +142,13 @@
             import tesserocr
             from PIL import Image
         
-            print tesserocr.tesseract_version()  # print tesseract-ocr version
-            print tesserocr.get_languages()  # prints tessdata path and list 
of available languages
+            print(tesserocr.tesseract_version())  # print tesseract-ocr version
+            print(tesserocr.get_languages())  # prints tessdata path and list 
of available languages
         
             image = Image.open('sample.jpg')
-            print tesserocr.image_to_text(image)  # print ocr text from image
+            print(tesserocr.image_to_text(image))  # print ocr text from image
             # or
-            print tesserocr.file_to_text('sample.jpg')
+            print(tesserocr.file_to_text('sample.jpg'))
         
         ``image_to_text`` and ``file_to_text`` can be used with ``threading`` 
to
         concurrently process multiple images which is highly efficient.
@@ -168,15 +168,15 @@
             with PyTessBaseAPI() as api:
                 api.SetImage(image)
                 boxes = api.GetComponentImages(RIL.TEXTLINE, True)
-                print 'Found {} textline image components.'.format(len(boxes))
+                print('Found {} textline image components.'.format(len(boxes)))
                 for i, (im, box, _, _) in enumerate(boxes):
                     # im is a PIL image object
                     # box is a dict with x, y, w and h keys
                     api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
                     ocrResult = api.GetUTF8Text()
                     conf = api.MeanTextConf()
-                    print (u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
-                           "confidence: {1}, text: {2}").format(i, conf, 
ocrResult, **box)
+                    print(u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
+                          "confidence: {1}, text: {2}".format(i, conf, 
ocrResult, **box))
         
         Orientation and script detection (OSD):
         ```````````````````````````````````````
@@ -193,10 +193,10 @@
         
                 it = api.AnalyseLayout()
                 orientation, direction, order, deskew_angle = it.Orientation()
-                print "Orientation: {:d}".format(orientation)
-                print "WritingDirection: {:d}".format(direction)
-                print "TextlineOrder: {:d}".format(order)
-                print "Deskew angle: {:.4f}".format(deskew_angle)
+                print("Orientation: {:d}".format(orientation))
+                print("WritingDirection: {:d}".format(direction))
+                print("TextlineOrder: {:d}".format(order))
+                print("Deskew angle: {:.4f}".format(deskew_angle))
         
         or more simply with ``OSD_ONLY`` page segmentation mode:
         
@@ -208,8 +208,8 @@
                 api.SetImageFile("/usr/src/tesseract/testing/eurotext.tif")
         
                 os = api.DetectOS()
-                print ("Orientation: {orientation}\nOrientation confidence: 
{oconfidence}\n"
-                       "Script: {script}\nScript confidence: 
{sconfidence}").format(**os)
+                print("Orientation: {orientation}\nOrientation confidence: 
{oconfidence}\n"
+                      "Script: {script}\nScript confidence: 
{sconfidence}".format(**os))
         
         more human-readable info with tesseract 4+ (demonstrates LSTM engine 
usage):
         
@@ -221,14 +221,16 @@
                 api.SetImageFile("/usr/src/tesseract/testing/eurotext.tif")
         
                 os = api.DetectOrientationScript()
-                print ("Orientation: {orient_deg}\nOrientation confidence: 
{orient_conf}\n"
-                       "Script: {script_name}\nScript confidence: 
{script_conf}").format(**os)
+                print("Orientation: {orient_deg}\nOrientation confidence: 
{orient_conf}\n"
+                      "Script: {script_name}\nScript confidence: 
{script_conf}".format(**os))
         
         Iterator over the classifier choices for a single symbol:
         `````````````````````````````````````````````````````````
         
         .. code:: python
         
+            from __future__ import print_function
+        
             from tesserocr import PyTessBaseAPI, RIL, iterate_level
         
             with PyTessBaseAPI() as api:
@@ -243,17 +245,17 @@
                     symbol = r.GetUTF8Text(level)  # r == ri
                     conf = r.Confidence(level)
                     if symbol:
-                        print u'symbol {}, conf: {}'.format(symbol, conf),
+                        print(u'symbol {}, conf: {}'.format(symbol, conf), 
end='')
                     indent = False
                     ci = r.GetChoiceIterator()
                     for c in ci:
                         if indent:
-                            print '\t\t ',
-                        print '\t- ',
+                            print('\t\t ', end='')
+                        print('\t- ', end='')
                         choice = c.GetUTF8Text()  # c == ci
-                        print u'{} conf: {}'.format(choice, c.Confidence())
+                        print(u'{} conf: {}'.format(choice, c.Confidence()))
                         indent = True
-                    print '---------------------------------------------'
+                    print('---------------------------------------------')
         
 Keywords: Tesseract,tesseract-ocr,OCR,optical character 
recognition,PIL,Pillow,Cython
 Platform: UNKNOWN
@@ -266,7 +268,6 @@
 Classifier: Operating System :: POSIX
 Classifier: Programming Language :: Python :: 2.7
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.3
 Classifier: Programming Language :: Python :: 3.4
 Classifier: Programming Language :: Python :: 3.5
 Classifier: Programming Language :: Python :: 3.6
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.0/tesserocr.pyx new/tesserocr-2.4.1/tesserocr.pyx
--- old/tesserocr-2.4.0/tesserocr.pyx   2018-11-30 16:04:36.000000000 +0100
+++ new/tesserocr-2.4.1/tesserocr.pyx   2019-08-23 18:01:38.000000000 +0200
@@ -18,7 +18,7 @@
  ['eng', 'osd', 'equ'])
 """
 
-__version__ = '2.4.0'
+__version__ = '2.4.1'
 
 import os
 from io import BytesIO
@@ -362,7 +362,7 @@
 
 cdef pixa_to_list(Pixa *pixa):
     """Convert Pixa (Array of pixes and boxes) to list of pix, box tuples."""
-    return zip((_pix_to_image(pix) for pix in pixa.pix[:pixa.n]), 
boxa_to_list(pixa.boxa))
+    return list(zip((_pix_to_image(pix) for pix in pixa.pix[:pixa.n]), 
boxa_to_list(pixa.boxa)))
 
 
 cdef class PyPageIterator:
@@ -612,13 +612,16 @@
         if pta == NULL:
             return None
         try:
-            return zip((x for x in pta.x[:pta.n]), (y for y in pta.y[:pta.n]))
+            return list(zip((x for x in pta.x[:pta.n]), (y for y in 
pta.y[:pta.n])))
         finally:
-            free(pta)
+            ptaDestroy(&pta)
 
     def GetBinaryImage(self, PageIteratorLevel level):
         """Return a binary image of the current object at the given level.
 
+        The image is masked along the polygon outline of the current block, as 
given
+        by :meth:`BlockPolygon`. (Pixels outside the mask will be white.)
+
         The position and size match the return from 
:meth:`BoundingBoxInternal`, and so
         this could be upscaled with respect to the original input image.
 
@@ -640,6 +643,9 @@
         """Return an image of the current object at the given level in 
greyscale
         if available in the input.
 
+        The image is masked along the polygon outline of the current block, as 
given
+        by :meth:`BlockPolygon`. (Pixels outside the mask will be white.)
+
         To guarantee a binary image use :meth:`BinaryImage`.
 
         Args:
@@ -2338,6 +2344,7 @@
                   rotation to be applied to the page for the text to be 
upright and readable.
                 - oconfidence: Orientation confidence.
                 - script: Index of the script with the highest score for this 
orientation.
+                  (This is _not_ the index of :meth:`get_languages`, which is 
in alphabetical order.)
                 - sconfidence: script confidence.
         """
         cdef OSResults results
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.0/tests/test_api.py new/tesserocr-2.4.1/tests/test_api.py
--- old/tesserocr-2.4.0/tests/test_api.py       2018-11-30 16:23:48.000000000 +0100
+++ new/tesserocr-2.4.1/tests/test_api.py       2019-08-23 18:01:21.000000000 +0200
@@ -225,7 +225,10 @@
         orientation = self._api.DetectOS()
         all(self.assertIn(k, orientation) for k in ['sconfidence', 
'oconfidence', 'script', 'orientation'])
         self.assertEqual(orientation['orientation'], 0)
-        self.assertEqual(orientation['script'], 1)
+        languages = tesserocr.get_languages()[1] # this is sorted 
alphabetically!
+        self.assertLess(orientation['script'], len(languages))
+        script_name = languages[orientation['script']] # therefore does not 
work
+        #self.assertEqual(script_name, 'Latin') # cannot test: not reliable
         if _TESSERACT_VERSION >= 0x3999800:
             orientation = self._api.DetectOrientationScript()
             all(self.assertIn(k, orientation) for k in ['orient_deg', 
'orient_conf', 'script_name', 'script_conf'])
@@ -263,6 +266,51 @@
         # Test if empty
         self.assertFalse(result)
 
+    def test_layout_getcomponents(self):
+        self._api.Init()
+        self._api.SetImageFile(self._image_file)
+        result = self._api.GetComponentImages(tesserocr.RIL.BLOCK, True)
+        # Test if not empty
+        self.assertTrue(result)
+        _, xywh, _, _ = result[0] # bbox of largest
+        self.assertIn('w', xywh)
+        self.assertIn('h', xywh)
+        area = xywh['w'] * xywh['h']
+        # Test if the largest block is quite large
+        self.assertGreater(area, 400000)
+
+    def test_layout_boundingbox(self):
+        self._api.Init()
+        self._api.SetImageFile(self._image_file)
+        layout = self._api.AnalyseLayout()
+        # Test if not empty
+        self.assertTrue(layout)
+        self.assertFalse(layout.Empty(tesserocr.RIL.BLOCK))
+        result = layout.BoundingBox(tesserocr.RIL.BLOCK) # bbox of largest
+        self.assertIsNot(result, None)
+        x0, y0, x1, y1 = result
+        area = (x1 - x0) * (y1 - y0)
+        # Test if the largest block is quite large
+        self.assertGreater(area, 400000)
+
+    def test_layout_blockpolygon(self):
+        self._api.Init()
+        self._api.SetImageFile(self._image_file)
+        layout = self._api.AnalyseLayout()
+        # Test if not empty
+        self.assertTrue(layout)
+        self.assertFalse(layout.Empty(tesserocr.RIL.BLOCK))
+        result = layout.BlockPolygon() # polygon of largest
+        # Test if not empty
+        self.assertIsNot(result, None)
+        # Test there are at least 4 contour points
+        self.assertGreaterEqual(len(result), 4)
+        xs, ys = zip(*result)
+        x0, y0, x1, y1 = min(xs), min(ys), max(xs), max(ys)
+        area = (x1 - x0) * (y1 - y0)
+        # Test if the largest block is quite large
+        self.assertGreater(area, 400000)
+
     def test_recognize(self):
         """Test Recognize with and without timeout."""
         self._api.SetImageFile(self._image_file)

commit python-tesserocr for openSUSE:Factory

Reply via email to