Hello community, here is the log from the commit of package python-tesserocr for openSUSE:Factory checked in at 2019-08-24 18:47:27 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-tesserocr (Old) and /work/SRC/openSUSE:Factory/.python-tesserocr.new.7948 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-tesserocr" Sat Aug 24 18:47:27 2019 rev:7 rq:725668 version:2.4.1 Changes: -------- --- /work/SRC/openSUSE:Factory/python-tesserocr/python-tesserocr.changes 2018-12-07 14:35:05.147094570 +0100 +++ /work/SRC/openSUSE:Factory/.python-tesserocr.new.7948/python-tesserocr.changes 2019-08-24 18:47:34.657750513 +0200 @@ -1,0 +2,7 @@ +Fri Aug 23 18:14:51 UTC 2019 - Martin Herkt <9+suse@cirno.systems> + +- Update to version 2.4.1 + * fix pixa_to_list python3 segfault + * fix BlockPolygon python3 segfault + +------------------------------------------------------------------- Old: ---- tesserocr-2.4.0.tar.gz New: ---- tesserocr-2.4.1.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-tesserocr.spec ++++++ --- /var/tmp/diff_new_pack.4kMJVM/_old 2019-08-24 18:47:37.113750276 +0200 +++ /var/tmp/diff_new_pack.4kMJVM/_new 2019-08-24 18:47:37.113750276 +0200 @@ -1,7 +1,7 @@ # # spec file for package python-tesserocr # -# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -18,7 +18,7 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-tesserocr -Version: 2.4.0 +Version: 2.4.1 Release: 0 Summary: A Python wrapper around tesseract-ocr License: MIT ++++++ tesserocr-2.4.0.tar.gz -> tesserocr-2.4.1.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.0/PKG-INFO new/tesserocr-2.4.1/PKG-INFO --- old/tesserocr-2.4.0/PKG-INFO 2018-12-05 15:37:32.000000000 +0100 +++ new/tesserocr-2.4.1/PKG-INFO 2019-08-23 18:03:12.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: tesserocr -Version: 2.4.0 +Version: 2.4.1 Summary: A simple, Pillow-friendly, Python wrapper around tesseract-ocr API using Cython Home-page: https://github.com/sirfz/tesserocr Author: Fayez Zouheiry @@ -45,7 +45,7 @@ :: - $ apt-get install tesseract-ocr libtesseract-dev libleptonica-dev + $ apt-get install tesseract-ocr libtesseract-dev libleptonica-dev pkg-config You may need to `manually compile tesseract`_ for a more recent version. Note that you may need to update your ``LD_LIBRARY_PATH`` environment variable to point to the right library versions in @@ -127,8 +127,8 @@ with PyTessBaseAPI() as api: for img in images: api.SetImageFile(img) - print api.GetUTF8Text() - print api.AllWordConfidences() + print(api.GetUTF8Text()) + print(api.AllWordConfidences()) # api is automatically finalized when used in a with-statement (context manager). # otherwise api.End() should be explicitly called when it's no longer needed. @@ -142,13 +142,13 @@ import tesserocr from PIL import Image - print tesserocr.tesseract_version() # print tesseract-ocr version - print tesserocr.get_languages() # prints tessdata path and list of available languages + print(tesserocr.tesseract_version()) # print tesseract-ocr version + print(tesserocr.get_languages()) # prints tessdata path and list of available languages image = Image.open('sample.jpg') - print tesserocr.image_to_text(image) # print ocr text from image + print(tesserocr.image_to_text(image)) # print ocr text from image # or - print tesserocr.file_to_text('sample.jpg') + print(tesserocr.file_to_text('sample.jpg')) ``image_to_text`` and ``file_to_text`` can be used with ``threading`` to concurrently process multiple images which is highly efficient. @@ -168,15 +168,15 @@ with PyTessBaseAPI() as api: api.SetImage(image) boxes = api.GetComponentImages(RIL.TEXTLINE, True) - print 'Found {} textline image components.'.format(len(boxes)) + print('Found {} textline image components.'.format(len(boxes))) for i, (im, box, _, _) in enumerate(boxes): # im is a PIL image object # box is a dict with x, y, w and h keys api.SetRectangle(box['x'], box['y'], box['w'], box['h']) ocrResult = api.GetUTF8Text() conf = api.MeanTextConf() - print (u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, " - "confidence: {1}, text: {2}").format(i, conf, ocrResult, **box) + print(u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, " + "confidence: {1}, text: {2}".format(i, conf, ocrResult, **box)) Orientation and script detection (OSD): ``````````````````````````````````````` @@ -193,10 +193,10 @@ it = api.AnalyseLayout() orientation, direction, order, deskew_angle = it.Orientation() - print "Orientation: {:d}".format(orientation) - print "WritingDirection: {:d}".format(direction) - print "TextlineOrder: {:d}".format(order) - print "Deskew angle: {:.4f}".format(deskew_angle) + print("Orientation: {:d}".format(orientation)) + print("WritingDirection: {:d}".format(direction)) + print("TextlineOrder: {:d}".format(order)) + print("Deskew angle: {:.4f}".format(deskew_angle)) or more simply with ``OSD_ONLY`` page segmentation mode: @@ -208,8 +208,8 @@ api.SetImageFile("/usr/src/tesseract/testing/eurotext.tif") os = api.DetectOS() - print ("Orientation: {orientation}\nOrientation confidence: {oconfidence}\n" - "Script: {script}\nScript confidence: {sconfidence}").format(**os) + print("Orientation: {orientation}\nOrientation confidence: {oconfidence}\n" + "Script: {script}\nScript confidence: {sconfidence}".format(**os)) more human-readable info with tesseract 4+ (demonstrates LSTM engine usage): @@ -221,14 +221,16 @@ api.SetImageFile("/usr/src/tesseract/testing/eurotext.tif") os = api.DetectOrientationScript() - print ("Orientation: {orient_deg}\nOrientation confidence: {orient_conf}\n" - "Script: {script_name}\nScript confidence: {script_conf}").format(**os) + print("Orientation: {orient_deg}\nOrientation confidence: {orient_conf}\n" + "Script: {script_name}\nScript confidence: {script_conf}".format(**os)) Iterator over the classifier choices for a single symbol: ````````````````````````````````````````````````````````` .. code:: python + from __future__ import print_function + from tesserocr import PyTessBaseAPI, RIL, iterate_level with PyTessBaseAPI() as api: @@ -243,17 +245,17 @@ symbol = r.GetUTF8Text(level) # r == ri conf = r.Confidence(level) if symbol: - print u'symbol {}, conf: {}'.format(symbol, conf), + print(u'symbol {}, conf: {}'.format(symbol, conf), end='') indent = False ci = r.GetChoiceIterator() for c in ci: if indent: - print '\t\t ', - print '\t- ', + print('\t\t ', end='') + print('\t- ', end='') choice = c.GetUTF8Text() # c == ci - print u'{} conf: {}'.format(choice, c.Confidence()) + print(u'{} conf: {}'.format(choice, c.Confidence())) indent = True - print '---------------------------------------------' + print('---------------------------------------------') Keywords: Tesseract,tesseract-ocr,OCR,optical character recognition,PIL,Pillow,Cython Platform: UNKNOWN @@ -266,7 +268,6 @@ Classifier: Operating System :: POSIX Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.3 Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.0/README.rst new/tesserocr-2.4.1/README.rst --- old/tesserocr-2.4.0/README.rst 2018-08-13 19:32:31.000000000 +0200 +++ new/tesserocr-2.4.1/README.rst 2019-08-23 18:01:21.000000000 +0200 @@ -37,7 +37,7 @@ :: - $ apt-get install tesseract-ocr libtesseract-dev libleptonica-dev + $ apt-get install tesseract-ocr libtesseract-dev libleptonica-dev pkg-config You may need to `manually compile tesseract`_ for a more recent version. Note that you may need to update your ``LD_LIBRARY_PATH`` environment variable to point to the right library versions in @@ -119,8 +119,8 @@ with PyTessBaseAPI() as api: for img in images: api.SetImageFile(img) - print api.GetUTF8Text() - print api.AllWordConfidences() + print(api.GetUTF8Text()) + print(api.AllWordConfidences()) # api is automatically finalized when used in a with-statement (context manager). # otherwise api.End() should be explicitly called when it's no longer needed. @@ -134,13 +134,13 @@ import tesserocr from PIL import Image - print tesserocr.tesseract_version() # print tesseract-ocr version - print tesserocr.get_languages() # prints tessdata path and list of available languages + print(tesserocr.tesseract_version()) # print tesseract-ocr version + print(tesserocr.get_languages()) # prints tessdata path and list of available languages image = Image.open('sample.jpg') - print tesserocr.image_to_text(image) # print ocr text from image + print(tesserocr.image_to_text(image)) # print ocr text from image # or - print tesserocr.file_to_text('sample.jpg') + print(tesserocr.file_to_text('sample.jpg')) ``image_to_text`` and ``file_to_text`` can be used with ``threading`` to concurrently process multiple images which is highly efficient. @@ -160,15 +160,15 @@ with PyTessBaseAPI() as api: api.SetImage(image) boxes = api.GetComponentImages(RIL.TEXTLINE, True) - print 'Found {} textline image components.'.format(len(boxes)) + print('Found {} textline image components.'.format(len(boxes))) for i, (im, box, _, _) in enumerate(boxes): # im is a PIL image object # box is a dict with x, y, w and h keys api.SetRectangle(box['x'], box['y'], box['w'], box['h']) ocrResult = api.GetUTF8Text() conf = api.MeanTextConf() - print (u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, " - "confidence: {1}, text: {2}").format(i, conf, ocrResult, **box) + print(u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, " + "confidence: {1}, text: {2}".format(i, conf, ocrResult, **box)) Orientation and script detection (OSD): ``````````````````````````````````````` @@ -185,10 +185,10 @@ it = api.AnalyseLayout() orientation, direction, order, deskew_angle = it.Orientation() - print "Orientation: {:d}".format(orientation) - print "WritingDirection: {:d}".format(direction) - print "TextlineOrder: {:d}".format(order) - print "Deskew angle: {:.4f}".format(deskew_angle) + print("Orientation: {:d}".format(orientation)) + print("WritingDirection: {:d}".format(direction)) + print("TextlineOrder: {:d}".format(order)) + print("Deskew angle: {:.4f}".format(deskew_angle)) or more simply with ``OSD_ONLY`` page segmentation mode: @@ -200,8 +200,8 @@ api.SetImageFile("/usr/src/tesseract/testing/eurotext.tif") os = api.DetectOS() - print ("Orientation: {orientation}\nOrientation confidence: {oconfidence}\n" - "Script: {script}\nScript confidence: {sconfidence}").format(**os) + print("Orientation: {orientation}\nOrientation confidence: {oconfidence}\n" + "Script: {script}\nScript confidence: {sconfidence}".format(**os)) more human-readable info with tesseract 4+ (demonstrates LSTM engine usage): @@ -213,14 +213,16 @@ api.SetImageFile("/usr/src/tesseract/testing/eurotext.tif") os = api.DetectOrientationScript() - print ("Orientation: {orient_deg}\nOrientation confidence: {orient_conf}\n" - "Script: {script_name}\nScript confidence: {script_conf}").format(**os) + print("Orientation: {orient_deg}\nOrientation confidence: {orient_conf}\n" + "Script: {script_name}\nScript confidence: {script_conf}".format(**os)) Iterator over the classifier choices for a single symbol: ````````````````````````````````````````````````````````` .. code:: python + from __future__ import print_function + from tesserocr import PyTessBaseAPI, RIL, iterate_level with PyTessBaseAPI() as api: @@ -235,14 +237,14 @@ symbol = r.GetUTF8Text(level) # r == ri conf = r.Confidence(level) if symbol: - print u'symbol {}, conf: {}'.format(symbol, conf), + print(u'symbol {}, conf: {}'.format(symbol, conf), end='') indent = False ci = r.GetChoiceIterator() for c in ci: if indent: - print '\t\t ', - print '\t- ', + print('\t\t ', end='') + print('\t- ', end='') choice = c.GetUTF8Text() # c == ci - print u'{} conf: {}'.format(choice, c.Confidence()) + print(u'{} conf: {}'.format(choice, c.Confidence())) indent = True - print '---------------------------------------------' + print('---------------------------------------------') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.0/setup.py new/tesserocr-2.4.1/setup.py --- old/tesserocr-2.4.0/setup.py 2018-11-30 15:43:23.000000000 +0100 +++ new/tesserocr-2.4.1/setup.py 2019-08-23 18:01:21.000000000 +0200 @@ -194,7 +194,6 @@ 'Operating System :: POSIX', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.0/tesseract.pxd new/tesserocr-2.4.1/tesseract.pxd --- old/tesserocr-2.4.0/tesseract.pxd 2018-11-30 16:01:37.000000000 +0100 +++ new/tesserocr-2.4.1/tesseract.pxd 2019-08-23 18:01:21.000000000 +0200 @@ -36,6 +36,7 @@ int pixWriteMemJpeg(unsigned char **, size_t *, Pix *, int, int) int pixWriteMem(unsigned char **, size_t *, Pix *, int) void pixDestroy(Pix **) + void ptaDestroy(Pta **) int setMsgSeverity(int) void pixaDestroy(Pixa **) void boxaDestroy(Boxa **) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.0/tesserocr.egg-info/PKG-INFO new/tesserocr-2.4.1/tesserocr.egg-info/PKG-INFO --- old/tesserocr-2.4.0/tesserocr.egg-info/PKG-INFO 2018-12-05 15:37:31.000000000 +0100 +++ new/tesserocr-2.4.1/tesserocr.egg-info/PKG-INFO 2019-08-23 18:03:11.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: tesserocr -Version: 2.4.0 +Version: 2.4.1 Summary: A simple, Pillow-friendly, Python wrapper around tesseract-ocr API using Cython Home-page: https://github.com/sirfz/tesserocr Author: Fayez Zouheiry @@ -45,7 +45,7 @@ :: - $ apt-get install tesseract-ocr libtesseract-dev libleptonica-dev + $ apt-get install tesseract-ocr libtesseract-dev libleptonica-dev pkg-config You may need to `manually compile tesseract`_ for a more recent version. Note that you may need to update your ``LD_LIBRARY_PATH`` environment variable to point to the right library versions in @@ -127,8 +127,8 @@ with PyTessBaseAPI() as api: for img in images: api.SetImageFile(img) - print api.GetUTF8Text() - print api.AllWordConfidences() + print(api.GetUTF8Text()) + print(api.AllWordConfidences()) # api is automatically finalized when used in a with-statement (context manager). # otherwise api.End() should be explicitly called when it's no longer needed. @@ -142,13 +142,13 @@ import tesserocr from PIL import Image - print tesserocr.tesseract_version() # print tesseract-ocr version - print tesserocr.get_languages() # prints tessdata path and list of available languages + print(tesserocr.tesseract_version()) # print tesseract-ocr version + print(tesserocr.get_languages()) # prints tessdata path and list of available languages image = Image.open('sample.jpg') - print tesserocr.image_to_text(image) # print ocr text from image + print(tesserocr.image_to_text(image)) # print ocr text from image # or - print tesserocr.file_to_text('sample.jpg') + print(tesserocr.file_to_text('sample.jpg')) ``image_to_text`` and ``file_to_text`` can be used with ``threading`` to concurrently process multiple images which is highly efficient. @@ -168,15 +168,15 @@ with PyTessBaseAPI() as api: api.SetImage(image) boxes = api.GetComponentImages(RIL.TEXTLINE, True) - print 'Found {} textline image components.'.format(len(boxes)) + print('Found {} textline image components.'.format(len(boxes))) for i, (im, box, _, _) in enumerate(boxes): # im is a PIL image object # box is a dict with x, y, w and h keys api.SetRectangle(box['x'], box['y'], box['w'], box['h']) ocrResult = api.GetUTF8Text() conf = api.MeanTextConf() - print (u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, " - "confidence: {1}, text: {2}").format(i, conf, ocrResult, **box) + print(u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, " + "confidence: {1}, text: {2}".format(i, conf, ocrResult, **box)) Orientation and script detection (OSD): ``````````````````````````````````````` @@ -193,10 +193,10 @@ it = api.AnalyseLayout() orientation, direction, order, deskew_angle = it.Orientation() - print "Orientation: {:d}".format(orientation) - print "WritingDirection: {:d}".format(direction) - print "TextlineOrder: {:d}".format(order) - print "Deskew angle: {:.4f}".format(deskew_angle) + print("Orientation: {:d}".format(orientation)) + print("WritingDirection: {:d}".format(direction)) + print("TextlineOrder: {:d}".format(order)) + print("Deskew angle: {:.4f}".format(deskew_angle)) or more simply with ``OSD_ONLY`` page segmentation mode: @@ -208,8 +208,8 @@ api.SetImageFile("/usr/src/tesseract/testing/eurotext.tif") os = api.DetectOS() - print ("Orientation: {orientation}\nOrientation confidence: {oconfidence}\n" - "Script: {script}\nScript confidence: {sconfidence}").format(**os) + print("Orientation: {orientation}\nOrientation confidence: {oconfidence}\n" + "Script: {script}\nScript confidence: {sconfidence}".format(**os)) more human-readable info with tesseract 4+ (demonstrates LSTM engine usage): @@ -221,14 +221,16 @@ api.SetImageFile("/usr/src/tesseract/testing/eurotext.tif") os = api.DetectOrientationScript() - print ("Orientation: {orient_deg}\nOrientation confidence: {orient_conf}\n" - "Script: {script_name}\nScript confidence: {script_conf}").format(**os) + print("Orientation: {orient_deg}\nOrientation confidence: {orient_conf}\n" + "Script: {script_name}\nScript confidence: {script_conf}".format(**os)) Iterator over the classifier choices for a single symbol: ````````````````````````````````````````````````````````` .. code:: python + from __future__ import print_function + from tesserocr import PyTessBaseAPI, RIL, iterate_level with PyTessBaseAPI() as api: @@ -243,17 +245,17 @@ symbol = r.GetUTF8Text(level) # r == ri conf = r.Confidence(level) if symbol: - print u'symbol {}, conf: {}'.format(symbol, conf), + print(u'symbol {}, conf: {}'.format(symbol, conf), end='') indent = False ci = r.GetChoiceIterator() for c in ci: if indent: - print '\t\t ', - print '\t- ', + print('\t\t ', end='') + print('\t- ', end='') choice = c.GetUTF8Text() # c == ci - print u'{} conf: {}'.format(choice, c.Confidence()) + print(u'{} conf: {}'.format(choice, c.Confidence())) indent = True - print '---------------------------------------------' + print('---------------------------------------------') Keywords: Tesseract,tesseract-ocr,OCR,optical character recognition,PIL,Pillow,Cython Platform: UNKNOWN @@ -266,7 +268,6 @@ Classifier: Operating System :: POSIX Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.3 Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.0/tesserocr.pyx new/tesserocr-2.4.1/tesserocr.pyx --- old/tesserocr-2.4.0/tesserocr.pyx 2018-11-30 16:04:36.000000000 +0100 +++ new/tesserocr-2.4.1/tesserocr.pyx 2019-08-23 18:01:38.000000000 +0200 @@ -18,7 +18,7 @@ ['eng', 'osd', 'equ']) """ -__version__ = '2.4.0' +__version__ = '2.4.1' import os from io import BytesIO @@ -362,7 +362,7 @@ cdef pixa_to_list(Pixa *pixa): """Convert Pixa (Array of pixes and boxes) to list of pix, box tuples.""" - return zip((_pix_to_image(pix) for pix in pixa.pix[:pixa.n]), boxa_to_list(pixa.boxa)) + return list(zip((_pix_to_image(pix) for pix in pixa.pix[:pixa.n]), boxa_to_list(pixa.boxa))) cdef class PyPageIterator: @@ -612,13 +612,16 @@ if pta == NULL: return None try: - return zip((x for x in pta.x[:pta.n]), (y for y in pta.y[:pta.n])) + return list(zip((x for x in pta.x[:pta.n]), (y for y in pta.y[:pta.n]))) finally: - free(pta) + ptaDestroy(&pta) def GetBinaryImage(self, PageIteratorLevel level): """Return a binary image of the current object at the given level. + The image is masked along the polygon outline of the current block, as given + by :meth:`BlockPolygon`. (Pixels outside the mask will be white.) + The position and size match the return from :meth:`BoundingBoxInternal`, and so this could be upscaled with respect to the original input image. @@ -640,6 +643,9 @@ """Return an image of the current object at the given level in greyscale if available in the input. + The image is masked along the polygon outline of the current block, as given + by :meth:`BlockPolygon`. (Pixels outside the mask will be white.) + To guarantee a binary image use :meth:`BinaryImage`. Args: @@ -2338,6 +2344,7 @@ rotation to be applied to the page for the text to be upright and readable. - oconfidence: Orientation confidence. - script: Index of the script with the highest score for this orientation. + (This is _not_ the index of :meth:`get_languages`, which is in alphabetical order.) - sconfidence: script confidence. """ cdef OSResults results diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.0/tests/test_api.py new/tesserocr-2.4.1/tests/test_api.py --- old/tesserocr-2.4.0/tests/test_api.py 2018-11-30 16:23:48.000000000 +0100 +++ new/tesserocr-2.4.1/tests/test_api.py 2019-08-23 18:01:21.000000000 +0200 @@ -225,7 +225,10 @@ orientation = self._api.DetectOS() all(self.assertIn(k, orientation) for k in ['sconfidence', 'oconfidence', 'script', 'orientation']) self.assertEqual(orientation['orientation'], 0) - self.assertEqual(orientation['script'], 1) + languages = tesserocr.get_languages()[1] # this is sorted alphabetically! + self.assertLess(orientation['script'], len(languages)) + script_name = languages[orientation['script']] # therefore does not work + #self.assertEqual(script_name, 'Latin') # cannot test: not reliable if _TESSERACT_VERSION >= 0x3999800: orientation = self._api.DetectOrientationScript() all(self.assertIn(k, orientation) for k in ['orient_deg', 'orient_conf', 'script_name', 'script_conf']) @@ -263,6 +266,51 @@ # Test if empty self.assertFalse(result) + def test_layout_getcomponents(self): + self._api.Init() + self._api.SetImageFile(self._image_file) + result = self._api.GetComponentImages(tesserocr.RIL.BLOCK, True) + # Test if not empty + self.assertTrue(result) + _, xywh, _, _ = result[0] # bbox of largest + self.assertIn('w', xywh) + self.assertIn('h', xywh) + area = xywh['w'] * xywh['h'] + # Test if the largest block is quite large + self.assertGreater(area, 400000) + + def test_layout_boundingbox(self): + self._api.Init() + self._api.SetImageFile(self._image_file) + layout = self._api.AnalyseLayout() + # Test if not empty + self.assertTrue(layout) + self.assertFalse(layout.Empty(tesserocr.RIL.BLOCK)) + result = layout.BoundingBox(tesserocr.RIL.BLOCK) # bbox of largest + self.assertIsNot(result, None) + x0, y0, x1, y1 = result + area = (x1 - x0) * (y1 - y0) + # Test if the largest block is quite large + self.assertGreater(area, 400000) + + def test_layout_blockpolygon(self): + self._api.Init() + self._api.SetImageFile(self._image_file) + layout = self._api.AnalyseLayout() + # Test if not empty + self.assertTrue(layout) + self.assertFalse(layout.Empty(tesserocr.RIL.BLOCK)) + result = layout.BlockPolygon() # polygon of largest + # Test if not empty + self.assertIsNot(result, None) + # Test there are at least 4 contour points + self.assertGreaterEqual(len(result), 4) + xs, ys = zip(*result) + x0, y0, x1, y1 = min(xs), min(ys), max(xs), max(ys) + area = (x1 - x0) * (y1 - y0) + # Test if the largest block is quite large + self.assertGreater(area, 400000) + def test_recognize(self): """Test Recognize with and without timeout.""" self._api.SetImageFile(self._image_file)