Hello community,

here is the log from the commit of package python-tesserocr for 
openSUSE:Factory checked in at 2019-12-03 15:21:47
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-tesserocr (Old)
 and      /work/SRC/openSUSE:Factory/.python-tesserocr.new.4691 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "python-tesserocr"

Tue Dec  3 15:21:47 2019 rev:8 rq:753070 version:2.5.0

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-tesserocr/python-tesserocr.changes  2019-08-24 18:47:34.657750513 +0200
+++ /work/SRC/openSUSE:Factory/.python-tesserocr.new.4691/python-tesserocr.changes  2019-12-03 15:21:50.130521845 +0100
@@ -1,0 +2,9 @@
+Tue Nov 26 00:49:44 UTC 2019 - Martin Herkt <9+suse@cirno.systems>
+
+- Update to version 2.5.0
+  * Support for RowAttributes method in LTRResultIterator
+  * SetImage: use PNG instead of JPEG fallback
+  * Replace STRING::string() by c_str()
+  * Don't use assignment operator for TessBaseAPI
+
+-------------------------------------------------------------------

Old:
----
  tesserocr-2.4.1.tar.gz

New:
----
  tesserocr-2.5.0.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-tesserocr.spec ++++++
--- /var/tmp/diff_new_pack.VK66EF/_old  2019-12-03 15:21:50.822521528 +0100
+++ /var/tmp/diff_new_pack.VK66EF/_new  2019-12-03 15:21:50.826521526 +0100
@@ -1,7 +1,7 @@
 #
 # spec file for package python-tesserocr
 #
-# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany.
+# Copyright (c) 2019 SUSE LLC
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -18,7 +18,7 @@
 
 %{?!python_module:%define python_module() python-%{**} python3-%{**}}
 Name:           python-tesserocr
-Version:        2.4.1
+Version:        2.5.0
 Release:        0
 Summary:        A Python wrapper around tesseract-ocr
 License:        MIT

++++++ tesserocr-2.4.1.tar.gz -> tesserocr-2.5.0.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.1/PKG-INFO new/tesserocr-2.5.0/PKG-INFO
--- old/tesserocr-2.4.1/PKG-INFO        2019-08-23 18:03:12.000000000 +0200
+++ new/tesserocr-2.5.0/PKG-INFO        2019-11-09 00:11:54.000000000 +0100
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tesserocr
-Version: 2.4.1
+Version: 2.5.0
 Summary: A simple, Pillow-friendly, Python wrapper around tesseract-ocr API 
using Cython
 Home-page: https://github.com/sirfz/tesserocr
 Author: Fayez Zouheiry
@@ -88,20 +88,16 @@
         
         The proposed downloads consist of stand-alone packages containing all 
the Windows libraries needed for execution. This means that no additional 
installation of tesseract is required on your system.
         
+        The recommended method of installation is via Conda as described below.
+        
         Conda
         `````
         
-        You can use the channel `simonflueckiger 
<https://anaconda.org/simonflueckiger/tesserocr>`_ to install from Conda:
-        
-        ::
-        
-            > conda install -c simonflueckiger tesserocr
-        
-        or to get **tesserocr** compiled with **tesseract 4.0.0**:
+        You can use the `conda-forge 
<https://anaconda.org/conda-forge/tesserocr>`_ channel to install from Conda:
         
         ::
         
-            > conda install -c simonflueckiger/label/tesseract-4.0.0-master 
tesserocr
+            > conda install -c conda-forge tesserocr
         
         pip
         ```
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.1/README.rst new/tesserocr-2.5.0/README.rst
--- old/tesserocr-2.4.1/README.rst      2019-08-23 18:01:21.000000000 +0200
+++ new/tesserocr-2.5.0/README.rst      2019-11-09 00:11:39.000000000 +0100
@@ -80,20 +80,16 @@
 
 The proposed downloads consist of stand-alone packages containing all the 
Windows libraries needed for execution. This means that no additional 
installation of tesseract is required on your system.
 
+The recommended method of installation is via Conda as described below.
+
 Conda
 `````
 
-You can use the channel `simonflueckiger 
<https://anaconda.org/simonflueckiger/tesserocr>`_ to install from Conda:
-
-::
-
-    > conda install -c simonflueckiger tesserocr
-
-or to get **tesserocr** compiled with **tesseract 4.0.0**:
+You can use the `conda-forge <https://anaconda.org/conda-forge/tesserocr>`_ 
channel to install from Conda:
 
 ::
 
-    > conda install -c simonflueckiger/label/tesseract-4.0.0-master tesserocr
+    > conda install -c conda-forge tesserocr
 
 pip
 ```
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.1/tesseract.pxd new/tesserocr-2.5.0/tesseract.pxd
--- old/tesserocr-2.4.1/tesseract.pxd   2019-08-23 18:01:21.000000000 +0200
+++ new/tesserocr-2.5.0/tesseract.pxd   2019-11-08 23:49:38.000000000 +0100
@@ -110,7 +110,7 @@
 
 cdef extern from "tesseract/strngs.h" nogil:
     cdef cppclass STRING:
-       cchar_t *string() const
+       cchar_t *c_str() const
        STRING &operator=(cchar_t *)
 
 cdef extern from "tesseract/ocrclass.h" nogil:
@@ -149,6 +149,7 @@
             void SetLineSeparator(cchar_t *)
             void SetParagraphSeparator(cchar_t *)
             float Confidence(PageIteratorLevel) const
+            void RowAttributes(float *, float *, float *) const
             cchar_t *WordFontAttributes(bool *, bool *, bool *, bool *, bool 
*, bool *, int *, int *) const
             cchar_t *WordRecognitionLanguage() const
             StrongScriptDirection WordDirection() const
@@ -157,6 +158,29 @@
             bool WordIsNumeric() const
             bool HasBlamerInfo() const
             cchar_t *GetBlamerDebug() const
+            cchar_t *GetBlamerMisadaptionDebug() const
+            bool HasTruthString() const
+            bool EquivalentToTruth(cchar_t *) const
+            char *WordTruthUTF8Text() const
+            char *WordNormedUTF8Text() const
+            cchar_t *WordLattice(int *) const
+            bool SymbolIsSuperscript() const
+            bool SymbolIsSubscript() const
+            bool SymbolIsDropcap() const
+    ELIF TESSERACT_VERSION >= 0x3040100:
+        cdef cppclass LTRResultIterator(PageIterator):
+            char *GetUTF8Text(PageIteratorLevel) const
+            void SetLineSeparator(cchar_t *)
+            void SetParagraphSeparator(cchar_t *)
+            float Confidence(PageIteratorLevel) const
+            void RowAttributes(float *, float *, float *) const
+            cchar_t *WordFontAttributes(bool *, bool *, bool *, bool *, bool 
*, bool *, int *, int *) const
+            cchar_t *WordRecognitionLanguage() const
+            StrongScriptDirection WordDirection() const
+            bool WordIsFromDictionary() const
+            bool WordIsNumeric() const
+            bool HasBlamerInfo() const
+            cchar_t *GetBlamerDebug() const
             cchar_t *GetBlamerMisadaptionDebug() const
             bool HasTruthString() const
             bool EquivalentToTruth(cchar_t *) const
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.1/tesserocr.egg-info/PKG-INFO new/tesserocr-2.5.0/tesserocr.egg-info/PKG-INFO
--- old/tesserocr-2.4.1/tesserocr.egg-info/PKG-INFO     2019-08-23 18:03:11.000000000 +0200
+++ new/tesserocr-2.5.0/tesserocr.egg-info/PKG-INFO     2019-11-09 00:11:54.000000000 +0100
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tesserocr
-Version: 2.4.1
+Version: 2.5.0
 Summary: A simple, Pillow-friendly, Python wrapper around tesseract-ocr API 
using Cython
 Home-page: https://github.com/sirfz/tesserocr
 Author: Fayez Zouheiry
@@ -88,20 +88,16 @@
         
         The proposed downloads consist of stand-alone packages containing all 
the Windows libraries needed for execution. This means that no additional 
installation of tesseract is required on your system.
         
+        The recommended method of installation is via Conda as described below.
+        
         Conda
         `````
         
-        You can use the channel `simonflueckiger 
<https://anaconda.org/simonflueckiger/tesserocr>`_ to install from Conda:
-        
-        ::
-        
-            > conda install -c simonflueckiger tesserocr
-        
-        or to get **tesserocr** compiled with **tesseract 4.0.0**:
+        You can use the `conda-forge 
<https://anaconda.org/conda-forge/tesserocr>`_ channel to install from Conda:
         
         ::
         
-            > conda install -c simonflueckiger/label/tesseract-4.0.0-master 
tesserocr
+            > conda install -c conda-forge tesserocr
         
         pip
         ```
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.1/tesserocr.pyx new/tesserocr-2.5.0/tesserocr.pyx
--- old/tesserocr-2.4.1/tesserocr.pyx   2019-08-23 18:01:38.000000000 +0200
+++ new/tesserocr-2.5.0/tesserocr.pyx   2019-11-08 23:49:38.000000000 +0100
@@ -18,7 +18,7 @@
  ['eng', 'osd', 'equ'])
 """
 
-__version__ = '2.4.1'
+__version__ = '2.5.0'
 
 import os
 from io import BytesIO
@@ -45,7 +45,7 @@
 
 # default parameters
 setMsgSeverity(L_SEVERITY_NONE)  # suppress leptonica error messages
-cdef TessBaseAPI _api = TessBaseAPI()
+cdef TessBaseAPI _api
 _api.SetVariable('debug_file', '/dev/null')  # suppress tesseract debug 
messages
 _api.Init(NULL, NULL)
 IF TESSERACT_VERSION >= 0x3999800:
@@ -67,7 +67,7 @@
 
 
 cdef class OEM(_Enum):
-    """An enum that defines avaialble OCR engine modes.
+    """An enum that defines available OCR engine modes.
 
     Attributes:
         TESSERACT_ONLY: Run Tesseract only - fastest
@@ -323,7 +323,7 @@
 cdef bytes _image_buffer(image):
     """Return raw bytes of a PIL Image"""
     with BytesIO() as f:
-        image.save(f, image.format or 'JPEG')
+        image.save(f, image.format or 'PNG')
         return f.getvalue()
 
 
@@ -831,6 +831,21 @@
         """
         return self._ltrriter.Confidence(level)
 
+    IF TESSERACT_VERSION >= 0x3040100:
+        def RowAttributes(self):
+            """Return row_height, descenders and ascenders in a dict"""
+            cdef:
+                float row_height
+                float descenders
+                float ascenders
+
+            self._ltrriter.RowAttributes(&row_height, &descenders, &ascenders)
+            return {
+                'row_height': row_height,
+                'descenders': descenders,
+                'ascenders': ascenders
+            }
+
     def WordFontAttributes(self):
         """Return the font attributes of the current word.
 
@@ -1138,7 +1153,7 @@
             applicable language, and there is more chance of hallucinating 
incorrect
             words.
         psm (int): Page segmentation mode. Defaults to :attr:`PSM.AUTO`.
-            See :class:`PSM` for avaialble psm values.
+            See :class:`PSM` for available psm values.
         init (bool): If ``False``, :meth:`Init` will not be called and has to 
be called
             after initialization.
         oem (int): OCR engine mode. Defaults to :attr:`OEM.DEFAULT`.
@@ -1257,7 +1272,7 @@
     def GetIntVariable(self, name):
         """Return the value of the given int parameter if it exists among 
Tesseract parameters.
 
-        Returns ``None`` if the paramter was not found.
+        Returns ``None`` if the parameter was not found.
         """
         cdef:
             bytes py_name = _b(name)
@@ -1269,7 +1284,7 @@
     def GetBoolVariable(self, name):
         """Return the value of the given bool parameter if it exists among 
Tesseract parameters.
 
-        Returns ``None`` if the paramter was not found.
+        Returns ``None`` if the parameter was not found.
         """
         cdef:
             bytes py_name = _b(name)
@@ -1281,7 +1296,7 @@
     def GetDoubleVariable(self, name):
         """Return the value of the given double parameter if it exists among 
Tesseract parameters.
 
-        Returns ``None`` if the paramter was not found.
+        Returns ``None`` if the parameter was not found.
         """
         cdef:
             bytes py_name = _b(name)
@@ -1293,7 +1308,7 @@
     def GetStringVariable(self, name):
         """Return the value of the given string parameter if it exists among 
Tesseract parameters.
 
-        Returns ``None`` if the paramter was not found.
+        Returns ``None`` if the parameter was not found.
         """
         cdef:
             bytes py_name = _b(name)
@@ -1306,13 +1321,13 @@
         """Return the value of named variable as a string (regardless of type),
         if it exists.
 
-        Returns ``None`` if paramter was not found.
+        Returns ``None`` if parameter was not found.
         """
         cdef:
             bytes py_name = _b(name)
             STRING val
         if self._baseapi.GetVariableAsString(py_name, &val):
-            return val.string()
+            return val.c_str()
         return None
 
     def InitFull(self, path=_DEFAULT_PATH, lang=_DEFAULT_LANG,
@@ -1387,7 +1402,7 @@
              OcrEngineMode oem=OEM_DEFAULT):
         """Initialize the API with the given data path, language and OCR 
engine mode.
 
-        See :meth:`InitFull` for more intialization info and options.
+        See :meth:`InitFull` for more initialization info and options.
 
         Args:
             path (str): The name of the parent directory of tessdata.
@@ -1426,7 +1441,7 @@
         """
         cdef GenericVector[STRING] langs
         self._baseapi.GetLoadedLanguagesAsVector(&langs)
-        return [langs[i].string() for i in xrange(langs.size())]
+        return [langs[i].c_str() for i in xrange(langs.size())]
 
     def GetAvailableLanguages(self):
         """Return list of available languages in the init data path"""
@@ -1435,7 +1450,7 @@
             int i
         langs = []
         self._baseapi.GetAvailableLanguagesAsVector(&v)
-        langs = [v[i].string() for i in xrange(v.size())]
+        langs = [v[i].c_str() for i in xrange(v.size())]
         return langs
 
     def InitForAnalysePage(self):
@@ -2416,7 +2431,7 @@
         path (str): The name of the parent directory of tessdata.
             Must end in /.
         oem (int): OCR engine mode. Defaults to :attr:`OEM.DEFAULT`.
-            see :class:`OEM` for all avaialble oem options.
+            see :class:`OEM` for all available oem options.
 
     Returns:
         unicode: The text extracted from the image.
@@ -2466,7 +2481,7 @@
         path (str): The name of the parent directory of tessdata.
             Must end in /.
         oem (int): OCR engine mode. Defaults to :attr:`OEM.DEFAULT`.
-            see :class:`OEM` for all avaialble oem options.
+            see :class:`OEM` for all available oem options.
 
     Returns:
         unicode: The text extracted from the image.
@@ -2514,7 +2529,7 @@
             Must end in /. Default tesseract-ocr datapath is used
             if no path is provided.
 
-    Retruns
+    Returns
         tuple: Tuple with two elements:
             - path (str): tessdata parent directory path
             - languages (list): list of available languages as ISO 639-3 
strings.
@@ -2527,6 +2542,6 @@
     baseapi.Init(py_path, NULL)
     path = baseapi.GetDatapath()
     baseapi.GetAvailableLanguagesAsVector(&v)
-    langs = [v[i].string() for i in xrange(v.size())]
+    langs = [v[i].c_str() for i in xrange(v.size())]
     baseapi.End()
     return path, langs
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.1/tesserocr_experiment.pyx new/tesserocr-2.5.0/tesserocr_experiment.pyx
--- old/tesserocr-2.4.1/tesserocr_experiment.pyx        2016-08-22 15:27:56.000000000 +0200
+++ new/tesserocr-2.5.0/tesserocr_experiment.pyx        2019-11-08 23:49:38.000000000 +0100
@@ -1,4 +1,4 @@
-# An attemp to address the PIL.Image buffer directly without copying it.
+# An attempt to address the PIL.Image buffer directly without copying it.
 #
 # This is achieved by extracting the buffer ptr from Image.im.unsafe_ptrs
 # the xsize, ysize, pixelsize and linesize are extracted as well to be used
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.1/tests/test_api.py new/tesserocr-2.5.0/tests/test_api.py
--- old/tesserocr-2.4.1/tests/test_api.py       2019-08-23 18:01:21.000000000 +0200
+++ new/tesserocr-2.5.0/tests/test_api.py       2019-11-08 23:49:38.000000000 +0100
@@ -326,6 +326,16 @@
         res = self._api.Recognize()
         self.assertTrue(res)
 
+    @unittest.skipIf(_TESSERACT_VERSION < 0x3040100, "tesseract < 4")
+    def test_row_attributes(self):
+        self._api.SetImageFile(self._image_file)
+        self._api.Recognize()
+        it = self._api.GetIterator()
+        attrs = it.RowAttributes()
+        self.assertIsInstance(attrs['row_height'], float)
+        self.assertIsInstance(attrs['ascenders'], float)
+        self.assertIsInstance(attrs['descenders'], float)
+
 
 if __name__ == '__main__':
     unittest.main()


Reply via email to