Hello community, here is the log from the commit of package python-tesserocr for openSUSE:Factory checked in at 2019-12-03 15:21:47 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-tesserocr (Old) and /work/SRC/openSUSE:Factory/.python-tesserocr.new.4691 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-tesserocr" Tue Dec 3 15:21:47 2019 rev:8 rq:753070 version:2.5.0 Changes: -------- --- /work/SRC/openSUSE:Factory/python-tesserocr/python-tesserocr.changes 2019-08-24 18:47:34.657750513 +0200 +++ /work/SRC/openSUSE:Factory/.python-tesserocr.new.4691/python-tesserocr.changes 2019-12-03 15:21:50.130521845 +0100 @@ -1,0 +2,9 @@ +Tue Nov 26 00:49:44 UTC 2019 - Martin Herkt <9+suse@cirno.systems> + +- Update to version 2.5.0 + * Support for RowAttributes method in LTRResultIterator + * SetImage: use PNG instead of JPEG fallback + * Replace STRING::string() by c_str() + * Don't use assignment operator for TessBaseAPI + +------------------------------------------------------------------- Old: ---- tesserocr-2.4.1.tar.gz New: ---- tesserocr-2.5.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-tesserocr.spec ++++++ --- /var/tmp/diff_new_pack.VK66EF/_old 2019-12-03 15:21:50.822521528 +0100 +++ /var/tmp/diff_new_pack.VK66EF/_new 2019-12-03 15:21:50.826521526 +0100 @@ -1,7 +1,7 @@ # # spec file for package python-tesserocr # -# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2019 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -18,7 +18,7 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-tesserocr -Version: 2.4.1 +Version: 2.5.0 Release: 0 Summary: A Python wrapper around tesseract-ocr License: MIT ++++++ tesserocr-2.4.1.tar.gz -> tesserocr-2.5.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.1/PKG-INFO new/tesserocr-2.5.0/PKG-INFO --- old/tesserocr-2.4.1/PKG-INFO 2019-08-23 18:03:12.000000000 +0200 +++ new/tesserocr-2.5.0/PKG-INFO 2019-11-09 00:11:54.000000000 +0100 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: tesserocr -Version: 2.4.1 +Version: 2.5.0 Summary: A simple, Pillow-friendly, Python wrapper around tesseract-ocr API using Cython Home-page: https://github.com/sirfz/tesserocr Author: Fayez Zouheiry @@ -88,20 +88,16 @@ The proposed downloads consist of stand-alone packages containing all the Windows libraries needed for execution. This means that no additional installation of tesseract is required on your system. + The recommended method of installation is via Conda as described below. + Conda ````` - You can use the channel `simonflueckiger <https://anaconda.org/simonflueckiger/tesserocr>`_ to install from Conda: - - :: - - > conda install -c simonflueckiger tesserocr - - or to get **tesserocr** compiled with **tesseract 4.0.0**: + You can use the `conda-forge <https://anaconda.org/conda-forge/tesserocr>`_ channel to install from Conda: :: - > conda install -c simonflueckiger/label/tesseract-4.0.0-master tesserocr + > conda install -c conda-forge tesserocr pip ``` diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.1/README.rst new/tesserocr-2.5.0/README.rst --- old/tesserocr-2.4.1/README.rst 2019-08-23 18:01:21.000000000 +0200 +++ new/tesserocr-2.5.0/README.rst 2019-11-09 00:11:39.000000000 +0100 @@ -80,20 +80,16 @@ The proposed downloads consist of stand-alone packages containing all the Windows libraries needed for execution. This means that no additional installation of tesseract is required on your system. +The recommended method of installation is via Conda as described below. + Conda ````` -You can use the channel `simonflueckiger <https://anaconda.org/simonflueckiger/tesserocr>`_ to install from Conda: - -:: - - > conda install -c simonflueckiger tesserocr - -or to get **tesserocr** compiled with **tesseract 4.0.0**: +You can use the `conda-forge <https://anaconda.org/conda-forge/tesserocr>`_ channel to install from Conda: :: - > conda install -c simonflueckiger/label/tesseract-4.0.0-master tesserocr + > conda install -c conda-forge tesserocr pip ``` diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.1/tesseract.pxd new/tesserocr-2.5.0/tesseract.pxd --- old/tesserocr-2.4.1/tesseract.pxd 2019-08-23 18:01:21.000000000 +0200 +++ new/tesserocr-2.5.0/tesseract.pxd 2019-11-08 23:49:38.000000000 +0100 @@ -110,7 +110,7 @@ cdef extern from "tesseract/strngs.h" nogil: cdef cppclass STRING: - cchar_t *string() const + cchar_t *c_str() const STRING &operator=(cchar_t *) cdef extern from "tesseract/ocrclass.h" nogil: @@ -149,6 +149,7 @@ void SetLineSeparator(cchar_t *) void SetParagraphSeparator(cchar_t *) float Confidence(PageIteratorLevel) const + void RowAttributes(float *, float *, float *) const cchar_t *WordFontAttributes(bool *, bool *, bool *, bool *, bool *, bool *, int *, int *) const cchar_t *WordRecognitionLanguage() const StrongScriptDirection WordDirection() const @@ -157,6 +158,29 @@ bool WordIsNumeric() const bool HasBlamerInfo() const cchar_t *GetBlamerDebug() const + cchar_t *GetBlamerMisadaptionDebug() const + bool HasTruthString() const + bool EquivalentToTruth(cchar_t *) const + char *WordTruthUTF8Text() const + char *WordNormedUTF8Text() const + cchar_t *WordLattice(int *) const + bool SymbolIsSuperscript() const + bool SymbolIsSubscript() const + bool SymbolIsDropcap() const + ELIF TESSERACT_VERSION >= 0x3040100: + cdef cppclass LTRResultIterator(PageIterator): + char *GetUTF8Text(PageIteratorLevel) const + void SetLineSeparator(cchar_t *) + void SetParagraphSeparator(cchar_t *) + float Confidence(PageIteratorLevel) const + void RowAttributes(float *, float *, float *) const + cchar_t *WordFontAttributes(bool *, bool *, bool *, bool *, bool *, bool *, int *, int *) const + cchar_t *WordRecognitionLanguage() const + StrongScriptDirection WordDirection() const + bool WordIsFromDictionary() const + bool WordIsNumeric() const + bool HasBlamerInfo() const + cchar_t *GetBlamerDebug() const cchar_t *GetBlamerMisadaptionDebug() const bool HasTruthString() const bool EquivalentToTruth(cchar_t *) const diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.1/tesserocr.egg-info/PKG-INFO new/tesserocr-2.5.0/tesserocr.egg-info/PKG-INFO --- old/tesserocr-2.4.1/tesserocr.egg-info/PKG-INFO 2019-08-23 18:03:11.000000000 +0200 +++ new/tesserocr-2.5.0/tesserocr.egg-info/PKG-INFO 2019-11-09 00:11:54.000000000 +0100 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: tesserocr -Version: 2.4.1 +Version: 2.5.0 Summary: A simple, Pillow-friendly, Python wrapper around tesseract-ocr API using Cython Home-page: https://github.com/sirfz/tesserocr Author: Fayez Zouheiry @@ -88,20 +88,16 @@ The proposed downloads consist of stand-alone packages containing all the Windows libraries needed for execution. This means that no additional installation of tesseract is required on your system. + The recommended method of installation is via Conda as described below. + Conda ````` - You can use the channel `simonflueckiger <https://anaconda.org/simonflueckiger/tesserocr>`_ to install from Conda: - - :: - - > conda install -c simonflueckiger tesserocr - - or to get **tesserocr** compiled with **tesseract 4.0.0**: + You can use the `conda-forge <https://anaconda.org/conda-forge/tesserocr>`_ channel to install from Conda: :: - > conda install -c simonflueckiger/label/tesseract-4.0.0-master tesserocr + > conda install -c conda-forge tesserocr pip ``` diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.1/tesserocr.pyx new/tesserocr-2.5.0/tesserocr.pyx --- old/tesserocr-2.4.1/tesserocr.pyx 2019-08-23 18:01:38.000000000 +0200 +++ new/tesserocr-2.5.0/tesserocr.pyx 2019-11-08 23:49:38.000000000 +0100 @@ -18,7 +18,7 @@ ['eng', 'osd', 'equ']) """ -__version__ = '2.4.1' +__version__ = '2.5.0' import os from io import BytesIO @@ -45,7 +45,7 @@ # default parameters setMsgSeverity(L_SEVERITY_NONE) # suppress leptonica error messages -cdef TessBaseAPI _api = TessBaseAPI() +cdef TessBaseAPI _api _api.SetVariable('debug_file', '/dev/null') # suppress tesseract debug messages _api.Init(NULL, NULL) IF TESSERACT_VERSION >= 0x3999800: @@ -67,7 +67,7 @@ cdef class OEM(_Enum): - """An enum that defines avaialble OCR engine modes. + """An enum that defines available OCR engine modes. Attributes: TESSERACT_ONLY: Run Tesseract only - fastest @@ -323,7 +323,7 @@ cdef bytes _image_buffer(image): """Return raw bytes of a PIL Image""" with BytesIO() as f: - image.save(f, image.format or 'JPEG') + image.save(f, image.format or 'PNG') return f.getvalue() @@ -831,6 +831,21 @@ """ return self._ltrriter.Confidence(level) + IF TESSERACT_VERSION >= 0x3040100: + def RowAttributes(self): + """Return row_height, descenders and ascenders in a dict""" + cdef: + float row_height + float descenders + float ascenders + + self._ltrriter.RowAttributes(&row_height, &descenders, &ascenders) + return { + 'row_height': row_height, + 'descenders': descenders, + 'ascenders': ascenders + } + def WordFontAttributes(self): """Return the font attributes of the current word. @@ -1138,7 +1153,7 @@ applicable language, and there is more chance of hallucinating incorrect words. psm (int): Page segmentation mode. Defaults to :attr:`PSM.AUTO`. - See :class:`PSM` for avaialble psm values. + See :class:`PSM` for available psm values. init (bool): If ``False``, :meth:`Init` will not be called and has to be called after initialization. oem (int): OCR engine mode. Defaults to :attr:`OEM.DEFAULT`. @@ -1257,7 +1272,7 @@ def GetIntVariable(self, name): """Return the value of the given int parameter if it exists among Tesseract parameters. - Returns ``None`` if the paramter was not found. + Returns ``None`` if the parameter was not found. """ cdef: bytes py_name = _b(name) @@ -1269,7 +1284,7 @@ def GetBoolVariable(self, name): """Return the value of the given bool parameter if it exists among Tesseract parameters. - Returns ``None`` if the paramter was not found. + Returns ``None`` if the parameter was not found. """ cdef: bytes py_name = _b(name) @@ -1281,7 +1296,7 @@ def GetDoubleVariable(self, name): """Return the value of the given double parameter if it exists among Tesseract parameters. - Returns ``None`` if the paramter was not found. + Returns ``None`` if the parameter was not found. """ cdef: bytes py_name = _b(name) @@ -1293,7 +1308,7 @@ def GetStringVariable(self, name): """Return the value of the given string parameter if it exists among Tesseract parameters. - Returns ``None`` if the paramter was not found. + Returns ``None`` if the parameter was not found. """ cdef: bytes py_name = _b(name) @@ -1306,13 +1321,13 @@ """Return the value of named variable as a string (regardless of type), if it exists. - Returns ``None`` if paramter was not found. + Returns ``None`` if parameter was not found. """ cdef: bytes py_name = _b(name) STRING val if self._baseapi.GetVariableAsString(py_name, &val): - return val.string() + return val.c_str() return None def InitFull(self, path=_DEFAULT_PATH, lang=_DEFAULT_LANG, @@ -1387,7 +1402,7 @@ OcrEngineMode oem=OEM_DEFAULT): """Initialize the API with the given data path, language and OCR engine mode. - See :meth:`InitFull` for more intialization info and options. + See :meth:`InitFull` for more initialization info and options. Args: path (str): The name of the parent directory of tessdata. @@ -1426,7 +1441,7 @@ """ cdef GenericVector[STRING] langs self._baseapi.GetLoadedLanguagesAsVector(&langs) - return [langs[i].string() for i in xrange(langs.size())] + return [langs[i].c_str() for i in xrange(langs.size())] def GetAvailableLanguages(self): """Return list of available languages in the init data path""" @@ -1435,7 +1450,7 @@ int i langs = [] self._baseapi.GetAvailableLanguagesAsVector(&v) - langs = [v[i].string() for i in xrange(v.size())] + langs = [v[i].c_str() for i in xrange(v.size())] return langs def InitForAnalysePage(self): @@ -2416,7 +2431,7 @@ path (str): The name of the parent directory of tessdata. Must end in /. oem (int): OCR engine mode. Defaults to :attr:`OEM.DEFAULT`. - see :class:`OEM` for all avaialble oem options. + see :class:`OEM` for all available oem options. Returns: unicode: The text extracted from the image. @@ -2466,7 +2481,7 @@ path (str): The name of the parent directory of tessdata. Must end in /. oem (int): OCR engine mode. Defaults to :attr:`OEM.DEFAULT`. - see :class:`OEM` for all avaialble oem options. + see :class:`OEM` for all available oem options. Returns: unicode: The text extracted from the image. @@ -2514,7 +2529,7 @@ Must end in /. Default tesseract-ocr datapath is used if no path is provided. - Retruns + Returns tuple: Tuple with two elements: - path (str): tessdata parent directory path - languages (list): list of available languages as ISO 639-3 strings. @@ -2527,6 +2542,6 @@ baseapi.Init(py_path, NULL) path = baseapi.GetDatapath() baseapi.GetAvailableLanguagesAsVector(&v) - langs = [v[i].string() for i in xrange(v.size())] + langs = [v[i].c_str() for i in xrange(v.size())] baseapi.End() return path, langs diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.1/tesserocr_experiment.pyx new/tesserocr-2.5.0/tesserocr_experiment.pyx --- old/tesserocr-2.4.1/tesserocr_experiment.pyx 2016-08-22 15:27:56.000000000 +0200 +++ new/tesserocr-2.5.0/tesserocr_experiment.pyx 2019-11-08 23:49:38.000000000 +0100 @@ -1,4 +1,4 @@ -# An attemp to address the PIL.Image buffer directly without copying it. +# An attempt to address the PIL.Image buffer directly without copying it. # # This is achieved by extracting the buffer ptr from Image.im.unsafe_ptrs # the xsize, ysize, pixelsize and linesize are extracted as well to be used diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesserocr-2.4.1/tests/test_api.py new/tesserocr-2.5.0/tests/test_api.py --- old/tesserocr-2.4.1/tests/test_api.py 2019-08-23 18:01:21.000000000 +0200 +++ new/tesserocr-2.5.0/tests/test_api.py 2019-11-08 23:49:38.000000000 +0100 @@ -326,6 +326,16 @@ res = self._api.Recognize() self.assertTrue(res) + @unittest.skipIf(_TESSERACT_VERSION < 0x3040100, "tesseract < 4") + def test_row_attributes(self): + self._api.SetImageFile(self._image_file) + self._api.Recognize() + it = self._api.GetIterator() + attrs = it.RowAttributes() + self.assertIsInstance(attrs['row_height'], float) + self.assertIsInstance(attrs['ascenders'], float) + self.assertIsInstance(attrs['descenders'], float) + if __name__ == '__main__': unittest.main()