Hello community,
here is the log from the commit of package python-pytesseract for
openSUSE:Factory checked in at 2020-05-19 14:44:03
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-pytesseract (Old)
and /work/SRC/openSUSE:Factory/.python-pytesseract.new.2738 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-pytesseract"
Tue May 19 14:44:03 2020 rev:9 rq:804002 version:0.3.4
Changes:
--------
--- /work/SRC/openSUSE:Factory/python-pytesseract/python-pytesseract.changes
2020-05-06 11:04:15.415261350 +0200
+++
/work/SRC/openSUSE:Factory/.python-pytesseract.new.2738/python-pytesseract.changes
2020-05-19 14:44:14.127525669 +0200
@@ -3,0 +4,4 @@
+- Update to 0.3.4:
+ - Support for WebP images
+ - Support for python 3.8 (CI testing)
+ - Improved cli error reporting
Old:
----
v0.3.3.tar.gz
New:
----
v0.3.4.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ python-pytesseract.spec ++++++
--- /var/tmp/diff_new_pack.TMK0NN/_old 2020-05-19 14:44:15.811529267 +0200
+++ /var/tmp/diff_new_pack.TMK0NN/_new 2020-05-19 14:44:15.815529276 +0200
@@ -18,7 +18,7 @@
%{?!python_module:%define python_module() python-%{**} python3-%{**}}
Name: python-pytesseract
-Version: 0.3.3
+Version: 0.3.4
Release: 0
Summary: Python wrapper for Google's Tesseract-OCR
License: GPL-3.0-only
++++++ v0.3.3.tar.gz -> v0.3.4.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/pytesseract-0.3.3/.travis.yml
new/pytesseract-0.3.4/.travis.yml
--- old/pytesseract-0.3.3/.travis.yml 2020-03-01 20:42:01.000000000 +0100
+++ new/pytesseract-0.3.4/.travis.yml 2020-04-18 16:27:16.000000000 +0200
@@ -33,9 +33,9 @@
name: "3.7 Xenial"
- os: linux
dist: bionic
- python: 3.7
- env: TOXENV=py37-pre-commit
- name: "3.7 Bionic"
+ python: 3.8
+ env: TOXENV=py38-pre-commit
+ name: "3.8 Bionic"
before_install:
- sudo apt-get install -y tesseract-ocr
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/pytesseract-0.3.3/README.rst
new/pytesseract-0.3.4/README.rst
--- old/pytesseract-0.3.3/README.rst 2020-03-01 20:42:01.000000000 +0100
+++ new/pytesseract-0.3.4/README.rst 2020-04-18 16:27:16.000000000 +0200
@@ -1,10 +1,6 @@
Python Tesseract
================
-.. image:: https://travis-ci.org/madmaze/pytesseract.svg
- :target: https://travis-ci.org/madmaze/pytesseract
- :alt: Travis build status
-
.. image:: https://img.shields.io/pypi/pyversions/pytesseract.svg
:target: https://pypi.python.org/pypi/pytesseract
:alt: Python versions
@@ -21,6 +17,10 @@
:target: https://anaconda.org/conda-forge/pytesseract
:alt: Conda release
+.. image:: https://travis-ci.org/madmaze/pytesseract.svg
+ :target: https://travis-ci.org/madmaze/pytesseract
+ :alt: Travis build status
+
Python-tesseract is an optical character recognition (OCR) tool for python.
That is, it will recognize and "read" the text embedded in images.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/pytesseract-0.3.3/setup.py
new/pytesseract-0.3.4/setup.py
--- old/pytesseract-0.3.3/setup.py 2020-03-01 20:42:01.000000000 +0100
+++ new/pytesseract-0.3.4/setup.py 2020-04-18 16:27:16.000000000 +0200
@@ -14,7 +14,7 @@
setup(
name=PACKAGE_NAME,
- version='0.3.3',
+ version='0.3.4',
author='Samuel Hoffstaetter',
author_email='[email protected]',
maintainer='Matthias Lee',
@@ -42,5 +42,6 @@
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
+ 'Programming Language :: Python :: 3.8',
],
)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/pytesseract-0.3.3/src/pytesseract.py
new/pytesseract-0.3.4/src/pytesseract.py
--- old/pytesseract-0.3.3/src/pytesseract.py 2020-03-01 20:42:01.000000000
+0100
+++ new/pytesseract-0.3.4/src/pytesseract.py 2020-04-18 16:27:16.000000000
+0200
@@ -34,7 +34,17 @@
import pandas as pd
RGB_MODE = 'RGB'
-SUPPORTED_FORMATS = {'JPEG', 'PNG', 'PBM', 'PGM', 'PPM', 'TIFF', 'BMP', 'GIF'}
+SUPPORTED_FORMATS = {
+ 'JPEG',
+ 'PNG',
+ 'PBM',
+ 'PGM',
+ 'PPM',
+ 'TIFF',
+ 'BMP',
+ 'GIF',
+ 'WEBP',
+}
OSD_KEYS = {
'Page number': ('page_num', int),
@@ -68,7 +78,7 @@
class TesseractNotFoundError(EnvironmentError):
def __init__(self):
super(TesseractNotFoundError, self).__init__(
- tesseract_cmd + " is not installed or it's not in your path",
+ tesseract_cmd + " is not installed or it's not in your PATH",
)
@@ -457,6 +467,9 @@
try:
with Image.open(filename) as img:
print(image_to_string(img, lang=lang))
+ except TesseractNotFoundError as e:
+ sys.stderr.write('{}\n'.format(str(e)))
+ exit(1)
except IOError:
sys.stderr.write('ERROR: Could not open file "%s"\n' % filename)
exit(1)
Binary files old/pytesseract-0.3.3/tests/data/test.webp and
new/pytesseract-0.3.4/tests/data/test.webp differ
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/pytesseract-0.3.3/tests/pytesseract_test.py
new/pytesseract-0.3.4/tests/pytesseract_test.py
--- old/pytesseract-0.3.3/tests/pytesseract_test.py 2020-03-01
20:42:01.000000000 +0100
+++ new/pytesseract-0.3.4/tests/pytesseract_test.py 2020-04-18
16:27:16.000000000 +0200
@@ -49,6 +49,11 @@
@pytest.fixture(scope='session')
+def test_invalid_file():
+ return TEST_JPEG + 'invalid'
+
+
@pytest.fixture(scope='session')
def test_file_european():
return path.join(DATA_DIR, 'test-european.jpg')
@@ -62,6 +67,7 @@
'test.ppm',
'test.tiff',
'test.gif',
+ 'test.webp',
# 'test.bmp', # https://github.com/tesseract-ocr/tesseract/issues/2558
],
ids=[
@@ -71,6 +77,7 @@
'ppm',
'tiff',
'gif',
+ 'webp',
# 'bmp',
],
)
@@ -119,7 +126,14 @@
def test_image_to_string_multiprocessing():
"""Test parallel system calls."""
- test_files = ['test.jpg', 'test.pgm', 'test.png', 'test.ppm', 'test.tiff']
+ test_files = [
+ 'test.jpg',
+ 'test.pgm',
+ 'test.png',
+ 'test.ppm',
+ 'test.tiff',
+ 'test.webp',
+ ]
test_files = [path.join(DATA_DIR, test_file) for test_file in test_files]
p = Pool(2)
results = p.map(image_to_string, test_files)
@@ -271,15 +285,36 @@
[r'wrong_tesseract', getcwd() + path.sep + r'wrong_tesseract'],
ids=['executable_name', 'absolute_path'],
)
-def test_wrong_tesseract_cmd(test_file, test_path):
+def test_wrong_tesseract_cmd(monkeypatch, test_file, test_path):
"""Test wrong or missing tesseract command."""
import pytesseract
- pytesseract.pytesseract.tesseract_cmd = test_path
+ monkeypatch.setattr(
+ 'pytesseract.pytesseract.tesseract_cmd', test_path,
+ )
with pytest.raises(TesseractNotFoundError):
pytesseract.pytesseract.image_to_string(test_file)
- pytesseract.pytesseract.tesseract_cmd = (
- 'tesseract' # restore the def value
+
+
+def test_main_not_found_cases(
+ capsys, monkeypatch, test_file, test_invalid_file,
+):
+ """Test wrong or missing tesseract command in main."""
+ import pytesseract
+
+ monkeypatch.setattr('sys.argv', ['', test_invalid_file])
+ with pytest.raises(SystemExit):
+ pytesseract.pytesseract.main()
+ assert capsys.readouterr().err.startswith('ERROR: Could not open file')
+
+ monkeypatch.setattr(
+ 'pytesseract.pytesseract.tesseract_cmd', 'wrong_tesseract',
+ )
+ monkeypatch.setattr('sys.argv', ['', test_file])
+ with pytest.raises(SystemExit):
+ pytesseract.pytesseract.main()
+ assert capsys.readouterr().err.endswith(
+ "is not installed or it's not in your PATH\n",
)
@@ -288,15 +323,14 @@
[path.sep + r'wrong_tesseract', r''],
ids=['permission_error_path', 'invalid_path'],
)
-def test_proper_oserror_exception_handling(test_file, test_path):
+def test_proper_oserror_exception_handling(monkeypatch, test_file, test_path):
""""Test for bubbling up OSError exceptions."""
import pytesseract
- pytesseract.pytesseract.tesseract_cmd = test_path
+ monkeypatch.setattr(
+ 'pytesseract.pytesseract.tesseract_cmd', test_path,
+ )
with pytest.raises(
TesseractNotFoundError if IS_PYTHON_2 and test_path else OSError,
):
pytesseract.pytesseract.image_to_string(test_file)
- pytesseract.pytesseract.tesseract_cmd = (
- 'tesseract' # restore the def value
- )
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/pytesseract-0.3.3/tox.ini
new/pytesseract-0.3.4/tox.ini
--- old/pytesseract-0.3.3/tox.ini 2020-03-01 20:42:01.000000000 +0100
+++ new/pytesseract-0.3.4/tox.ini 2020-04-18 16:27:16.000000000 +0200
@@ -3,7 +3,8 @@
py27
py35
py36
- py37-pre-commit
+ py37
+ py38-pre-commit
skip_missing_interpreters = true
[pytest]
@@ -18,7 +19,7 @@
commands =
python -bb -m pytest
-[testenv:py37-pre-commit]
+[testenv:py38-pre-commit]
deps =
numpy
pandas