Date: Wednesday, December 19, 2018 @ 23:23:16 Author: bgyorgy Revision: 416799
Move paperwork from AUR with 26 votes Also move its dependencies: libpillowfight, python-pyinsane, python-pyocr, python-simplebayes. Added: libpillowfight/ libpillowfight/trunk/ libpillowfight/trunk/PKGBUILD paperwork/ paperwork/trunk/ paperwork/trunk/0001-Filter-out-boxes-that-start-at-0-0.patch paperwork/trunk/0001-Filter-out-too-large-boxes-on-selection.patch paperwork/trunk/0001-Fix-importing-PNG-files-with-transparency.patch paperwork/trunk/0001-util-find_language-New-versions-of-pycountry-do-not-.patch paperwork/trunk/PKGBUILD python-pyinsane/ python-pyinsane/trunk/ python-pyinsane/trunk/PKGBUILD python-pyocr/ python-pyocr/trunk/ python-pyocr/trunk/PKGBUILD python-simplebayes/ python-simplebayes/trunk/ python-simplebayes/trunk/PKGBUILD ---------------------------------------------------------------------------------+ libpillowfight/trunk/PKGBUILD | 50 +++++ paperwork/trunk/0001-Filter-out-boxes-that-start-at-0-0.patch | 37 ++++ paperwork/trunk/0001-Filter-out-too-large-boxes-on-selection.patch | 39 ++++ paperwork/trunk/0001-Fix-importing-PNG-files-with-transparency.patch | 25 ++ paperwork/trunk/0001-util-find_language-New-versions-of-pycountry-do-not-.patch | 87 ++++++++++ paperwork/trunk/PKGBUILD | 62 +++++++ python-pyinsane/trunk/PKGBUILD | 30 +++ python-pyocr/trunk/PKGBUILD | 32 +++ python-simplebayes/trunk/PKGBUILD | 51 +++++ 9 files changed, 413 insertions(+) Added: libpillowfight/trunk/PKGBUILD =================================================================== --- libpillowfight/trunk/PKGBUILD (rev 0) +++ libpillowfight/trunk/PKGBUILD 2018-12-19 23:23:16 UTC (rev 416799) @@ -0,0 +1,50 @@ +# Maintainer: Balló György <ballogyor+arch at gmail dot com> + +pkgbase=libpillowfight +pkgname=(libpillowfight python-pillowfight) +pkgver=0.2.4 +pkgrel=1 +pkgdesc="Small library containing various image processing algorithms" +arch=(x86_64) +url="https://gitlab.gnome.org/World/OpenPaperwork/libpillowfight" +license=(GPL2) +depends=(glibc python-pillow) +makedepends=(cmake git python-setuptools) +_commit=3a7ebba75f4c060ac1707900bd8891e8d200b1c6 # tags/0.2.4^0 +source=("git+https://gitlab.gnome.org/World/OpenPaperwork/libpillowfight.git#commit=$_commit") +sha256sums=('SKIP') + +pkgver() { + cd $pkgbase + git describe --tags | sed 's/-/+/g' +} + +prepare() { + [[ -d build ]] || mkdir build +} + + +build() { + cd $pkgbase + make build_py + + cd ../build + cmake -G "Unix Makefiles" ../$pkgbase/ \ + -DCMAKE_INSTALL_PREFIX=/usr + make + +} + +package_libpillowfight() { + depends=(glibc) + + cd build + make DESTDIR="$pkgdir" install +} + +package_python-pillowfight() { + depends=(python-pillow) + + cd $pkgbase + python3 setup.py install --root="$pkgdir" --optimize=1 +} Property changes on: libpillowfight/trunk/PKGBUILD ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Added: paperwork/trunk/0001-Filter-out-boxes-that-start-at-0-0.patch =================================================================== --- paperwork/trunk/0001-Filter-out-boxes-that-start-at-0-0.patch (rev 0) +++ paperwork/trunk/0001-Filter-out-boxes-that-start-at-0-0.patch 2018-12-19 23:23:16 UTC (rev 416799) @@ -0,0 +1,37 @@ +From d1da8cd20554aa1d3c766855c251a5a643d2abb4 Mon Sep 17 00:00:00 2001 +From: Jonas Wloka <c...@jowlo.de> +Date: Sat, 17 Nov 2018 19:54:20 +0100 +Subject: [PATCH] Filter out boxes that start at (0, 0) + +Tesseract returns way too large boxes that cover the whole page, mostly +containing only a single special character. All of these boxes (in my +tests) have coordinate 0 0. + +This filters out all of these boxes. +--- + paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py b/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py +index 35c6b7f4..e324c670 100644 +--- a/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py ++++ b/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py +@@ -251,10 +251,14 @@ class JobPageBoxesLoader(Job): + boxes = set() + for line in line_boxes: + for word in line.word_boxes: +- if word.content.strip() == "": ++ if word.content.strip() == "" or (word.position[0][0] == 0 ++ and word.position[0][1] == 0): + # XXX(Jflesch): Tesseract 3.03 (hOCR) returns big and + # empty word boxes sometimes (just a single space + # inside). They often match images, but not always. ++ # XXX(jowlo): Tesseract returns large boxes containing ++ # single letters (mostly special chars) that cover the ++ # whole page. All of these start at (0 0) + continue + boxes.add(word) + +-- +2.20.0 + Added: paperwork/trunk/0001-Filter-out-too-large-boxes-on-selection.patch =================================================================== --- paperwork/trunk/0001-Filter-out-too-large-boxes-on-selection.patch (rev 0) +++ paperwork/trunk/0001-Filter-out-too-large-boxes-on-selection.patch 2018-12-19 23:23:16 UTC (rev 416799) @@ -0,0 +1,39 @@ +From 318d9ef80a7dc21da7ad45fa46c11c8fb19ec8fb Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ball=C3=B3=20Gy=C3=B6rgy?= <ballog...@gmail.com> +Date: Wed, 19 Dec 2018 16:59:46 +0100 +Subject: [PATCH] Filter out too large boxes on selection + +Tesseract returns way too large boxes that cover the whole page, mostly containing only a single special character. + +This is a complement for commit d1da8cd2 to filter out all of these boxes also on text selection. +--- + paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py b/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py +index e324c670..b022b178 100644 +--- a/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py ++++ b/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py +@@ -731,6 +731,9 @@ class SimplePageDrawer(Drawer): + index = 0 + for line in self.boxes['lines']: + for box in line.word_boxes: ++ if box.content.strip() == "" or (box.position[0][0] == 0 ++ and box.position[0][1] == 0): ++ continue + rcx = (box.position[0][0] + box.position[1][0]) / 2 + rcy = (box.position[0][1] + box.position[1][1]) / 2 + w = box.position[1][0] - box.position[0][0] +@@ -767,6 +770,9 @@ class SimplePageDrawer(Drawer): + selected = [] + for line in self.boxes['lines']: + for box in line.word_boxes: ++ if box.content.strip() == "" or (box.position[0][0] == 0 ++ and box.position[0][1] == 0): ++ continue + if box == box_start: + in_list = True + if in_list: +-- +2.20.0 + Added: paperwork/trunk/0001-Fix-importing-PNG-files-with-transparency.patch =================================================================== --- paperwork/trunk/0001-Fix-importing-PNG-files-with-transparency.patch (rev 0) +++ paperwork/trunk/0001-Fix-importing-PNG-files-with-transparency.patch 2018-12-19 23:23:16 UTC (rev 416799) @@ -0,0 +1,25 @@ +From 2ffb468a41d28eceda3afe869dd8c5af70203bf4 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ball=C3=B3=20Gy=C3=B6rgy?= <ballog...@gmail.com> +Date: Wed, 19 Dec 2018 17:19:27 +0100 +Subject: [PATCH] Fix importing PNG files with transparency + +Pillow does not allow to save images with transparency to JPEG. These images need to be converted first. +--- + paperwork-backend/paperwork_backend/img/page.py | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/paperwork-backend/paperwork_backend/img/page.py b/paperwork-backend/paperwork_backend/img/page.py +index ddbb9214..86d5e4be 100644 +--- a/paperwork-backend/paperwork_backend/img/page.py ++++ b/paperwork-backend/paperwork_backend/img/page.py +@@ -141,6 +141,7 @@ class ImgPage(BasicPage): + + def __set_img(self, img): + with self.fs.open(self.__img_path, 'wb') as fd: ++ img = img.convert("RGB") + img.save(fd, format="JPEG") + + img = property(__get_img, __set_img) +-- +2.20.0 + Added: paperwork/trunk/0001-util-find_language-New-versions-of-pycountry-do-not-.patch =================================================================== --- paperwork/trunk/0001-util-find_language-New-versions-of-pycountry-do-not-.patch (rev 0) +++ paperwork/trunk/0001-util-find_language-New-versions-of-pycountry-do-not-.patch 2018-12-19 23:23:16 UTC (rev 416799) @@ -0,0 +1,87 @@ +From ad4555f9904805e4fd56a30ca6529536edb0f919 Mon Sep 17 00:00:00 2001 +From: Jerome Flesch <jfle...@openpaper.work> +Date: Wed, 19 Dec 2018 22:38:59 +0100 +Subject: [PATCH] util: find_language(): New versions of pycountry do not raise + exception when a language is not found, they return None. Ref #812 + +Signed-off-by: Jerome Flesch <jfle...@openpaper.work> +--- + paperwork-backend/paperwork_backend/util.py | 60 ++++++++------------- + 1 file changed, 23 insertions(+), 37 deletions(-) + +diff --git a/paperwork-backend/paperwork_backend/util.py b/paperwork-backend/paperwork_backend/util.py +index 90fd0eb9..73701c8a 100644 +--- a/paperwork-backend/paperwork_backend/util.py ++++ b/paperwork-backend/paperwork_backend/util.py +@@ -312,45 +312,31 @@ def find_language(lang_str=None, allow_none=False): + lang_str = lang_str.split("_")[0] + + try: +- return pycountry.pycountry.languages.get(name=lang_str.title()) +- except (KeyError, UnicodeDecodeError): +- pass +- try: +- return pycountry.pycountry.languages.get(iso_639_3_code=lang_str) +- except (KeyError, UnicodeDecodeError): +- pass +- try: +- return pycountry.pycountry.languages.get(iso639_3_code=lang_str) +- except (KeyError, UnicodeDecodeError): +- pass +- try: +- return pycountry.pycountry.languages.get(iso639_2T_code=lang_str) +- except (KeyError, UnicodeDecodeError): +- pass +- try: +- return pycountry.pycountry.languages.get(iso639_1_code=lang_str) +- except (KeyError, UnicodeDecodeError): +- pass +- try: +- return pycountry.pycountry.languages.get(terminology=lang_str) +- except (KeyError, UnicodeDecodeError): +- pass +- try: +- return pycountry.pycountry.languages.get(bibliographic=lang_str) +- except (KeyError, UnicodeDecodeError): +- pass +- try: +- return pycountry.pycountry.languages.get(alpha_3=lang_str) +- except (KeyError, UnicodeDecodeError): +- pass +- try: +- return pycountry.pycountry.languages.get(alpha_2=lang_str) +- except (KeyError, UnicodeDecodeError): +- pass +- try: +- return pycountry.pycountry.languages.get(alpha2=lang_str) ++ r = pycountry.pycountry.languages.get(name=lang_str.title()) ++ if r is not None: ++ return r + except (KeyError, UnicodeDecodeError): + pass ++ ++ ATTRS = ( ++ 'iso_639_3_code', ++ 'iso639_3_code', ++ 'iso639_2T_code', ++ 'iso639_1_code', ++ 'terminology', ++ 'bibliographic', ++ 'alpha_3', ++ 'alpha_2', ++ 'alpha2' ++ ) ++ for attr in ATTRS: ++ try: ++ r = pycountry.pycountry.languages.get(**{attr: lang_str}) ++ if r is not None: ++ return r ++ except (KeyError, UnicodeDecodeError): ++ pass ++ + if allow_none: + logger.warning("Unknown language [{}]".format(lang_str)) + return None +-- +2.20.0 + Added: paperwork/trunk/PKGBUILD =================================================================== --- paperwork/trunk/PKGBUILD (rev 0) +++ paperwork/trunk/PKGBUILD 2018-12-19 23:23:16 UTC (rev 416799) @@ -0,0 +1,62 @@ +# Maintainer: Balló György <ballogyor+arch at gmail dot com> + +pkgname=paperwork +pkgver=1.2.4 +pkgrel=3 +pkgdesc="Personal document manager for GNOME to manage scanned documents and PDFs" +arch=(any) +url="https://openpaper.work/" +license=(GPL3) +depends=(gtk3 libnotify poppler-glib python-cairo python-dateutil python-gobject + python-levenshtein python-natsort python-pillowfight python-pycountry python-pyenchant + python-pyinsane python-pyocr python-setuptools python-simplebayes python-termcolor + python-whoosh python-xdg) +makedepends=(git) +_commit=36c00eaac636c6fa6db58f2f86a01e1c1abf2398 # tags/1.2.4^0 +source=("git+https://gitlab.gnome.org/World/OpenPaperwork/paperwork.git#commit=$_commit" + 0001-Filter-out-boxes-that-start-at-0-0.patch + 0001-Filter-out-too-large-boxes-on-selection.patch + 0001-Fix-importing-PNG-files-with-transparency.patch + 0001-util-find_language-New-versions-of-pycountry-do-not-.patch) +sha256sums=('SKIP' + '8fb8b760fba68c393b33bf2f2b52c6e7edf46d8958863c8f02c4f988842d84e3' + '1e11b2c18181b9ab36f51f25ff2e489759ae12245de41a4dcf377def400afe99' + 'aa867fb97da9a53c0e2beb758c26881dc2a16265e9b2deaa83e6fe1443ec8260' + '00f501fb42abe44ec34982fbfd0f71cb4a589e0cc4bb983e0bfa50156fc30d3c') + +pkgver() { + cd $pkgname + git describe --tags | sed 's/-/+/g' +} + +prepare() { + cd $pkgname + + # https://gitlab.gnome.org/World/OpenPaperwork/paperwork/merge_requests/781 + patch -Np1 -i ../0001-Filter-out-boxes-that-start-at-0-0.patch + + # https://gitlab.gnome.org/World/OpenPaperwork/paperwork/merge_requests/782 + patch -Np1 -i ../0001-Filter-out-too-large-boxes-on-selection.patch + + # https://gitlab.gnome.org/World/OpenPaperwork/paperwork/merge_requests/783 + patch -Np1 -i ../0001-Fix-importing-PNG-files-with-transparency.patch + + # https://gitlab.gnome.org/World/OpenPaperwork/paperwork/issues/812 + patch -Np1 -i ../0001-util-find_language-New-versions-of-pycountry-do-not-.patch +} + +build() { + cd $pkgname + make +} + +package() { + cd $pkgname/paperwork-backend + python3 setup.py install --root="$pkgdir" --optimize=1 + + cd ../paperwork-gtk + python3 setup.py install --root="$pkgdir" --optimize=1 + + cd "$pkgdir"/usr/lib/python3.7/site-packages/paperwork/frontend/ + PYTHONPATH=`echo "$pkgdir"/usr/lib/python*/site-packages/` python3 -c 'import shell; shell.install_system(icon_basedir="../../../../../share/icons", data_basedir="../../../../../share")' +} Property changes on: paperwork/trunk/PKGBUILD ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Added: python-pyinsane/trunk/PKGBUILD =================================================================== --- python-pyinsane/trunk/PKGBUILD (rev 0) +++ python-pyinsane/trunk/PKGBUILD 2018-12-19 23:23:16 UTC (rev 416799) @@ -0,0 +1,30 @@ +# Maintainer: Balló György <ballogyor+arch at gmail dot com> + +_pkgname=pyinsane +pkgname=python-pyinsane +pkgver=2.0.13 +pkgrel=1 +pkgdesc="Python library to access and use image scanners" +arch=(any) +url="https://gitlab.gnome.org/World/OpenPaperwork/pyinsane" +license=(GPL3) +depends=(python-pillow sane) +makedepends=(git python-setuptools) +_commit=3e509e6bdd2c07ac715cfc27946f86123744a46e # tags/2.0.13^0 +source=("git+https://gitlab.gnome.org/World/OpenPaperwork/pyinsane.git#commit=$_commit") +sha256sums=('SKIP') + +pkgver() { + cd $_pkgname + git describe --tags | sed 's/-/+/g' +} + +build() { + cd $_pkgname + make +} + +package() { + cd $_pkgname + python3 setup.py install --root="$pkgdir" --optimize=1 +} Property changes on: python-pyinsane/trunk/PKGBUILD ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Added: python-pyocr/trunk/PKGBUILD =================================================================== --- python-pyocr/trunk/PKGBUILD (rev 0) +++ python-pyocr/trunk/PKGBUILD 2018-12-19 23:23:16 UTC (rev 416799) @@ -0,0 +1,32 @@ +# Maintainer: Balló György <ballogyor+arch at gmail dot com> + +_pkgname=pyocr +pkgname=python-pyocr +pkgver=0.5.3 +pkgrel=2 +pkgdesc="Python wrapper for Tesseract and Cuneiform" +arch=(any) +url="https://gitlab.gnome.org/World/OpenPaperwork/pyocr" +license=(GPL3) +depends=(python-pillow python-six) +makedepends=(git python-setuptools) +optdepends=('cuneiform: OCR backend' + 'tesseract: OCR backend') +_commit=5abd0a566a0518bea00cb4247c16e67d0d3c2d65 # tags/0.5.3^0 +source=("git+https://gitlab.gnome.org/World/OpenPaperwork/pyocr.git#commit=$_commit") +sha256sums=('SKIP') + +pkgver() { + cd $_pkgname + git describe --tags | sed 's/-/+/g' +} + +build() { + cd $_pkgname + make +} + +package() { + cd $_pkgname + python3 setup.py install --root="$pkgdir" --optimize=1 +} Property changes on: python-pyocr/trunk/PKGBUILD ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Added: python-simplebayes/trunk/PKGBUILD =================================================================== --- python-simplebayes/trunk/PKGBUILD (rev 0) +++ python-simplebayes/trunk/PKGBUILD 2018-12-19 23:23:16 UTC (rev 416799) @@ -0,0 +1,51 @@ +# Maintainer: Balló György <ballogyor+arch at gmail dot com> + +_pkgbase=simplebayes +pkgbase=python-simplebayes +pkgname=(python2-simplebayes python-simplebayes) +pkgver=1.5.7 +pkgrel=1 +pkgdesc="Memory-based, optional-persistence naïve bayesian text classifier Python library" +arch=(any) +url="https://github.com/hickeroar/simplebayes" +license=(MIT) +depends=(python python2) +makedepends=(git python-setuptools python2-setuptools) +_commit=b8da72c50d20b6f8c0df2c2f39620715b08ddd32 # tags/1.5.7 +source=("git+https://github.com/hickeroar/simplebayes#commit=$_commit") +sha256sums=('SKIP') + +pkgver() { + cd $_pkgbase + git describe --tags | sed 's/-/+/g' +} + +prepare() { + cp -a $_pkgbase{,-py2} +} + +build() { + # Building Python2 + cd $_pkgbase-py2 + python2 setup.py build + + # Building Python3 + cd ../$_pkgbase + python3 setup.py build +} + +package_python2-simplebayes() { + depends=(python2) + + cd $_pkgbase-py2 + python2 setup.py install --root="$pkgdir" --optimize=1 + install -Dm644 LICENSE "$pkgdir/usr/share/licenses/$pkgname/LICENSE" +} + +package_python-simplebayes() { + depends=(python) + + cd $_pkgbase + python3 setup.py install --root="$pkgdir" --optimize=1 + install -Dm644 LICENSE "$pkgdir/usr/share/licenses/$pkgname/LICENSE" +} Property changes on: python-simplebayes/trunk/PKGBUILD ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property