Package: git-buildpackage
Version: 0.9.9
Severity: normal
Tags: patch

I ran into the following issue when importing the history of a Debian package:
> gbp import-dscs --debsnap wireless-tools
gbp:info: Downloading snapshots of 'wireless-tools' to '/tmp/tmpkzi1rlbg'...
gbp:info: No git repository found, creating one.
Traceback (most recent call last):
  File "/usr/bin/gbp", line 149, in <module>
    sys.exit(supercommand())
  File "/usr/bin/gbp", line 145, in supercommand
    return module.main(args)
  File "/usr/lib/python3/dist-packages/gbp/scripts/import_dscs.py", line 180, in main
    if importer.importdsc(dscs[0]):
  File "/usr/lib/python3/dist-packages/gbp/scripts/import_dscs.py", line 72, in importdsc
    return import_dsc.main(['import-dsc'] + self.args + [dsc.dscfile])
  File "/usr/lib/python3/dist-packages/gbp/scripts/import_dsc.py", line 518, in main
    apply_debian_patch(repo, source, dsc, commit, options)
  File "/usr/lib/python3/dist-packages/gbp/scripts/import_dsc.py", line 174, in apply_debian_patch
    author = get_author_from_changelog(source.unpacked)
  File "/usr/lib/python3/dist-packages/gbp/scripts/import_dsc.py", line 114, in get_author_from_changelog
    dch = ChangeLog(filename=os.path.join(dir, 'debian/changelog'))
  File "/usr/lib/python3/dist-packages/gbp/deb/changelog.py", line 89, in __init__
    self._read()
  File "/usr/lib/python3/dist-packages/gbp/deb/changelog.py", line 132, in _read
    self._contents = f.read()
  File "/usr/lib/python3.6/codecs.py", line 321, in decode
    (result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xf6 in position 906: invalid start byte

This happened while it was importing version 23-2 (see
http://snapshot.debian.org/package/wireless-tools/23-2/). The changelog back
then was in ISO-8859-1. I've attached a patch that treats invalid UTF-8 files
as ISO-8859-1.
-- System Information:
Debian Release: buster/sid
  APT prefers unstable
  APT policy: (500, 'unstable'), (1, 'experimental')
Architecture: amd64 (x86_64)
Foreign Architectures: i386

Kernel: Linux 4.15.2 (SMP w/12 CPU cores)
Locale: LANG=nl_NL.utf8, LC_CTYPE=nl_NL.utf8 (charmap=UTF-8), LANGUAGE=nl_NL.utf8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
Init: systemd (via /run/systemd/system)
LSM: AppArmor: enabled

Versions of packages git-buildpackage depends on:
ii  devscripts             2.18.2
ii  git                    1:2.17.0-1
ii  man-db                 2.8.3-2
ii  python3                3.6.5-3
ii  python3-dateutil       2.6.1-1
ii  python3-pkg-resources  39.1.0-1

Versions of packages git-buildpackage recommends:
ii  cowbuilder        0.87+b1
ii  pbuilder          0.229.2
ii  pristine-tar      1.44
ii  python3-requests  2.18.4-2

Versions of packages git-buildpackage suggests:
pn  python3-notify2  <none>
ii  sudo             1.8.23-1
ii  unzip            6.0-21

-- no debconf information
From 48bc76b8a5294098548ef8c6b10e0f25b718fddf Mon Sep 17 00:00:00 2001
From: Guus Sliepen <g...@debian.org>
Date: Tue, 5 Jun 2018 21:41:28 +0200
Subject: [PATCH] Treat changelogs with invalid UTF-8 sequences as ISO-8859-1.

This allows import-dscs to import old versions of a package that did not
yet use UTF-8 encoding.
---
 gbp/deb/changelog.py | 8 ++++++--
 gbp/git/vfs.py       | 5 ++++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/gbp/deb/changelog.py b/gbp/deb/changelog.py
index 5cfaaf79..dda9b753 100644
--- a/gbp/deb/changelog.py
+++ b/gbp/deb/changelog.py
@@ -128,8 +128,12 @@ class ChangeLog(object):
         self._cp = cp
 
     def _read(self):
-        with open(self.filename, encoding='utf-8') as f:
-            self._contents = f.read()
+        try:
+            with open(self.filename, encoding='utf-8') as f:
+                self._contents = f.read()
+        except UnicodeDecodeError:
+            with open(self.filename, encoding='iso-8859-1') as f:
+                self._contents = f.read()
 
     def __getitem__(self, item):
         return self._cp[item]
diff --git a/gbp/git/vfs.py b/gbp/git/vfs.py
index 8363f77b..ec47201a 100644
--- a/gbp/git/vfs.py
+++ b/gbp/git/vfs.py
@@ -33,7 +33,10 @@ class GitVfs(object):
             if binary:
                 self._data = io.BytesIO(content)
             else:
-                self._data = io.StringIO(content.decode())
+                try:
+                    self._data = io.StringIO(content.decode())
+                except UnicodeDecodeError:
+                    self._data = io.StringIO(content.decode("iso-8859-1"))
 
         def readline(self):
             return self._data.readline()
-- 
2.17.0