tags 761106 + patch thanks On Mon, Mar 16, 2015 at 11:41:59AM +0100, Orestis Ioannou wrote: > Subject: [PATCH] web app: Add support for non-ascii filenames and directories
Hey Orestis, thanks for your patch. I'm Cc:-ing the bug log to record that a patch is available. However, while it seems to fix the problem on my development machine, it doesn't seem to be enough to fix it on sources.debian.net. With your patch applied, sources.d.n still fails with the following backtrace: 2015-03-16 14:21:00,712 ERROR: 'ascii' codec can't decode byte 0xc3 in position 4: ordinal not in range(128) [in /srv/debsources/debsources/app/views.py:137] Traceback (most recent call last): File "/srv/debsources/debsources/app/views.py", line 193, in dispatch_request context = self.get_objects(**kwargs) File "/srv/debsources/debsources/app/sources/views.py", line 330, in get_objects return self._render_location(package, version, path) File "/srv/debsources/debsources/app/sources/views.py", line 129, in _render_location return self._render_directory(location) File "/srv/debsources/debsources/app/sources/views.py", line 147, in _render_directory content = directory.get_listing() File "/srv/debsources/debsources/models.py", line 691, in get_listing for f in os.listdir(self.sources_path)) File "/srv/debsources/debsources/models.py", line 691, in <genexpr> for f in os.listdir(self.sources_path)) UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 4: ordinal not in range(128) Whereas without your patch it failed with this (note that the backtrace is indeed different, even though the origin line is the same): 2015-03-16 14:23:48,108 ERROR: 'ascii' codec can't decode byte 0xc3 in position 4: ordinal not in range(128) [in /srv/debsources/debsources/app/views.py:137] Traceback (most recent call last): File "/srv/debsources/debsources/app/views.py", line 193, in dispatch_request context = self.get_objects(**kwargs) File "/srv/debsources/debsources/app/sources/views.py", line 330, in get_objects return self._render_location(package, version, path) File "/srv/debsources/debsources/app/sources/views.py", line 129, in _render_location return 
self._render_directory(location) File "/srv/debsources/debsources/app/sources/views.py", line 147, in _render_directory content = directory.get_listing() File "/srv/debsources/debsources/models.py", line 687, in get_listing for f in os.listdir(self.sources_path)) File "/srv/debsources/debsources/models.py", line 687, in <genexpr> for f in os.listdir(self.sources_path)) File "/srv/debsources/debsources/models.py", line 680, in get_type if os.path.isdir(os.path.join(self.sources_path, f)): File "/usr/lib/python2.7/posixpath.py", line 78, in join path += b UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 4: ordinal not in range(128) Any idea what might be going on here? TIA, Cheers. -- Stefano Zacchiroli . . . . . . . z...@upsilon.cc . . . . o . . . o . o Maître de conférences . . . . . http://upsilon.cc/zack . . . o . . . o o Former Debian Project Leader . . @zack on identi.ca . . o o o . . . o . « the first rule of tautology club is the first rule of tautology club »
From 994c768d3d1efaa2cddac2e16696cd6ca45f4a90 Mon Sep 17 00:00:00 2001 From: Orestis Ioannou <ores...@oioannou.com> Date: Mon, 16 Mar 2015 11:38:53 +0100 Subject: [PATCH] web app: support non-ASCII filenames and directories Closes: #761106 --- .../app/sources/templates/sources/source_folder.html | 4 ++-- debsources/models.py | 11 ++++++++--- debsources/tests/test_webapp.py | 19 +++++++++++++++++++ 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/debsources/app/sources/templates/sources/source_folder.html b/debsources/app/sources/templates/sources/source_folder.html index 54d73a1..1c78cc6 100644 --- a/debsources/app/sources/templates/sources/source_folder.html +++ b/debsources/app/sources/templates/sources/source_folder.html @@ -42,7 +42,7 @@ {% if config["DIR_LS_LONG"] %}<td class="stat-type"><span>{{ dir.stat.type }}</span></td>{% endif %} <td class="stat-perms"><span>{{ dir.stat.perms }}</span></td> {% if config["DIR_LS_LONG"] %}<td class="stat-size"><span>{{ "{:,d}".format(dir.stat.size) }}</span></td>{% endif %} - <td class="item-name"><a href="{{ url_for('.source', path_to=path+'/'+dir.name) }}">{{ dir.name }}</a> + <td class="item-name"><a href="{{ url_for('.source', path_to=path+'/'+dir.name.decode('utf-8')) }}">{{ dir.name.decode('utf-8') }}</a> {% if config["DIR_LS_LONG"] %} {% if dir.stat.symlink_dest is not none %}{{ " → " + dir.stat.symlink_dest }}{% endif %} {% endif %} @@ -56,7 +56,7 @@ {% if config["DIR_LS_LONG"] %}<td class="stat-type"><span>{{ file_.stat.type }}</span></td>{% endif %} <td class="stat-perms"><span>{{ file_.stat.perms }}</span></td> {% if config["DIR_LS_LONG"] %}<td class="stat-size"><span>{{ "{:,d}".format(file_.stat.size) }}</span></td>{% endif %} - <td class="item-name"><a href="{{ url_for('.source', path_to=path+'/'+file_.name) }}">{{ file_.name }}</a> + <td class="item-name"><a href="{{ url_for('.source', path_to=path+'/'+file_.name.decode('utf-8')) }}">{{ file_.name.decode('utf-8') }}</a> {% if 
config["DIR_LS_LONG"] %} {% if file_.stat.symlink_dest is not none %}{{ " → " + file_.stat.symlink_dest }}{% endif %} {% endif %} diff --git a/debsources/models.py b/debsources/models.py index c47094c..3d72589 100644 --- a/debsources/models.py +++ b/debsources/models.py @@ -48,6 +48,8 @@ Base = declarative_base() # used for migrations, see scripts under debsources/migrate/ DB_SCHEMA_VERSION = 8 +PATH_ENCODING = 'utf-8' + class PackageName(Base): """ a source package name """ @@ -682,13 +684,15 @@ class Directory(object): else: return "file" get_stat, join_path = self.location.get_stat, os.path.join - listing = sorted(dict(name=f, type=get_type(f), hidden=False, + listing = sorted(dict(name=f.encode(PATH_ENCODING), + type=get_type(f), + hidden=False, stat=get_stat(join_path(self.sources_path, f))) for f in os.listdir(self.sources_path)) for hidden_file in self.hidden_files: for f in listing: - full_path = os.path.join(self.location.sources_path, f['name']) + full_path = os.path.join(self.location.sources_path, f['name'].decode('utf-8')) if f['type'] == "directory": full_path += "/" f['hidden'] = (f['hidden'] @@ -725,13 +729,14 @@ class SourceFile(object): """ Queries the DB and returns the shasum of the file. 
""" + path = self.location.path.encode(PATH_ENCODING) shasum = session.query(Checksum.sha256) \ .filter(Checksum.package_id == Package.id) \ .filter(Package.name_id == PackageName.id) \ .filter(File.id == Checksum.file_id) \ .filter(PackageName.name == self.location.package) \ .filter(Package.version == self.location.version) \ - .filter(File.path == str(self.location.path)) \ + .filter(File.path == str(path)) \ .first() # WARNING: in the DB path is binary, and here # location.path is unicode, because the path comes from diff --git a/debsources/tests/test_webapp.py b/debsources/tests/test_webapp.py index 3b4b790..1411cb5 100644 --- a/debsources/tests/test_webapp.py +++ b/debsources/tests/test_webapp.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # Copyright (C) 2013 Matthieu Caneill <matthieu.cane...@gmail.com> # 2015 Stefano Zacchiroli <z...@upsilon.cc> # @@ -300,6 +301,20 @@ class DebsourcesTestCase(unittest.TestCase, DbTestFixture): "symlink_dest": None} }, rv['content']) + def test_api_folder_utf8(self): + rv = json.loads(self.app.get('/api/src/cvsnt/2.5.03.2382-3/').data) + self.assertEqual(rv['type'], "directory") + self.assertEqual(rv['package'], "cvsnt") + self.assertEqual(rv['directory'], "2.5.03.2382-3") + self.assertIn({"type": "file", + "name": u"testáéíóú.txt", + "hidden": False, + "stat": {u'perms': u'rw-r--r--', + u'type': u'-', + u'symlink_dest': None, + u'size': 5} + },rv['content']) + def test_api_symlink_dest(self): rv = json.loads(self.app.get('/api/src/beignet/1.0.0-1/').data) self.assertIn({"type": "file", @@ -373,6 +388,10 @@ class DebsourcesTestCase(unittest.TestCase, DbTestFixture): self.assertIn('<a href="/src/ledit/2.01-6/">parent folder</a>', rv.data) + def test_source_file_utf8(self): + rv = self.app.get('src/cvsnt/2.5.03.2382-3/testáéíóú.txt/') + self.assertIn('File: testáéíóú.txt', rv.data) + def test_source_file_text(self): rv = self.app.get('/src/ledit/2.01-6/README/') self.assertIn('<code id="sourcecode" class="no-highlight">', 
rv.data) -- 2.1.4