Hi Folks, Tahoe's 1.5.0 release is approaching and IMHO support for accented characters in filenames is a pretty important feature (see bug #534 for details).
Because this is a somewhat large change, I need *your* help reviewing it. As far as I can tell, the proposed changes add support for the basic functionality without compromising further enhancements or introducing compatibility problems. The current state of the code only tries to decode filenames according to Python's getfilesystemencoding() without any clever handling of badly encoded filenames. In such a case, an error message is displayed to the user and tahoe exits. Advanced heuristics such as the ones that were thoroughly discussed on this mailing-list are not yet implemented. I've included the complete patch at the end of this mail to collect your comments inline, but you can also download a darcs patch bundle containing all the discrete patches from [1] for easier review. Reviews and testing from non-Linux users are also wholeheartedly welcome, because all of this was developed under Ubuntu Linux. Thank you very much for your time! François Bug #534: http://allmydata.org/trac/tahoe/ticket/534 [1] http://allmydata.org/trac/tahoe/attachment/ticket/534/tahoe-534-bundle.dpatch diff -rN -u old-tahoe-534/docs/frontends/CLI.txt new-tahoe-534/docs/frontends/CLI.txt --- old-tahoe-534/docs/frontends/CLI.txt 2009-06-05 21:14:40.000000000 +0200 +++ new-tahoe-534/docs/frontends/CLI.txt 2009-06-05 21:14:40.000000000 +0200 @@ -91,9 +91,21 @@ These commands also use a table of "aliases" to figure out which directory they ought to use a starting point. This is explained in more detail below. -In Tahoe v1.3.0, passing non-ascii characters to the cli is not guaranteed to -work, although it might work on your platform, especially if your platform -uses utf-8 encoding. +As of Tahoe v1.3.1, filenames containing non-ascii characters are +supported on the commande line if your terminal is correctly configured +for UTF-8 support. This is usually the case on moderns GNU/Linux +distributions. 
+ +If your terminal doesn't support UTF-8, you will still be able to list +directories but non-ascii characters will be replaced by a question mark +(?) on display. + +Reading from and writing to files whose name contain non-ascii +characters is also supported when your system correctly understand them. +Under Unix, this is usually handled by locale settings. If Tahoe cannot +correctly decode a filename, it will raise an error. In such case, +you'll need to correct the name of your file, possibly with help from +tools such as convmv. === Starting Directories === diff -rN -u old-tahoe-534/src/allmydata/scripts/cli.py new-tahoe-534/src/allmydata/scripts/cli.py --- old-tahoe-534/src/allmydata/scripts/cli.py 2009-06-05 21:14:40.000000000 +0200 +++ new-tahoe-534/src/allmydata/scripts/cli.py 2009-06-05 21:14:40.000000000 +0200 @@ -1,6 +1,7 @@ import os.path, re, sys, fnmatch from twisted.python import usage from allmydata.scripts.common import BaseOptions, get_aliases +from allmydata.util.stringutils import argv_to_unicode NODEURL_RE=re.compile("http://([^:]*)(:([1-9][0-9]*))?") @@ -49,12 +50,12 @@ class MakeDirectoryOptions(VDriveOptions): def parseArgs(self, where=""): - self.where = where + self.where = argv_to_unicode(where) longdesc = """Create a new directory, either unlinked or as a subdirectory.""" class AddAliasOptions(VDriveOptions): def parseArgs(self, alias, cap): - self.alias = alias + self.alias = argv_to_unicode(alias) self.cap = cap def getSynopsis(self): @@ -64,7 +65,7 @@ class CreateAliasOptions(VDriveOptions): def parseArgs(self, alias): - self.alias = alias + self.alias = argv_to_unicode(alias) def getSynopsis(self): return "%s create-alias ALIAS" % (os.path.basename(sys.argv[0]),) @@ -83,7 +84,7 @@ ("json", None, "Show the raw JSON output"), ] def parseArgs(self, where=""): - self.where = where + self.where = argv_to_unicode(where) longdesc = """List the contents of some portion of the virtual drive.""" @@ -94,8 +95,13 @@ # tahoe get FOO bar # write 
to local file # tahoe get tahoe:FOO bar # same - self.from_file = arg1 - self.to_file = arg2 + self.from_file = argv_to_unicode(arg1) + + if arg2: + self.to_file = argv_to_unicode(arg2) + else: + self.to_file = None + if self.to_file == "-": self.to_file = None @@ -131,15 +137,15 @@ # tahoe put bar tahoe:FOO # same if arg1 is not None and arg2 is not None: - self.from_file = arg1 - self.to_file = arg2 + self.from_file = argv_to_unicode(arg1) + self.to_file = argv_to_unicode(arg2) elif arg1 is not None and arg2 is None: - self.from_file = arg1 # might be "-" + self.from_file = argv_to_unicode(arg1) # might be "-" self.to_file = None else: self.from_file = None self.to_file = None - if self.from_file == "-": + if self.from_file == u"-": self.from_file = None def getSynopsis(self): @@ -176,28 +182,28 @@ def parseArgs(self, *args): if len(args) < 2: raise usage.UsageError("cp requires at least two arguments") - self.sources = args[:-1] - self.destination = args[-1] + self.sources = map(argv_to_unicode, args[:-1]) + self.destination = argv_to_unicode(args[-1]) class RmOptions(VDriveOptions): def parseArgs(self, where): - self.where = where + self.where = argv_to_unicode(where) def getSynopsis(self): return "%s rm VDRIVE_FILE" % (os.path.basename(sys.argv[0]),) class MvOptions(VDriveOptions): def parseArgs(self, frompath, topath): - self.from_file = frompath - self.to_file = topath + self.from_file = argv_to_unicode(frompath) + self.to_file = argv_to_unicode(topath) def getSynopsis(self): return "%s mv FROM TO" % (os.path.basename(sys.argv[0]),) class LnOptions(VDriveOptions): def parseArgs(self, frompath, topath): - self.from_file = frompath - self.to_file = topath + self.from_file = argv_to_unicode(frompath) + self.to_file = argv_to_unicode(topath) def getSynopsis(self): return "%s ln FROM TO" % (os.path.basename(sys.argv[0]),) @@ -221,8 +227,8 @@ self['exclude'] = set() def parseArgs(self, localdir, topath): - self.from_dir = localdir - self.to_dir = topath + 
self.from_dir = argv_to_unicode(localdir) + self.to_dir = argv_to_unicode(topath) def getSynopsis(Self): return "%s backup FROM ALIAS:TO" % os.path.basename(sys.argv[0]) @@ -270,7 +276,7 @@ class WebopenOptions(VDriveOptions): def parseArgs(self, where=''): - self.where = where + self.where = argv_to_unicode(where) def getSynopsis(self): return "%s webopen [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),) @@ -285,7 +291,7 @@ ("raw", "r", "Display raw JSON data instead of parsed"), ] def parseArgs(self, where=''): - self.where = where + self.where = argv_to_unicode(where) def getSynopsis(self): return "%s manifest [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),) @@ -297,7 +303,7 @@ ("raw", "r", "Display raw JSON data instead of parsed"), ] def parseArgs(self, where=''): - self.where = where + self.where = argv_to_unicode(where) def getSynopsis(self): return "%s stats [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),) @@ -312,7 +318,7 @@ ("add-lease", None, "Add/renew lease on all shares"), ] def parseArgs(self, where=''): - self.where = where + self.where = argv_to_unicode(where) def getSynopsis(self): return "%s check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),) @@ -328,7 +334,7 @@ ("verbose", "v", "Be noisy about what is happening."), ] def parseArgs(self, where=''): - self.where = where + self.where = argv_to_unicode(where) def getSynopsis(self): return "%s deep-check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),) diff -rN -u old-tahoe-534/src/allmydata/scripts/common.py new-tahoe-534/src/allmydata/scripts/common.py --- old-tahoe-534/src/allmydata/scripts/common.py 2009-06-05 21:14:40.000000000 +0200 +++ new-tahoe-534/src/allmydata/scripts/common.py 2009-06-05 21:14:40.000000000 +0200 @@ -1,7 +1,8 @@ import os, sys, urllib +import codecs from twisted.python import usage - +from allmydata.util.stringutils import unicode_to_url class BaseOptions: # unit tests can override these to point at StringIO instances @@ -100,14 +101,14 @@ except EnvironmentError: pass 
try: - f = open(aliasfile, "r") + f = codecs.open(aliasfile, "r", "utf-8") for line in f.readlines(): line = line.strip() if line.startswith("#") or not line: continue name, cap = line.split(":", 1) # normalize it: remove http: prefix, urldecode - cap = cap.strip() + cap = cap.strip().encode('ascii') aliases[name] = uri.from_string_dirnode(cap).to_string() except EnvironmentError: pass @@ -163,4 +164,4 @@ def escape_path(path): segments = path.split("/") - return "/".join([urllib.quote(s) for s in segments]) + return "/".join([urllib.quote(unicode_to_url(s)) for s in segments]) diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_add_alias.py new-tahoe-534/src/allmydata/scripts/tahoe_add_alias.py --- old-tahoe-534/src/allmydata/scripts/tahoe_add_alias.py 2009-06-05 21:14:40.000000000 +0200 +++ new-tahoe-534/src/allmydata/scripts/tahoe_add_alias.py 2009-06-05 21:14:40.000000000 +0200 @@ -1,8 +1,11 @@ import os.path +import codecs +import sys from allmydata import uri from allmydata.scripts.common_http import do_http, check_http_error from allmydata.scripts.common import get_aliases +from allmydata.util.stringutils import unicode_to_stdout def add_alias(options): nodedir = options['node-directory'] @@ -52,10 +55,10 @@ new_uri = resp.read().strip() # probably check for others.. 
- f = open(aliasfile, "a") + f = codecs.open(aliasfile, "a", "utf-8") f.write("%s: %s\n" % (alias, new_uri)) f.close() - print >>stdout, "Alias '%s' created" % (alias,) + print >>stdout, "Alias '%s' created" % (unicode_to_stdout(alias),) return 0 def list_aliases(options): diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_backup.py new-tahoe-534/src/allmydata/scripts/tahoe_backup.py --- old-tahoe-534/src/allmydata/scripts/tahoe_backup.py 2009-06-05 21:14:40.000000000 +0200 +++ new-tahoe-534/src/allmydata/scripts/tahoe_backup.py 2009-06-05 21:14:40.000000000 +0200 @@ -4,11 +4,15 @@ import urllib import simplejson import datetime +import sys from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS from allmydata.scripts.common_http import do_http from allmydata import uri from allmydata.util import time_format from allmydata.scripts import backupdb +from allmydata.util.stringutils import fs_to_unicode, unicode_to_fs, unicode_to_stdout +from allmydata.util.assertutil import precondition +from twisted.python import usage class HTTPError(Exception): pass @@ -245,9 +249,10 @@ def verboseprint(self, msg): if self.verbosity >= 2: - print >>self.options.stdout, msg + print >>self.options.stdout, unicode_to_stdout(msg) def process(self, localpath, olddircap): + precondition(isinstance(localpath, unicode), localpath) # returns newdircap self.verboseprint("processing %s, olddircap %s" % (localpath, olddircap)) @@ -256,7 +261,8 @@ olddircontents = self.readdir(olddircap) newdircontents = {} # childname -> (type, rocap, metadata) - for child in self.options.filter_listdir(os.listdir(localpath)): + for child in self.options.filter_listdir(os.listdir(unicode_to_fs(localpath))): + child = fs_to_unicode(child) childpath = os.path.join(localpath, child) if os.path.isdir(childpath): metadata = get_local_metadata(childpath) @@ -342,6 +348,8 @@ return contents def upload(self, childpath): + precondition(isinstance(childpath, unicode), childpath) + 
#self.verboseprint("uploading %s.." % childpath) metadata = get_local_metadata(childpath) @@ -350,7 +358,7 @@ if must_upload: self.verboseprint("uploading %s.." % childpath) - infileobj = open(os.path.expanduser(childpath), "rb") + infileobj = open(unicode_to_fs(os.path.expanduser(childpath)), "rb") url = self.options['node-url'] + "uri" resp = do_http("PUT", url, infileobj) if resp.status not in (200, 201): diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_cp.py new-tahoe-534/src/allmydata/scripts/tahoe_cp.py --- old-tahoe-534/src/allmydata/scripts/tahoe_cp.py 2009-06-05 21:14:40.000000000 +0200 +++ new-tahoe-534/src/allmydata/scripts/tahoe_cp.py 2009-06-05 21:14:40.000000000 +0200 @@ -4,9 +4,13 @@ import simplejson from cStringIO import StringIO from twisted.python.failure import Failure +import sys from allmydata.scripts.common import get_alias, escape_path, DefaultAliasMarker from allmydata.scripts.common_http import do_http from allmydata import uri +from twisted.python import usage +from allmydata.util.stringutils import fs_to_unicode, unicode_to_fs, unicode_to_url +from allmydata.util.assertutil import precondition def ascii_or_none(s): if s is None: @@ -69,6 +73,7 @@ class LocalFileSource: def __init__(self, pathname): + precondition(isinstance(pathname, unicode), pathname) self.pathname = pathname def need_to_copy_bytes(self): @@ -79,6 +84,7 @@ class LocalFileTarget: def __init__(self, pathname): + precondition(isinstance(pathname, unicode), pathname) self.pathname = pathname def put_file(self, inf): outf = open(self.pathname, "wb") @@ -91,6 +97,7 @@ class LocalMissingTarget: def __init__(self, pathname): + precondition(isinstance(pathname, unicode), pathname) self.pathname = pathname def put_file(self, inf): @@ -104,6 +111,8 @@ class LocalDirectorySource: def __init__(self, progressfunc, pathname): + precondition(isinstance(pathname, unicode), pathname) + self.progressfunc = progressfunc self.pathname = pathname self.children = None @@ -112,8 +121,9 
@@ if self.children is not None: return self.children = {} - children = os.listdir(self.pathname) + children = os.listdir(unicode_to_fs(self.pathname)) for i,n in enumerate(children): + n = fs_to_unicode(n) self.progressfunc("examining %d of %d" % (i, len(children))) pn = os.path.join(self.pathname, n) if os.path.isdir(pn): @@ -129,6 +139,8 @@ class LocalDirectoryTarget: def __init__(self, progressfunc, pathname): + precondition(isinstance(pathname, unicode), pathname) + self.progressfunc = progressfunc self.pathname = pathname self.children = None @@ -137,8 +149,9 @@ if self.children is not None: return self.children = {} - children = os.listdir(self.pathname) + children = os.listdir(unicode_to_fs(self.pathname)) for i,n in enumerate(children): + n = fs_to_unicode(n) self.progressfunc("examining %d of %d" % (i, len(children))) pn = os.path.join(self.pathname, n) if os.path.isdir(pn): @@ -160,8 +173,9 @@ return LocalDirectoryTarget(self.progressfunc, pathname) def put_file(self, name, inf): + precondition(isinstance(name, unicode), name) pathname = os.path.join(self.pathname, name) - outf = open(pathname, "wb") + outf = open(unicode_to_fs(pathname), "wb") while True: data = inf.read(32768) if not data: @@ -350,7 +364,7 @@ if self.writecap: url = self.nodeurl + "/".join(["uri", urllib.quote(self.writecap), - urllib.quote(name.encode('utf-8'))]) + urllib.quote(unicode_to_url(name))]) self.children[name] = TahoeFileTarget(self.nodeurl, mutable, writecap, readcap, url) else: diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_ls.py new-tahoe-534/src/allmydata/scripts/tahoe_ls.py --- old-tahoe-534/src/allmydata/scripts/tahoe_ls.py 2009-06-05 21:14:40.000000000 +0200 +++ new-tahoe-534/src/allmydata/scripts/tahoe_ls.py 2009-06-05 21:14:40.000000000 +0200 @@ -3,6 +3,7 @@ import simplejson from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path from allmydata.scripts.common_http import do_http +from allmydata.util.stringutils import unicode_to_stdout 
def list(options): nodeurl = options['node-url'] @@ -112,7 +113,7 @@ line.append(ctime_s) if not options["classify"]: classify = "" - line.append(name + classify) + line.append(unicode_to_stdout(name) + classify) if options["uri"]: line.append(uri) if options["readonly-uri"]: diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_manifest.py new-tahoe-534/src/allmydata/scripts/tahoe_manifest.py --- old-tahoe-534/src/allmydata/scripts/tahoe_manifest.py 2009-06-05 21:14:40.000000000 +0200 +++ new-tahoe-534/src/allmydata/scripts/tahoe_manifest.py 2009-06-05 21:14:40.000000000 +0200 @@ -80,7 +80,7 @@ try: print >>stdout, d["cap"], "/".join(d["path"]) except UnicodeEncodeError: - print >>stdout, d["cap"], "/".join([p.encode("utf-8") + print >>stdout, d["cap"], "/".join([unicode_to_stdout(p) for p in d["path"]]) def manifest(options): diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_mkdir.py new-tahoe-534/src/allmydata/scripts/tahoe_mkdir.py --- old-tahoe-534/src/allmydata/scripts/tahoe_mkdir.py 2009-06-05 21:14:40.000000000 +0200 +++ new-tahoe-534/src/allmydata/scripts/tahoe_mkdir.py 2009-06-05 21:14:40.000000000 +0200 @@ -2,6 +2,7 @@ import urllib from allmydata.scripts.common_http import do_http, check_http_error from allmydata.scripts.common import get_alias, DEFAULT_ALIAS +from allmydata.util.stringutils import unicode_to_url def mkdir(options): nodeurl = options['node-url'] @@ -31,7 +32,7 @@ path = path[:-1] # path (in argv) must be "/".join([s.encode("utf-8") for s in segments]) url = nodeurl + "uri/%s/%s?t=mkdir" % (urllib.quote(rootcap), - urllib.quote(path)) + urllib.quote(unicode_to_url(path))) resp = do_http("POST", url) check_http_error(resp, stderr) new_uri = resp.read().strip() diff -rN -u old-tahoe-534/src/allmydata/test/test_cli.py new-tahoe-534/src/allmydata/test/test_cli.py --- old-tahoe-534/src/allmydata/test/test_cli.py 2009-06-05 21:14:40.000000000 +0200 +++ new-tahoe-534/src/allmydata/test/test_cli.py 2009-06-05 21:14:40.000000000 +0200 @@ -6,6 
+6,7 @@ import urllib import re import simplejson +import sys from allmydata.util import fileutil, hashutil, base32 from allmydata import uri @@ -518,6 +519,48 @@ self._test_webopen(["two:"], self.two_url) d.addCallback(_test_urls) + d.addCallback(lambda res: self.do_cli("create-alias", "études")) + def _check_create_unicode((rc,stdout,stderr)): + self.failUnlessEqual(rc, 0) + self.failIf(stderr) + + # If stdout only supports ascii, accentuated characters are + # being replaced by '?' + if sys.stdout.encoding == "ANSI_X3.4-1968": + self.failUnless("Alias '?tudes' created" in stdout) + else: + self.failUnless("Alias 'études' created" in stdout) + + aliases = get_aliases(self.get_clientdir()) + self.failUnless(aliases[u"études"].startswith("URI:DIR2:")) + d.addCallback(_check_create_unicode) + + d.addCallback(lambda res: self.do_cli("ls", "études:")) + def _check_ls1((rc, stdout, stderr)): + self.failUnlessEqual(rc, 0) + self.failIf(stderr) + + self.failUnlessEqual(stdout, "") + d.addCallback(_check_ls1) + + d.addCallback(lambda res: self.do_cli("put", "-", "études:uploaded.txt", + stdin="Blah blah blah")) + + d.addCallback(lambda res: self.do_cli("ls", "études:")) + def _check_ls2((rc, stdout, stderr)): + self.failUnlessEqual(rc, 0) + self.failIf(stderr) + + self.failUnlessEqual(stdout, "uploaded.txt\n") + d.addCallback(_check_ls2) + + d.addCallback(lambda res: self.do_cli("get", "études:uploaded.txt")) + def _check_get((rc, stdout, stderr)): + self.failUnlessEqual(rc, 0) + self.failIf(stderr) + self.failUnlessEqual(stdout, "Blah blah blah") + d.addCallback(_check_get) + return d class Put(GridTestMixin, CLITestMixin, unittest.TestCase): @@ -739,6 +782,37 @@ d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA2)) return d + def test_immutable_from_file_unicode(self): + # tahoe put file.txt "à trier.txt" + self.basedir = os.path.dirname(self.mktemp()) + self.set_up_grid() + + rel_fn = os.path.join(self.basedir, "DATAFILE") + abs_fn = 
os.path.abspath(rel_fn) + # we make the file small enough to fit in a LIT file, for speed + DATA = "short file" + f = open(rel_fn, "w") + f.write(DATA) + f.close() + + d = self.do_cli("create-alias", "tahoe") + + d.addCallback(lambda res: + self.do_cli("put", rel_fn, "à trier.txt")) + def _uploaded((rc,stdout,stderr)): + readcap = stdout.strip() + self.failUnless(readcap.startswith("URI:LIT:")) + self.failUnless("201 Created" in stderr, stderr) + self.readcap = readcap + d.addCallback(_uploaded) + + d.addCallback(lambda res: + self.do_cli("get", "tahoe:à trier.txt")) + d.addCallback(lambda (rc,stdout,stderr): + self.failUnlessEqual(stdout, DATA)) + + return d + class List(GridTestMixin, CLITestMixin, unittest.TestCase): def test_list(self): self.basedir = "cli/List/list" @@ -795,30 +869,37 @@ def test_unicode_filename(self): self.basedir = "cli/Cp/unicode_filename" self.set_up_grid() + d = self.do_cli("create-alias", "tahoe") + + # Use unicode strings when calling os functions + if sys.getfilesystemencoding() == "ANSI_X3.4-1968": + fn1 = os.path.join(self.basedir, u"Artonwall") + else: + fn1 = os.path.join(self.basedir, u"Ärtonwall") - fn1 = os.path.join(self.basedir, "Ärtonwall") DATA1 = "unicode file content" open(fn1, "wb").write(DATA1) + d.addCallback(lambda res: self.do_cli("cp", fn1.encode('utf-8'), "tahoe:Ärtonwall")) + + d.addCallback(lambda res: self.do_cli("get", "tahoe:Ärtonwall")) + d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA1)) - fn2 = os.path.join(self.basedir, "Metallica") + + fn2 = os.path.join(self.basedir, u"Metallica") DATA2 = "non-unicode file content" open(fn2, "wb").write(DATA2) # Bug #534 # Assure that uploading a file whose name contains unicode character doesn't # prevent further uploads in the same directory - d = self.do_cli("create-alias", "tahoe") - d.addCallback(lambda res: self.do_cli("cp", fn1, "tahoe:")) - d.addCallback(lambda res: self.do_cli("cp", fn2, "tahoe:")) - - d.addCallback(lambda res: 
self.do_cli("get", "tahoe:Ärtonwall")) - d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA1)) + d.addCallback(lambda res: self.do_cli("cp", fn2.encode('utf-8'), "tahoe:")) d.addCallback(lambda res: self.do_cli("get", "tahoe:Metallica")) d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA2)) + d.addCallback(lambda res: self.do_cli("ls", "tahoe:")) + return d - test_unicode_filename.todo = "This behavior is not yet supported, although it does happen to work (for reasons that are ill-understood) on many platforms. See issue ticket #534." def test_dangling_symlink_vs_recursion(self): if not hasattr(os, 'symlink'): @@ -837,6 +918,17 @@ dn, "tahoe:")) return d +class Mkdir(GridTestMixin, CLITestMixin, unittest.TestCase): + def test_unicode_mkdir(self): + self.basedir = os.path.dirname(self.mktemp()) + self.set_up_grid() + + d = self.do_cli("create-alias", "tahoe") + d.addCallback(lambda res: self.do_cli("mkdir", "tahoe:Motörhead")) + + return d + + class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase): def writeto(self, path, data): @@ -871,6 +963,11 @@ self.writeto("parent/subdir/bar.txt", "bar\n" * 1000) self.writeto("parent/blah.txt", "blah") + if sys.getfilesystemencoding() == "ANSI_X3.4-1968": + self.writeto(u"parent/artonwall.txt", "Marmelade Jacuzzi") + else: + self.writeto(u"parent/ärtonwall.txt", "Marmelade Jacuzzi") + def do_backup(use_backupdb=True, verbose=False): cmd = ["backup"] if not have_bdb or not use_backupdb: @@ -895,8 +992,8 @@ self.failUnlessEqual(err, "") self.failUnlessEqual(rc, 0) fu, fr, dc, dr = self.count_output(out) - # foo.txt, bar.txt, blah.txt - self.failUnlessEqual(fu, 3) + # foo.txt, bar.txt, blah.txt, ärtonwall.txt + self.failUnlessEqual(fu, 4) self.failUnlessEqual(fr, 0) # empty, home, home/parent, home/parent/subdir self.failUnlessEqual(dc, 4) @@ -945,9 +1042,9 @@ self.failUnlessEqual(rc, 0) if have_bdb: fu, fr, dc, dr = self.count_output(out) - # foo.txt, bar.txt, blah.txt + # foo.txt, 
bar.txt, blah.txt, ärtonwall.txt self.failUnlessEqual(fu, 0) - self.failUnlessEqual(fr, 3) + self.failUnlessEqual(fr, 4) # empty, home, home/parent, home/parent/subdir self.failUnlessEqual(dc, 0) self.failUnlessEqual(dr, 4) @@ -975,9 +1072,9 @@ self.failUnlessEqual(rc, 0) fu, fr, dc, dr = self.count_output(out) fchecked, dchecked, dread = self.count_output2(out) - self.failUnlessEqual(fchecked, 3) + self.failUnlessEqual(fchecked, 4) self.failUnlessEqual(fu, 0) - self.failUnlessEqual(fr, 3) + self.failUnlessEqual(fr, 4) # TODO: backupdb doesn't do dirs yet; when it does, this will # change to dchecked=4, and maybe dread=0 self.failUnlessEqual(dchecked, 0) @@ -1023,8 +1120,8 @@ fu, fr, dc, dr = self.count_output(out) # new foo.txt, surprise file, subfile, empty self.failUnlessEqual(fu, 4) - # old bar.txt - self.failUnlessEqual(fr, 1) + # old bar.txt, ärtonwall.txt + self.failUnlessEqual(fr, 2) # home, parent, subdir, blah.txt, surprisedir self.failUnlessEqual(dc, 5) self.failUnlessEqual(dr, 0) @@ -1063,7 +1160,7 @@ self.failUnlessEqual(err, "") self.failUnlessEqual(rc, 0) fu, fr, dc, dr = self.count_output(out) - self.failUnlessEqual(fu, 5) + self.failUnlessEqual(fu, 6) self.failUnlessEqual(fr, 0) self.failUnlessEqual(dc, 0) self.failUnlessEqual(dr, 5) diff -rN -u old-tahoe-534/src/allmydata/util/stringutils.py new-tahoe-534/src/allmydata/util/stringutils.py --- old-tahoe-534/src/allmydata/util/stringutils.py 1970-01-01 01:00:00.000000000 +0100 +++ new-tahoe-534/src/allmydata/util/stringutils.py 2009-06-05 21:14:40.000000000 +0200 @@ -0,0 +1,70 @@ +""" +Functions used to convert inputs from whatever encoding used in the system to +unicode and back. + +TODO: + * Accept two cli arguments --argv-encoding and --filesystem-encoding +""" + +import sys +from allmydata.util.assertutil import precondition +from twisted.python import usage + +def argv_to_unicode(s): + """ + Decode given argv element to unicode. 
+ """ + # sys.argv encoding detection in Python is not trivial so utf-8 is + # currently used by default and an informative error message is given if + # the argument cannot be correctly decoded. + + precondition(isinstance(s, str), s) + try: + return unicode(s, 'utf-8') + except UnicodeEncodeError: + raise usageError("Argument '%s' cannot be decoded as UTF-8." % s) + +def fs_to_unicode(s): + """ + Decode a filename (or a directory name) to unicode using the same encoding + as the filesystem. + """ + # Filename encoding detection is a little bit better thanks to + # getfilesystemencoding() in the sys module. However, filenames can be + # encoded using another encoding than the one used on the filesystem. + + precondition(isinstance(s, str), s) + encoding = sys.getfilesystemencoding() + try: + return unicode(s, encoding) + except UnicodeDecodeError: + raise usage.UsageError("Filename '%s' cannot be decoded using the current encoding of your filesystem (%s). Please rename this file." % (s, encoding)) + +def unicode_to_fs(s): + """ + Encode an unicode object used in file or directoy name. + """ + + precondition(isinstance(s, unicode), s) + encoding = sys.getfilesystemencoding() + try: + return s.encode(encoding) + except UnicodeEncodeError: + raise usage.UsageError("Filename '%s' cannot be encoded using the current encoding of your filesystem (%s). Please configure your locale correctly or rename this file." % (s, encoding)) + +def unicode_to_url(s): + """ + Encode an unicode object used in an URL. + """ + # According to RFC 2718, non-ascii characters in url's must be UTF-8 encoded. + + precondition(isinstance(s, unicode), s) + return s.encode('utf-8') + +def unicode_to_stdout(s): + """ + Encode an unicode object for representation on stdout. 
+ """ + + precondition(isinstance(s, unicode), s) + return s.encode(sys.stdout.encoding, 'replace') _______________________________________________ tahoe-dev mailing list [email protected] http://allmydata.org/cgi-bin/mailman/listinfo/tahoe-dev
