Author: Tobias Pape <tob...@netshed.de> Branch: Changeset: r61995:fc1de558db21 Date: 2013-03-04 09:06 +0100 http://bitbucket.org/pypy/pypy/changeset/fc1de558db21/
Log: Small dotviewer fixes. * use strunicode throughout, also in msgstruct. -> avoids "utf-8" in msgstruct.py * do not fail on garbage (+tests) diff --git a/dotviewer/msgstruct.py b/dotviewer/msgstruct.py --- a/dotviewer/msgstruct.py +++ b/dotviewer/msgstruct.py @@ -1,5 +1,6 @@ import sys, os from struct import pack, unpack, calcsize +from strunicode import forceencoded MAGIC = -0x3b83728b @@ -24,15 +25,10 @@ long_max = 2147483647 -def _encodeme(x): - if type(x) is unicode: - x = x.encode('utf-8') - return x - def message(tp, *values): #print >> sys.stderr, tp, values typecodes = [''] - values = map(_encodeme, values) + values = map(forceencoded, values) for v in values: if type(v) is str: typecodes.append('%ds' % len(v)) diff --git a/dotviewer/strunicode.py b/dotviewer/strunicode.py --- a/dotviewer/strunicode.py +++ b/dotviewer/strunicode.py @@ -1,9 +1,15 @@ RAW_ENCODING = "utf-8" - +ENCODING_ERROR_HANDLING = "replace" def forceunicode(name): - return name if isinstance(name, unicode) else name.decode(RAW_ENCODING) + """ returns `name` as unicode, even if it wasn't before """ + return name if isinstance(name, unicode) else name.decode(RAW_ENCODING, ENCODING_ERROR_HANDLING) def forcestr(name): - return name if isinstance(name, str) else name.encode(RAW_ENCODING) + """ returns `name` as (possibly `RAW_ENCODING` encoded) string, even if it wasn't before """ + return name if isinstance(name, str) else name.encode(RAW_ENCODING, ENCODING_ERROR_HANDLING) + +def forceencoded(name): + """ returns `name` as encoded string if it was unicode before """ + return name.encode(RAW_ENCODING, ENCODING_ERROR_HANDLING) if isinstance(name, unicode) else name diff --git a/dotviewer/test/test_unicode_util.py b/dotviewer/test/test_unicode_util.py --- a/dotviewer/test/test_unicode_util.py +++ b/dotviewer/test/test_unicode_util.py @@ -3,7 +3,7 @@ # import py import codecs -from dotviewer.strunicode import RAW_ENCODING, forcestr, forceunicode +from dotviewer.strunicode import RAW_ENCODING, 
forcestr, forceunicode, forceencoded SOURCE1 = u"""digraph G{ λ -> b @@ -18,7 +18,7 @@ def test_idempotent(self): x = u"a" assert forceunicode(forcestr(x)) == x - + x = u"λ" assert forceunicode(forcestr(x)) == x @@ -40,7 +40,7 @@ x_u = forceunicode(x_e) assert forceunicode(x_u) == x_u - def test_file(self): + def test_file(self): udir = py.path.local.make_numbered_dir(prefix='usession-dot-', keep=3) full_filename = str(udir.join(FILENAME)) f = codecs.open(full_filename, 'wb', RAW_ENCODING) @@ -55,3 +55,30 @@ f3.close() result = (c == SOURCE1) assert result + + def test_only_unicode_encode(self): + + sut = [1, u"a", "miau", u"λ"] + expected = [int, str, str , str ] + + results = map(forceencoded, sut) + + + for result, expected_type in zip(results, expected): + assert isinstance(result, expected_type) + + def test_forceunicode_should_not_fail(self): + + garbage = "\xef\xff\xbb\xbf\xce\xbb\xff\xff" # garbage with a lambda + + result = forceunicode(garbage) + assert True, "should not raise" + + def test_forcestr_should_not_fail(self): + + garbage = u"\xef\xff\xbb\xbf\xce\xbb\xff\xff" # garbage + + result = forcestr(garbage) + assert True, "should not raise" + + _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit