Author: Tobias Pape <tob...@netshed.de>
Branch: 
Changeset: r61995:fc1de558db21
Date: 2013-03-04 09:06 +0100
http://bitbucket.org/pypy/pypy/changeset/fc1de558db21/

Log:    Small dotviewer fixes.

        * use strunicode throughout, also in msgstruct.
          -> avoids "utf-8" in msgstruct.py
        * do not fail on garbage (+tests)

diff --git a/dotviewer/msgstruct.py b/dotviewer/msgstruct.py
--- a/dotviewer/msgstruct.py
+++ b/dotviewer/msgstruct.py
@@ -1,5 +1,6 @@
 import sys, os
 from struct import pack, unpack, calcsize
+from strunicode import forceencoded
 
 MAGIC = -0x3b83728b
 
@@ -24,15 +25,10 @@
 long_max = 2147483647
 
 
-def _encodeme(x):
-    if type(x) is unicode:
-        x = x.encode('utf-8')
-    return x
-
 def message(tp, *values):
     #print >> sys.stderr, tp, values
     typecodes = ['']
-    values = map(_encodeme, values)
+    values = map(forceencoded, values)
     for v in values:
         if type(v) is str:
             typecodes.append('%ds' % len(v))
diff --git a/dotviewer/strunicode.py b/dotviewer/strunicode.py
--- a/dotviewer/strunicode.py
+++ b/dotviewer/strunicode.py
@@ -1,9 +1,15 @@
 RAW_ENCODING = "utf-8"
-
+ENCODING_ERROR_HANDLING = "replace"
 
 def forceunicode(name):
-    return name if isinstance(name, unicode) else name.decode(RAW_ENCODING)
+    """ returns `name` as unicode, even if it wasn't before  """
+    return name if isinstance(name, unicode) else name.decode(RAW_ENCODING, ENCODING_ERROR_HANDLING)
 
 
 def forcestr(name):
-    return name if isinstance(name, str) else name.encode(RAW_ENCODING)
+    """ returns `name` as (possibly `RAW_ENCODING` encoded) string, even if it wasn't before  """
+    return name if isinstance(name, str) else name.encode(RAW_ENCODING, ENCODING_ERROR_HANDLING)
+
+def forceencoded(name):
+    """ returns `name` as encoded string if it was unicode before """
+    return name.encode(RAW_ENCODING, ENCODING_ERROR_HANDLING) if isinstance(name, unicode) else name
diff --git a/dotviewer/test/test_unicode_util.py b/dotviewer/test/test_unicode_util.py
--- a/dotviewer/test/test_unicode_util.py
+++ b/dotviewer/test/test_unicode_util.py
@@ -3,7 +3,7 @@
 #
 import py
 import codecs
-from dotviewer.strunicode import RAW_ENCODING, forcestr, forceunicode
+from dotviewer.strunicode import RAW_ENCODING, forcestr, forceunicode, forceencoded
 
 SOURCE1 = u"""digraph G{
 λ -> b
@@ -18,7 +18,7 @@
     def test_idempotent(self):
         x = u"a"
         assert forceunicode(forcestr(x)) == x
-        
+
         x = u"λ"
         assert forceunicode(forcestr(x)) == x
 
@@ -40,7 +40,7 @@
         x_u = forceunicode(x_e)
         assert forceunicode(x_u) == x_u
 
-    def test_file(self):       
+    def test_file(self):
         udir = py.path.local.make_numbered_dir(prefix='usession-dot-', keep=3)
         full_filename = str(udir.join(FILENAME))
         f = codecs.open(full_filename, 'wb', RAW_ENCODING)
@@ -55,3 +55,30 @@
         f3.close()
         result = (c == SOURCE1)
         assert result
+
+    def test_only_unicode_encode(self):
+
+        sut =      [1,   u"a", "miau", u"λ"]
+        expected = [int, str,  str   , str ]
+
+        results = map(forceencoded, sut)
+
+
+        for result, expected_type in zip(results, expected):
+            assert isinstance(result, expected_type)
+
+    def test_forceunicode_should_not_fail(self):
+
+        garbage = "\xef\xff\xbb\xbf\xce\xbb\xff\xff" # garbage with a lambda
+
+        result = forceunicode(garbage)
+        assert True, "should not raise"
+
+    def test_forcestr_should_not_fail(self):
+
+        garbage = u"\xef\xff\xbb\xbf\xce\xbb\xff\xff" # garbage
+
+        result = forcestr(garbage)
+        assert True, "should not raise"
+
+
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to