Author: Armin Rigo <[email protected]>
Branch: unicode-utf8
Changeset: r92735:cb0586abb276
Date: 2017-10-12 16:53 +0200
http://bitbucket.org/pypy/pypy/changeset/cb0586abb276/

Log:    Implement and test these

diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py
--- a/pypy/interpreter/test/test_unicodehelper.py
+++ b/pypy/interpreter/test/test_unicodehelper.py
@@ -29,4 +29,34 @@
 
 def test_utf8_encode_ascii():
     assert utf8_encode_ascii("abc", 3, "??", "??") == "abc"
-    py.test.skip("test me more...")
+    def eh(errors, encoding, reason, p, start, end):
+        lst.append((errors, encoding, p, start, end))
+        return "<FOO>", end
+    lst = []
+    input = u"\u1234".encode("utf8")
+    assert utf8_encode_ascii(input, 1, "??", eh) == "<FOO>"
+    assert lst == [("??", "ascii", input, 0, 1)]
+    lst = []
+    input = u"\u1234\u5678abc\u8765\u4321".encode("utf8")
+    assert utf8_encode_ascii(input, 7, "??", eh) == "<FOO>abc<FOO>"
+    assert lst == [("??", "ascii", input, 0, 2),
+                   ("??", "ascii", input, 5, 7)]
+
+def test_str_decode_ascii():
+    assert str_decode_ascii("abc", 3, "??", True, "??") == ("abc", 3, 3)
+    def eh(errors, encoding, reason, p, start, end):
+        lst.append((errors, encoding, p, start, end))
+        return u"\u1234\u5678", end
+    lst = []
+    input = "\xe8"
+    exp = u"\u1234\u5678".encode("utf8")
+    assert str_decode_ascii(input, 1, "??", True, eh) == (exp, 1, 2)
+    assert lst == [("??", "ascii", input, 0, 1)]
+    lst = []
+    input = "\xe8\xe9abc\xea\xeb"
+    assert str_decode_ascii(input, 7, "??", True, eh) == (
+        exp + exp + "abc" + exp + exp, 7, 11)
+    assert lst == [("??", "ascii", input, 0, 1),
+                   ("??", "ascii", input, 1, 2),
+                   ("??", "ascii", input, 5, 6),
+                   ("??", "ascii", input, 6, 7)]
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -102,28 +102,20 @@
 def utf8_encode_ascii(utf8, utf8len, errors, errorhandler):
     if len(utf8) == utf8len:
         return utf8
-    assert False, "implement"
-    b = StringBuilder(utf8len)
-    i = 0
-    lgt = 0
-    while i < len(utf8):
-        c = ord(utf8[i])
-        if c <= 0x7F:
-            b.append(chr(c))
-            lgt += 1
-            i += 1
-        else:
-            utf8_repl, newpos, length = errorhandler(errors, 'ascii', 
-                'ordinal not in range (128)', utf8, lgt, lgt + 1)
-    return b.build()
+    # No Way At All to emulate the calls to the error handler in
+    # less than three pages, so better not.
+    u = utf8.decode("utf8")
+    w = EncodeWrapper(errorhandler)
+    return runicode.unicode_encode_ascii(u, len(u), errors, w.handle)
 
 def str_decode_ascii(s, slen, errors, final, errorhandler):
     try:
         rutf8.check_ascii(s)
         return s, slen, len(s)
     except rutf8.CheckError:
-        raise Exception("foo")
-        return rutf8.str_decode_ascii(s, slen, errors, errorhandler)
+        w = DecodeWrapper((errorhandler))
+        u, pos = runicode.str_decode_ascii(s, slen, errors, final, w.handle)
+        return u.encode('utf8'), pos, len(u)
 
 # XXX wrappers, think about speed
 
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to