Author: Armin Rigo <[email protected]>
Branch: unicode-utf8
Changeset: r92735:cb0586abb276
Date: 2017-10-12 16:53 +0200
http://bitbucket.org/pypy/pypy/changeset/cb0586abb276/
Log: Implement and test these
diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py
--- a/pypy/interpreter/test/test_unicodehelper.py
+++ b/pypy/interpreter/test/test_unicodehelper.py
@@ -29,4 +29,34 @@
def test_utf8_encode_ascii():
assert utf8_encode_ascii("abc", 3, "??", "??") == "abc"
- py.test.skip("test me more...")
+ def eh(errors, encoding, reason, p, start, end):
+ lst.append((errors, encoding, p, start, end))
+ return "<FOO>", end
+ lst = []
+ input = u"\u1234".encode("utf8")
+ assert utf8_encode_ascii(input, 1, "??", eh) == "<FOO>"
+ assert lst == [("??", "ascii", input, 0, 1)]
+ lst = []
+ input = u"\u1234\u5678abc\u8765\u4321".encode("utf8")
+ assert utf8_encode_ascii(input, 7, "??", eh) == "<FOO>abc<FOO>"
+ assert lst == [("??", "ascii", input, 0, 2),
+ ("??", "ascii", input, 5, 7)]
+
+def test_str_decode_ascii():
+ assert str_decode_ascii("abc", 3, "??", True, "??") == ("abc", 3, 3)
+ def eh(errors, encoding, reason, p, start, end):
+ lst.append((errors, encoding, p, start, end))
+ return u"\u1234\u5678", end
+ lst = []
+ input = "\xe8"
+ exp = u"\u1234\u5678".encode("utf8")
+ assert str_decode_ascii(input, 1, "??", True, eh) == (exp, 1, 2)
+ assert lst == [("??", "ascii", input, 0, 1)]
+ lst = []
+ input = "\xe8\xe9abc\xea\xeb"
+ assert str_decode_ascii(input, 7, "??", True, eh) == (
+ exp + exp + "abc" + exp + exp, 7, 11)
+ assert lst == [("??", "ascii", input, 0, 1),
+ ("??", "ascii", input, 1, 2),
+ ("??", "ascii", input, 5, 6),
+ ("??", "ascii", input, 6, 7)]
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -102,28 +102,20 @@
def utf8_encode_ascii(utf8, utf8len, errors, errorhandler):
if len(utf8) == utf8len:
return utf8
- assert False, "implement"
- b = StringBuilder(utf8len)
- i = 0
- lgt = 0
- while i < len(utf8):
- c = ord(utf8[i])
- if c <= 0x7F:
- b.append(chr(c))
- lgt += 1
- i += 1
- else:
- utf8_repl, newpos, length = errorhandler(errors, 'ascii',
- 'ordinal not in range (128)', utf8, lgt, lgt + 1)
- return b.build()
+ # No Way At All to emulate the calls to the error handler in
+ # less than three pages, so better not.
+ u = utf8.decode("utf8")
+ w = EncodeWrapper(errorhandler)
+ return runicode.unicode_encode_ascii(u, len(u), errors, w.handle)
def str_decode_ascii(s, slen, errors, final, errorhandler):
try:
rutf8.check_ascii(s)
return s, slen, len(s)
except rutf8.CheckError:
- raise Exception("foo")
- return rutf8.str_decode_ascii(s, slen, errors, errorhandler)
+ w = DecodeWrapper((errorhandler))
+ u, pos = runicode.str_decode_ascii(s, slen, errors, final, w.handle)
+ return u.encode('utf8'), pos, len(u)
# XXX wrappers, think about speed
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit