Author: Armin Rigo <ar...@tunes.org> Branch: Changeset: r78495:8dd96fd0cca6 Date: 2015-07-08 13:29 +0200 http://bitbucket.org/pypy/pypy/changeset/8dd96fd0cca6/
Log: Elidable-ize the conversion from ascii string to unicode diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -6,7 +6,7 @@ from rpython.rlib.rstring import StringBuilder, UnicodeBuilder from rpython.rlib.runicode import ( make_unicode_escape_function, str_decode_ascii, str_decode_utf_8, - unicode_encode_ascii, unicode_encode_utf_8) + unicode_encode_ascii, unicode_encode_utf_8, fast_str_decode_ascii) from pypy.interpreter import unicodehelper from pypy.interpreter.baseobjspace import W_Root @@ -481,9 +481,13 @@ if encoding == 'ascii': # XXX error handling s = space.charbuf_w(w_obj) - eh = unicodehelper.decode_error_handler(space) - return space.wrap(str_decode_ascii( - s, len(s), None, final=True, errorhandler=eh)[0]) + try: + u = fast_str_decode_ascii(s) + except ValueError: + eh = unicodehelper.decode_error_handler(space) + u = str_decode_ascii( # try again, to get the error right + s, len(s), None, final=True, errorhandler=eh)[0] + return space.wrap(u) if encoding == 'utf-8': s = space.charbuf_w(w_obj) eh = unicodehelper.decode_error_handler(space) diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py --- a/rpython/rlib/runicode.py +++ b/rpython/rlib/runicode.py @@ -1009,6 +1009,16 @@ result.append(r) return result.build(), pos +# An elidable version, for a subset of the cases +@jit.elidable +def fast_str_decode_ascii(s): + result = UnicodeBuilder(len(s)) + for c in s: + if ord(c) >= 128: + raise ValueError + result.append(unichr(ord(c))) + return result.build() + # Specialize on the errorhandler when it's a constant @specialize.arg_or_var(3) diff --git a/rpython/rlib/test/test_runicode.py b/rpython/rlib/test/test_runicode.py --- a/rpython/rlib/test/test_runicode.py +++ b/rpython/rlib/test/test_runicode.py @@ -139,6 +139,12 @@ for encoding in "utf-8 latin-1 ascii".split(): self.checkdecode(chr(i), encoding) + def 
test_fast_str_decode_ascii(self): + u = runicode.fast_str_decode_ascii("abc\x00\x7F") + assert type(u) is unicode + assert u == u"abc\x00\x7F" + py.test.raises(ValueError, runicode.fast_str_decode_ascii, "ab\x80") + def test_all_first_256(self): for i in range(256): for encoding in ("utf-7 utf-8 latin-1 utf-16 utf-16-be utf-16-le " _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit