Author: Philip Jenvey <pjen...@underboss.org>
Branch:
Changeset: r73939:44eef64a93d7
Date: 2014-05-22 17:26 -0700
http://bitbucket.org/pypy/pypy/changeset/44eef64a93d7/
Log: also handle surrogates when hosted on a narrow build
(grafted from 556155656b471613725c28fec6602117c714f661)

diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -13,6 +13,7 @@
     MAXUNICODE = 0xffff
     allow_surrogate_by_default = True
 
+NARROW_HOST = not we_are_translated() and sys.maxunicode == 0xFFFF
 BYTEORDER = sys.byteorder
 
 # python 2.7 has a preview of py3k behavior, so those functions
@@ -65,7 +66,7 @@
 
 if MAXUNICODE > 0xFFFF:
     def code_to_unichr(code):
-        if not we_are_translated() and sys.maxunicode == 0xFFFF:
+        if NARROW_HOST:
             # Host CPython is narrow build, generate surrogates
             return unichr_returns_surrogate(code)
         else:
@@ -336,7 +337,8 @@
                         ch2 = ord(s[pos])
                         # Check for low surrogate and combine the two to
                         # form a UCS4 value
-                        if ((allow_surrogates or MAXUNICODE < 65536) and
+                        if ((allow_surrogates or MAXUNICODE < 65536
+                             or NARROW_HOST) and
                             ch <= 0xDBFF and 0xDC00 <= ch2 <= 0xDFFF):
                             ch3 = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000
                             pos += 1
@@ -1349,8 +1351,7 @@
 
             # The following logic is enabled only if MAXUNICODE == 0xffff, or
             # for testing on top of a host Python where sys.maxunicode == 0xffff
-            if ((MAXUNICODE < 65536 or
-                    (not we_are_translated() and sys.maxunicode < 65536))
+            if ((MAXUNICODE < 65536 or NARROW_HOST)
                 and 0xD800 <= oc < 0xDC00 and pos + 1 < size):
                 # Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes
                 pos += 1
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit