Author: Matti Picus <matti.pi...@gmail.com>
Branch: unicode-utf8-py3
Changeset: r95131:cdcddb46fce5
Date: 2018-09-16 21:26 +0300
http://bitbucket.org/pypy/pypy/changeset/cdcddb46fce5/

Log:    store utf8 and fix off-by-one

diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -1206,14 +1206,15 @@
         else:
             res_8, newindex = errorhandler(
                 errors, public_encoding_name, 'surrogates not allowed',
-                s, pos - 1, pos)
-            for cp in rutf8.Utf8StringIterator(res_8):
-                if cp < 0xD800:
+                s, pos, pos+1)
+            #for cp in rutf8.Utf8StringIterator(res_8):
+            for cp in res_8:
+                if cp < 0xD800 or allow_surrogates:
                     _STORECHAR(result, cp, byteorder)
                 else:
                     errorhandler('strict', public_encoding_name,
                                  'surrogates not allowed',
-                                 s, pos-1, pos)
+                                 s, pos, pos+1)
             if index != newindex:  # Should be uncommon
                 index = newindex
                 pos = rutf8._pos_at_index(s, newindex)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to