Author: Matti Picus <matti.pi...@gmail.com>
Branch: unicode-utf8-py3
Changeset: r95769:fcbf4dcf5b74
Date: 2019-02-01 15:50 +0200
http://bitbucket.org/pypy/pypy/changeset/fcbf4dcf5b74/

Log:    rework to avoid uni.decode

diff --git a/pypy/interpreter/pyparser/pytokenizer.py b/pypy/interpreter/pyparser/pytokenizer.py
--- a/pypy/interpreter/pyparser/pytokenizer.py
+++ b/pypy/interpreter/pyparser/pytokenizer.py
@@ -6,6 +6,7 @@
 from pypy.interpreter.pyparser.pytokenize import tabsize, alttabsize, whiteSpaceDFA, \
     triple_quoted, endDFAs, single_quoted, pseudoDFA
 from pypy.interpreter.astcompiler import consts
+from rpython.rlib import rutf8
 
 NAMECHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
 NUMCHARS = '0123456789'
@@ -46,14 +47,9 @@
 
 
 def verify_utf8(token):
-    for c in token:
-        if ord(c) >= 0x80:
-            break
-    else:
-        return True
     try:
-        u = token.decode('utf-8')
-    except UnicodeDecodeError:
+        rutf8.check_utf8(token, False)
+    except rutf8.CheckError:
         return False
     return True
 
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to