Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r95769:fcbf4dcf5b74
Date: 2019-02-01 15:50 +0200
http://bitbucket.org/pypy/pypy/changeset/fcbf4dcf5b74/
Log: rework to avoid uni.decode
diff --git a/pypy/interpreter/pyparser/pytokenizer.py b/pypy/interpreter/pyparser/pytokenizer.py
--- a/pypy/interpreter/pyparser/pytokenizer.py
+++ b/pypy/interpreter/pyparser/pytokenizer.py
@@ -6,6 +6,7 @@
from pypy.interpreter.pyparser.pytokenize import tabsize, alttabsize, whiteSpaceDFA, \
triple_quoted, endDFAs, single_quoted, pseudoDFA
from pypy.interpreter.astcompiler import consts
+from rpython.rlib import rutf8
NAMECHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
NUMCHARS = '0123456789'
@@ -46,14 +47,9 @@
def verify_utf8(token):
- for c in token:
- if ord(c) >= 0x80:
- break
- else:
- return True
try:
- u = token.decode('utf-8')
- except UnicodeDecodeError:
+ rutf8.check_utf8(token, False)
+ except rutf8.CheckError:
return False
return True
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit