Author: Matti Picus <matti.pi...@gmail.com>
Branch: unicode-utf8-py3
Changeset: r95769:fcbf4dcf5b74
Date: 2019-02-01 15:50 +0200
http://bitbucket.org/pypy/pypy/changeset/fcbf4dcf5b74/

Log: rework to avoid uni.decode

diff --git a/pypy/interpreter/pyparser/pytokenizer.py b/pypy/interpreter/pyparser/pytokenizer.py
--- a/pypy/interpreter/pyparser/pytokenizer.py
+++ b/pypy/interpreter/pyparser/pytokenizer.py
@@ -6,6 +6,7 @@
 from pypy.interpreter.pyparser.pytokenize import tabsize, alttabsize, whiteSpaceDFA, \
     triple_quoted, endDFAs, single_quoted, pseudoDFA
 from pypy.interpreter.astcompiler import consts
+from rpython.rlib import rutf8

 NAMECHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
 NUMCHARS = '0123456789'
@@ -46,14 +47,9 @@


 def verify_utf8(token):
-    for c in token:
-        if ord(c) >= 0x80:
-            break
-    else:
-        return True
     try:
-        u = token.decode('utf-8')
-    except UnicodeDecodeError:
+        rutf8.check_utf8(token, False)
+    except rutf8.CheckError:
         return False
     return True

_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit