Author: Armin Rigo <ar...@tunes.org>
Branch: unicode-utf8
Changeset: r93281:91e03fd0b17e
Date: 2017-12-06 11:13 +0100
http://bitbucket.org/pypy/pypy/changeset/91e03fd0b17e/

Log:    This is probably faster (needs non-ASCII input to verify)

diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -131,23 +131,20 @@
     ordch2 = ord(code[pos+1])
     if ordch1 <= 0xDF:
         # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz
-        return (((ordch1 & 0x1F) << 6) +    # 0b00011111
-                 (ordch2 & 0x3F))           # 0b00111111
+        return (ordch1 << 6) + ordch2 - (
+               (0xC0   << 6) + 0x80     )
 
     ordch3 = ord(code[pos+2])
     if ordch1 <= 0xEF:
         # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz
-        return (((ordch1 & 0x0F) << 12) +     # 0b00001111
-                ((ordch2 & 0x3F) << 6) +      # 0b00111111
-                (ordch3 & 0x3F))              # 0b00111111
+        return (ordch1 << 12) + (ordch2 << 6) + ordch3 - (
+               (0xE0   << 12) + (0x80   << 6) + 0x80     )
 
     ordch4 = ord(code[pos+3])
     if True:
         # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz
-        return (((ordch1 & 0x07) << 18) +      # 0b00000111
-                ((ordch2 & 0x3F) << 12) +      # 0b00111111
-                ((ordch3 & 0x3F) << 6) +       # 0b00111111
-                (ordch4 & 0x3F))               # 0b00111111
+        return (ordch1 << 18) + (ordch2 << 12) + (ordch3 << 6) + ordch4 - (
+               (0xF0   << 18) + (0x80   << 12) + (0x80   << 6) + 0x80     )
     assert False, "unreachable"
 
 def codepoint_before_pos(code, pos):
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to