Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r95268:e625965aa9cc
Date: 2018-11-01 18:17 +0200
http://bitbucket.org/pypy/pypy/changeset/e625965aa9cc/
Log: avoid IndexError in codepoint_at_pos
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -155,18 +155,19 @@
def codepoint_at_pos(code, pos):
""" Give a codepoint in code at pos - assumes valid utf8, no checking!
"""
+ lgt = len(code)
ordch1 = ord(code[pos])
- if ordch1 <= 0x7F:
+ if ordch1 <= 0x7F or pos +1 >= lgt:
return ordch1
ordch2 = ord(code[pos+1])
- if ordch1 <= 0xDF:
+ if ordch1 <= 0xDF or pos +2 >= lgt:
# 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz
return (ordch1 << 6) + ordch2 - (
(0xC0 << 6) + 0x80 )
ordch3 = ord(code[pos+2])
- if ordch1 <= 0xEF:
+ if ordch1 <= 0xEF or pos + 3 >= lgt:
# 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz
return (ordch1 << 12) + (ordch2 << 6) + ordch3 - (
(0xE0 << 12) + (0x80 << 6) + 0x80 )
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit