Author: Armin Rigo <[email protected]>
Branch: unicode-utf8
Changeset: r92252:2a8ae058f62e
Date: 2017-08-24 15:14 +0200
http://bitbucket.org/pypy/pypy/changeset/2a8ae058f62e/
Log: Tweak unicode.splitlines()
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -522,17 +522,18 @@
lgt += 1
eol = pos
if pos < length:
- pos = rutf8.next_codepoint_pos(value, pos)
- # read CRLF as one line break
- if pos < length and value[eol] == '\r' and value[pos] == '\n':
- pos += 1
+ # read CRLF as one line break
+ if (value[pos] == '\r' and pos + 1 < length
+ and value[pos + 1] == '\n'):
+ pos += 2
+ line_end_chars = 2
+ else:
+ pos = rutf8.next_codepoint_pos(value, pos)
+ line_end_chars = 1
if keepends:
- lgt += 1
- if keepends:
- eol = pos
- lgt += 1
- # XXX find out why lgt calculation is off
- strs_w.append(W_UnicodeObject(value[sol:eol], -1))
+ eol = pos
+ lgt += line_end_chars
+ strs_w.append(W_UnicodeObject(value[sol:eol], lgt))
return space.newlist(strs_w)
@unwrap_spec(width=int)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit