Author: Carl Friedrich Bolz-Tereick <[email protected]>
Branch: 
Changeset: r98430:742d3ed68d7d
Date: 2019-12-31 21:04 +0100
http://bitbucket.org/pypy/pypy/changeset/742d3ed68d7d/

Log:    fix issue #3137: rsplit of unicode strings that end with a non-ascii
        char was broken

diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -359,6 +359,9 @@
         assert u''.rsplit('aaa') == [u'']
         assert u'a\nb\u1680c'.rsplit() == [u'a', u'b', u'c']
 
+    def test_rsplit_bug(self):
+        assert u'Vestur- og Mið'.rsplit() == [u'Vestur-', u'og', u'Mið']
+
     def test_split_rsplit_str_unicode(self):
         x = 'abc'.split(u'b')
         assert x == [u'a', u'c']
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -139,7 +139,7 @@
     if by is None:
         res = []
 
-        i = len(value) - 1
+        i = _decr(value, len(value), isutf8)
         while True:
             # starting from the end, find the end of the next word
             while i >= 0:
diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py
--- a/rpython/rlib/test/test_rstring.py
+++ b/rpython/rlib/test/test_rstring.py
@@ -88,6 +88,7 @@
     assert rsplit('baba', 'a', isutf8=1) == ['b', 'b', '']
     assert rsplit('b b', isutf8=1) == ['b', 'b']
     assert rsplit('b\xe1\x9a\x80b', isutf8=1) == ['b', 'b']
+    assert rsplit('b\xe1\x9a\x80', isutf8=1) == ['b']
 
 def test_string_replace():
     def check_replace(value, sub, *args, **kwargs):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to