Author: Jeremy Thurgood <[email protected]>
Branch: unicode-utf8
Changeset: r92654:3e28fa9641ac
Date: 2017-10-08 17:09 +0200
http://bitbucket.org/pypy/pypy/changeset/3e28fa9641ac/
Log: Some more unicode methods.
diff --git a/pypy/objspace/std/stringmethods.py
b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -489,60 +489,44 @@
builder.append(self._lower(value[i]))
return self._new(builder.build())
+ # This is not used for W_UnicodeObject.
def descr_partition(self, space, w_sub):
from pypy.objspace.std.bytearrayobject import W_BytearrayObject
value = self._val(space)
- if self._use_rstr_ops(space, w_sub):
- sub = self._op_val(space, w_sub)
- sublen = len(sub)
- if sublen == 0:
- raise oefmt(space.w_ValueError, "empty separator")
+ sub = _get_buffer(space, w_sub)
+ sublen = sub.getlength()
+ if sublen == 0:
+ raise oefmt(space.w_ValueError, "empty separator")
- pos = value.find(sub)
- else:
- sub = _get_buffer(space, w_sub)
- sublen = sub.getlength()
- if sublen == 0:
- raise oefmt(space.w_ValueError, "empty separator")
-
- pos = find(value, sub, 0, len(value))
- if pos != -1 and isinstance(self, W_BytearrayObject):
- w_sub = self._new_from_buffer(sub)
+ pos = find(value, sub, 0, len(value))
+ if pos != -1 and isinstance(self, W_BytearrayObject):
+ w_sub = self._new_from_buffer(sub)
if pos == -1:
- if isinstance(self, W_BytearrayObject):
- self = self._new(value)
+ self = self._new(value)
return space.newtuple([self, self._empty(), self._empty()])
else:
return space.newtuple(
[self._sliced(space, value, 0, pos, self), w_sub,
self._sliced(space, value, pos + sublen, len(value), self)])
+ # This is not used for W_UnicodeObject.
def descr_rpartition(self, space, w_sub):
from pypy.objspace.std.bytearrayobject import W_BytearrayObject
value = self._val(space)
- if self._use_rstr_ops(space, w_sub):
- sub = self._op_val(space, w_sub)
- sublen = len(sub)
- if sublen == 0:
- raise oefmt(space.w_ValueError, "empty separator")
+ sub = _get_buffer(space, w_sub)
+ sublen = sub.getlength()
+ if sublen == 0:
+ raise oefmt(space.w_ValueError, "empty separator")
- pos = value.rfind(sub)
- else:
- sub = _get_buffer(space, w_sub)
- sublen = sub.getlength()
- if sublen == 0:
- raise oefmt(space.w_ValueError, "empty separator")
-
- pos = rfind(value, sub, 0, len(value))
- if pos != -1 and isinstance(self, W_BytearrayObject):
- w_sub = self._new_from_buffer(sub)
+ pos = rfind(value, sub, 0, len(value))
+ if pos != -1 and isinstance(self, W_BytearrayObject):
+ w_sub = self._new_from_buffer(sub)
if pos == -1:
- if isinstance(self, W_BytearrayObject):
- self = self._new(value)
+ self = self._new(value)
return space.newtuple([self._empty(), self._empty(), self])
else:
return space.newtuple(
diff --git a/pypy/objspace/std/unicodeobject.py
b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -412,20 +412,9 @@
return W_UnicodeObject(result.build(), result_length)
def descr_find(self, space, w_sub, w_start=None, w_end=None):
- start, end = unwrap_start_stop(space, self._length, w_start, w_end)
-
w_sub = self.convert_arg_to_w_unicode(space, w_sub)
- # XXX for now just create index
- start_index = 0
- end_index = len(self._utf8)
- if start > 0 or end != self._length:
- storage = self._get_index_storage()
- if start > 0:
- start_index = rutf8.codepoint_position_at_index(self._utf8,
- storage, start)
- if end != self._length:
- end_index = rutf8.codepoint_position_at_index(self._utf8,
- storage, end)
+ start_index, end_index = self._unwrap_and_compute_idx_params(
+ space, w_start, w_end)
res_index = self._utf8.find(w_sub._utf8, start_index, end_index)
if res_index == -1:
@@ -434,6 +423,44 @@
res = rutf8.check_utf8(self._utf8, force_len=res_index) # can't raise
return space.newint(res)
+ def descr_rfind(self, space, w_sub, w_start=None, w_end=None):
+ w_sub = self.convert_arg_to_w_unicode(space, w_sub)
+ start_index, end_index = self._unwrap_and_compute_idx_params(
+ space, w_start, w_end)
+
+ res_index = self._utf8.rfind(w_sub._utf8, start_index, end_index)
+ if res_index == -1:
+ return space.newint(-1)
+
+ res = rutf8.check_utf8(self._utf8, force_len=res_index) # can't raise
+ return space.newint(res)
+
+ def descr_index(self, space, w_sub, w_start=None, w_end=None):
+ w_sub = self.convert_arg_to_w_unicode(space, w_sub)
+ start_index, end_index = self._unwrap_and_compute_idx_params(
+ space, w_start, w_end)
+
+ res_index = self._utf8.find(w_sub._utf8, start_index, end_index)
+ if res_index == -1:
+ raise oefmt(space.w_ValueError,
+ "substring not found in string.index")
+
+ res = rutf8.check_utf8(self._utf8, force_len=res_index) # can't raise
+ return space.newint(res)
+
+ def descr_rindex(self, space, w_sub, w_start=None, w_end=None):
+ w_sub = self.convert_arg_to_w_unicode(space, w_sub)
+ start_index, end_index = self._unwrap_and_compute_idx_params(
+ space, w_start, w_end)
+
+ res_index = self._utf8.rfind(w_sub._utf8, start_index, end_index)
+ if res_index == -1:
+ raise oefmt(space.w_ValueError,
+ "substring not found in string.rindex")
+
+ res = rutf8.check_utf8(self._utf8, force_len=res_index) # can't raise
+ return space.newint(res)
+
def descr_encode(self, space, w_encoding=None, w_errors=None):
encoding, errors = _get_encoding_and_errors(space, w_encoding,
w_errors)
@@ -548,7 +575,7 @@
return space.newbool(cased)
def descr_startswith(self, space, w_prefix, w_start=None, w_end=None):
- start, end = unwrap_start_stop(space, self._length, w_start, w_end)
+ start, end = self._unwrap_and_compute_idx_params(space, w_start, w_end)
value = self._utf8
if space.isinstance_w(w_prefix, space.w_tuple):
return self._startswith_tuple(space, value, w_prefix, start, end)
@@ -562,7 +589,7 @@
return startswith(value, prefix, start, end)
def descr_endswith(self, space, w_suffix, w_start=None, w_end=None):
- start, end = unwrap_start_stop(space, self._length, w_start, w_end)
+ start, end = self._unwrap_and_compute_idx_params(space, w_start, w_end)
value = self._utf8
if space.isinstance_w(w_suffix, space.w_tuple):
return self._endswith_tuple(space, value, w_suffix, start, end)
@@ -739,11 +766,50 @@
return W_UnicodeObject(centered, self._len() + d)
+ def descr_count(self, space, w_sub, w_start=None, w_end=None):
+ value = self._utf8
+ start_index, end_index = self._unwrap_and_compute_idx_params(
+ space, w_start, w_end)
+ sub = self.convert_arg_to_w_unicode(space, w_sub)._utf8
+ return space.newint(value.count(sub, start_index, end_index))
+
def descr_contains(self, space, w_sub):
value = self._utf8
w_other = self.convert_arg_to_w_unicode(space, w_sub)
return space.newbool(value.find(w_other._utf8) >= 0)
+ def descr_partition(self, space, w_sub):
+ value = self._utf8
+ sub = self.convert_arg_to_w_unicode(space, w_sub)
+ sublen = sub._len()
+ if sublen == 0:
+ raise oefmt(space.w_ValueError, "empty separator")
+
+ pos = value.find(sub._utf8)
+
+ if pos == -1:
+ return space.newtuple([self, self._empty(), self._empty()])
+ else:
+ return space.newtuple(
+ [self._sliced(space, value, 0, pos, self), w_sub,
+ self._sliced(space, value, pos + sublen, len(value), self)])
+
+ def descr_rpartition(self, space, w_sub):
+ value = self._utf8
+ sub = self.convert_arg_to_w_unicode(space, w_sub)
+ sublen = sub._len()
+ if sublen == 0:
+ raise oefmt(space.w_ValueError, "empty separator")
+
+ pos = value.rfind(sub._utf8)
+
+ if pos == -1:
+ return space.newtuple([self._empty(), self._empty(), self])
+ else:
+ return space.newtuple(
+ [self._sliced(space, value, 0, pos, self), w_sub,
+ self._sliced(space, value, pos + sublen, len(value), self)])
+
@unwrap_spec(count=int)
def descr_replace(self, space, w_old, w_new, count=-1):
@@ -786,13 +852,38 @@
return self._index_storage
def _getitem_result(self, space, index):
- if index >= self._length:
+ if index < 0:
+ index += self._length
+ if index < 0 or index >= self._length:
raise oefmt(space.w_IndexError, "string index out of range")
storage = self._get_index_storage()
start = rutf8.codepoint_position_at_index(self._utf8, storage, index)
end = rutf8.next_codepoint_pos(self._utf8, start)
return W_UnicodeObject(self._utf8[start:end], 1)
+ def _unwrap_and_compute_idx_params(self, space, w_start, w_end):
+ start, end = unwrap_start_stop(space, self._length, w_start, w_end)
+ # XXX for now just create index
+ start_index = 0
+ end_index = len(self._utf8)
+ if start > 0 or end != self._length:
+ storage = self._get_index_storage()
+ if start > 0:
+ # :-(
+ if start > self._length:
+ start_index = start
+ else:
+ start_index = rutf8.codepoint_position_at_index(
+ self._utf8, storage, start)
+ if end != self._length:
+ # :-(
+ if end > self._length:
+ end_index = end
+ else:
+ end_index = rutf8.codepoint_position_at_index(
+ self._utf8, storage, end)
+ return (start_index, end_index)
+
@unwrap_spec(width=int, w_fillchar=WrappedDefault(' '))
def descr_rjust(self, space, width, w_fillchar):
value = self._utf8
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit