Author: Amaury Forgeot d'Arc <[email protected]>
Branch: py3k
Changeset: r55226:a9be44dc22c8
Date: 2012-05-31 01:46 +0200
http://bitbucket.org/pypy/pypy/changeset/a9be44dc22c8/
Log: Many fixes in rope objects for py3k
diff --git a/pypy/objspace/std/ropeobject.py b/pypy/objspace/std/ropeobject.py
--- a/pypy/objspace/std/ropeobject.py
+++ b/pypy/objspace/std/ropeobject.py
@@ -8,6 +8,7 @@
from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice
from pypy.objspace.std import stringobject, slicetype, iterobject
from pypy.objspace.std.listobject import W_ListObject
+from pypy.objspace.std.longobject import W_LongObject
from pypy.objspace.std.noneobject import W_NoneObject
from pypy.objspace.std.tupleobject import W_TupleObject
from pypy.rlib.rarithmetic import ovfcheck
@@ -32,7 +33,7 @@
def unwrap(w_self, space):
return w_self._node.flatten_string()
- str_w = unwrap
+ bytes_w = unwrap
def create_if_subclassed(w_self):
if type(w_self) is W_RopeObject:
@@ -48,7 +49,7 @@
def rope_w(space, w_str):
if isinstance(w_str, W_RopeObject):
return w_str._node
- return rope.LiteralStringNode(space.str_w(w_str))
+ return rope.LiteralStringNode(space.bufferstr_w(w_str))
registerimplementation(W_RopeObject)
@@ -231,10 +232,10 @@
for node in rope.split_chars(selfnode, maxsplit)]
return space.newlist(res_w)
-def str_split__Rope_Rope_ANY(space, w_self, w_by, w_maxsplit=-1):
+def str_split__Rope_ANY_ANY(space, w_self, w_by, w_maxsplit=-1):
maxsplit = space.int_w(w_maxsplit)
selfnode = w_self._node
- bynode = w_by._node
+ bynode = rope_w(space, w_by)
bylen = bynode.length()
if bylen == 0:
raise OperationError(space.w_ValueError, space.wrap("empty separator"))
@@ -251,13 +252,13 @@
return space.newlist(res_w)
-def str_rsplit__Rope_Rope_ANY(space, w_self, w_by, w_maxsplit=-1):
+def str_rsplit__Rope_ANY_ANY(space, w_self, w_by, w_maxsplit=-1):
# XXX works but flattens
maxsplit = space.int_w(w_maxsplit)
res_w = []
value = w_self._node.flatten_string()
end = len(value)
- by = w_by._node.flatten_string()
+ by = space.bufferstr_w(w_by)
bylen = len(by)
if bylen == 0:
raise OperationError(space.w_ValueError, space.wrap("empty separator"))
@@ -266,11 +267,11 @@
next = value.rfind(by, 0, end)
if next < 0:
break
- res_w.append(space.wrap(value[next+bylen: end]))
+ res_w.append(space.wrapbytes(value[next+bylen: end]))
end = next
maxsplit -= 1 # NB. if it's already < 0, it stays < 0
- res_w.append(space.wrap(value[:end]))
+ res_w.append(space.wrapbytes(value[:end]))
res_w.reverse()
return space.newlist(res_w)
@@ -283,25 +284,24 @@
if size == 1:
w_s = list_w[0]
# only one item, return it if it's not a subclass of str
- if (space.is_w(space.type(w_s), space.w_str) or
- space.is_w(space.type(w_s), space.w_unicode)):
+ if space.is_w(space.type(w_s), space.w_str):
return w_s
self = w_self._node
l = []
for i in range(size):
w_s = list_w[i]
- if not space.isinstance_w(w_s, space.w_str):
- if space.isinstance_w(w_s, space.w_unicode):
- w_u = space.call_function(space.w_unicode, w_self)
- return space.call_method(w_u, "join", space.newlist(list_w))
+ try:
+ item = rope_w(space, w_s)
+ except OperationError, e:
+ if not e.match(space, space.w_TypeError):
+ raise
raise operationerrfmt(
space.w_TypeError,
- "sequence item %d: expected string, %s "
+ "sequence item %d: expected bytes, %s "
"found", i, space.type(w_s).getname(space))
- assert isinstance(w_s, W_RopeObject)
- node = w_s._node
- l.append(node)
+ assert isinstance(item, rope.LiteralStringNode)
+ l.append(item)
try:
return W_RopeObject(rope.join(self, l))
except OverflowError:
@@ -311,7 +311,7 @@
def str_rjust__Rope_ANY_ANY(space, w_self, w_arg, w_fillchar):
u_arg = space.int_w(w_arg)
selfnode = w_self._node
- fillchar = space.str_w(w_fillchar)
+ fillchar = space.bytes_w(w_fillchar)
if len(fillchar) != 1:
raise OperationError(space.w_TypeError,
space.wrap("rjust() argument 2 must be a single character"))
@@ -330,7 +330,7 @@
def str_ljust__Rope_ANY_ANY(space, w_self, w_arg, w_fillchar):
u_arg = space.int_w(w_arg)
selfnode = w_self._node
- fillchar = space.str_w(w_fillchar)
+ fillchar = space.bytes_w(w_fillchar)
if len(fillchar) != 1:
raise OperationError(space.w_TypeError,
space.wrap("rjust() argument 2 must be a single character"))
@@ -346,32 +346,48 @@
else:
return W_RopeObject(selfnode)
-def _convert_idx_params(space, w_self, w_sub, w_start, w_end,
upper_bound=False):
+def _convert_idx_params(space, w_self, w_start, w_end, upper_bound=False):
self = w_self._node
- sub = w_sub._node
start, end = slicetype.unwrap_start_stop(
space, self.length(), w_start, w_end, upper_bound)
- return (self, sub, start, end)
+ return (self, start, end)
_convert_idx_params._annspecialcase_ = 'specialize:arg(5)'
-def contains__Rope_Rope(space, w_self, w_sub):
+def contains__Rope_ANY(space, w_self, w_sub):
self = w_self._node
- sub = w_sub._node
+ sub = rope_w(space, w_sub)
return space.newbool(rope.find(self, sub) >= 0)
-def str_find__Rope_Rope_ANY_ANY(space, w_self, w_sub, w_start, w_end):
+def contains__Rope_Long(space, w_self, w_char):
+ self = w_self._node
+ try:
+ char = space.int_w(w_char)
+ except OperationError, e:
+ if e.match(space, space.w_OverflowError):
+ char = 256 # arbitrary value which will trigger the ValueError
+ # condition below
+ else:
+ raise
+ if 0 <= char < 256:
+ sub = rope.LiteralStringNode.PREBUILT[char]
+ return space.newbool(rope.find(self, sub) >= 0)
+ else:
+ raise OperationError(space.w_ValueError,
+ space.wrap("character must be in range(256)"))
- (self, sub, start, end) = _convert_idx_params(space, w_self, w_sub,
w_start, w_end)
- res = rope.find(self, sub, start, end)
+def str_find__Rope_ANY_ANY_ANY(space, w_self, w_sub, w_start, w_end):
+
+ (self, start, end) = _convert_idx_params(space, w_self, w_start, w_end)
+ res = rope.find(self, rope_w(space, w_sub), start, end)
return wrapint(space, res)
-def str_rfind__Rope_Rope_ANY_ANY(space, w_self, w_sub, w_start, w_end):
+def str_rfind__Rope_ANY_ANY_ANY(space, w_self, w_sub, w_start, w_end):
# XXX works but flattens
- (self, sub, start, end) = _convert_idx_params(space, w_self, w_sub,
w_start, w_end)
+ (self, start, end) = _convert_idx_params(space, w_self, w_start, w_end)
self = self.flatten_string()
- sub = sub.flatten_string()
+ sub = space.bufferstr_w(w_sub)
res = self.rfind(sub, start, end)
return wrapint(space, res)
@@ -413,8 +429,8 @@
def str_index__Rope_Rope_ANY_ANY(space, w_self, w_sub, w_start, w_end):
- (self, sub, start, end) = _convert_idx_params(space, w_self, w_sub,
w_start, w_end)
- res = rope.find(self, sub, start, end)
+ (self, start, end) = _convert_idx_params(space, w_self, w_start, w_end)
+ res = rope.find(self, w_sub._node, start, end)
if res < 0:
raise OperationError(space.w_ValueError,
space.wrap("substring not found in string.index"))
@@ -423,10 +439,10 @@
def str_rindex__Rope_Rope_ANY_ANY(space, w_self, w_sub, w_start, w_end):
- (self, sub, start, end) = _convert_idx_params(space, w_self, w_sub,
w_start, w_end)
+ (self, start, end) = _convert_idx_params(space, w_self, w_start, w_end)
# XXX works but flattens
self = self.flatten_string()
- sub = sub.flatten_string()
+ sub = w_sub._node.flatten_string()
res = self.rfind(sub, start, end)
if res < 0:
raise OperationError(space.w_ValueError,
@@ -474,23 +490,23 @@
def _contains(i, string):
return chr(i) in string
-def str_strip__Rope_Rope(space, w_self, w_chars):
+def str_strip__Rope_ANY(space, w_self, w_chars):
return W_RopeObject(rope.strip(w_self._node, True, True,
- _contains, w_chars._node.flatten_string()))
+ _contains, space.bytes_w(w_chars)))
def str_strip__Rope_None(space, w_self, w_chars):
return W_RopeObject(rope.strip(w_self._node, left=True, right=True))
-def str_rstrip__Rope_Rope(space, w_self, w_chars):
+def str_rstrip__Rope_ANY(space, w_self, w_chars):
return W_RopeObject(rope.strip(w_self._node, False, True,
- _contains, w_chars._node.flatten_string()))
+ _contains, space.bytes_w(w_chars)))
def str_rstrip__Rope_None(space, w_self, w_chars):
return W_RopeObject(rope.strip(w_self._node, False, True))
-def str_lstrip__Rope_Rope(space, w_self, w_chars):
+def str_lstrip__Rope_ANY(space, w_self, w_chars):
return W_RopeObject(rope.strip(w_self._node, True, False,
- _contains, w_chars._node.flatten_string()))
+ _contains, space.bytes_w(w_chars)))
def str_lstrip__Rope_None(space, w_self, w_chars):
return W_RopeObject(rope.strip(w_self._node, left=True, right=False))
@@ -500,7 +516,7 @@
node = w_self._node
length = node.length()
arg = space.int_w(w_arg)
- fillchar = space.str_w(w_fillchar)
+ fillchar = space.bytes_w(w_fillchar)
if len(fillchar) != 1:
raise OperationError(space.w_TypeError,
space.wrap("center() argument 2 must be a single character"))
@@ -536,16 +552,19 @@
return wrapint(space, i)
+def str_endswith__Rope_ANY_ANY_ANY(space, w_self, w_suffix, w_start, w_end):
+ (self, start, end) = _convert_idx_params(
+ space, w_self, w_start, w_end, True)
+ return space.newbool(rope.endswith(self, w_suffix.node, start, end))
+
def str_endswith__Rope_Rope_ANY_ANY(space, w_self, w_suffix, w_start, w_end):
- (self, suffix, start, end) = _convert_idx_params(space, w_self,
- w_suffix, w_start, w_end,
- True)
- return space.newbool(rope.endswith(self, suffix, start, end))
+ (self, start, end) = _convert_idx_params(
+ space, w_self, w_start, w_end, True)
+ return space.newbool(rope.endswith(self, w_suffix._node, start, end))
def str_endswith__Rope_Tuple_ANY_ANY(space, w_self, w_suffixes, w_start,
w_end):
- (self, _, start, end) = _convert_idx_params(space, w_self,
- W_RopeObject.EMPTY, w_start,
- w_end, True)
+ (self, start, end) = _convert_idx_params(
+ space, w_self, w_start, w_end, True)
for w_suffix in space.fixedview(w_suffixes):
if space.isinstance_w(w_suffix, space.w_unicode):
w_u = space.call_function(space.w_unicode, w_self)
@@ -557,15 +576,19 @@
return space.w_False
+def str_startswith__Rope_ANY_ANY_ANY(space, w_self, w_prefix, w_start, w_end):
+ (self, start, end) = _convert_idx_params(
+ space, w_self, w_start, w_end, True)
+ return space.newbool(rope.startswith(self, w_prefix._node, start, end))
+
def str_startswith__Rope_Rope_ANY_ANY(space, w_self, w_prefix, w_start, w_end):
- (self, prefix, start, end) = _convert_idx_params(space, w_self,
- w_prefix, w_start, w_end,
- True)
- return space.newbool(rope.startswith(self, prefix, start, end))
+ (self, start, end) = _convert_idx_params(
+ space, w_self, w_start, w_end, True)
+ return space.newbool(rope.startswith(self, w_prefix._node, start, end))
def str_startswith__Rope_Tuple_ANY_ANY(space, w_self, w_prefixes, w_start,
w_end):
- (self, _, start, end) = _convert_idx_params(space, w_self,
W_RopeObject.EMPTY,
- w_start, w_end, True)
+ (self, start, end) = _convert_idx_params(
+ space, w_self, w_start, w_end, True)
for w_prefix in space.fixedview(w_prefixes):
if space.isinstance_w(w_prefix, space.w_unicode):
w_u = space.call_function(space.w_unicode, w_self)
@@ -579,6 +602,8 @@
def _tabindent(node, tabsize):
"calculates distance after the token to the next tabstop"
+ if tabsize <= 0:
+ return tabsize
length = node.length()
distance = tabsize
if length:
@@ -696,7 +721,7 @@
if ival < 0 or ival >= slen:
raise OperationError(space.w_IndexError,
space.wrap("string index out of range"))
- return wrapchar(space, node.getchar(ival))
+ return space.wrap(ord(node.getchar(ival)))
def getitem__Rope_Slice(space, w_str, w_slice):
node = w_str._node
@@ -741,9 +766,9 @@
return space.wrap(w_str._node.length())
def str__Rope(space, w_str):
- if type(w_str) is W_RopeObject:
- return w_str
- return W_RopeObject(w_str._node)
+ if space.sys.get_flag('bytes_warning'):
+ space.warn("str() on a bytes instance", space.w_BytesWarning)
+ return repr__Rope(space, w_str)
def iter__Rope(space, w_str):
return W_RopeIterObject(w_str)
@@ -761,54 +786,8 @@
return space.newtuple([W_RopeObject(w_str._node)])
def repr__Rope(space, w_str):
- node = w_str._node
- length = node.length()
-
- i = 0
- buf = [' '] * (length * 4 + 2) # safely overallocate
-
- quote = "'"
- if (rope.find_int(node, ord(quote)) != -1 and
- rope.find_int(node, ord('"')) == -1):
- quote = '"'
-
- buf[0] = quote
-
- iter = rope.ItemIterator(node)
- while 1:
- try:
- c = iter.nextchar()
- i += 1
- except StopIteration:
- break
- bs_char = None # character quoted by backspace
-
- if c == '\\' or c == quote:
- bs_char = c
- elif c == '\t': bs_char = 't'
- elif c == '\r': bs_char = 'r'
- elif c == '\n': bs_char = 'n'
- elif not '\x20' <= c < '\x7f':
- n = ord(c)
- buf[i] = '\\'
- i += 1
- buf[i] = 'x'
- i += 1
- buf[i] = "0123456789abcdef"[n>>4]
- i += 1
- buf[i] = "0123456789abcdef"[n&0xF]
- else:
- buf[i] = c
-
- if bs_char is not None:
- buf[i] = '\\'
- i += 1
- buf[i] = bs_char
-
- i += 1
- buf[i] = quote
-
- return W_RopeObject(rope.rope_from_charlist(buf[:i+1]))
+ return space.wrap(stringobject.string_escape_encode(
+ space.bytes_w(w_str), True))
def str_translate__Rope_ANY_ANY(space, w_string, w_table, w_deletechars=''):
"""charfilter - unicode handling is not implemented
@@ -868,7 +847,7 @@
raise OperationError(space.w_StopIteration, space.w_None)
try:
char = w_ropeiter.item_iter.nextchar()
- w_item = wrapchar(space, char)
+ w_item = space.wrap(ord(char))
except StopIteration:
w_ropeiter.node = None
w_ropeiter.char_iter = None
diff --git a/pypy/objspace/std/ropeunicodeobject.py
b/pypy/objspace/std/ropeunicodeobject.py
--- a/pypy/objspace/std/ropeunicodeobject.py
+++ b/pypy/objspace/std/ropeunicodeobject.py
@@ -9,6 +9,7 @@
from pypy.objspace.std.noneobject import W_NoneObject
from pypy.rlib import rope
from pypy.rlib.rstring import StringBuilder
+from pypy.rlib.runicode import unicode_encode_unicode_escape
from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice
from pypy.objspace.std import unicodeobject, slicetype, iterobject
from pypy.objspace.std.tupleobject import W_TupleObject
@@ -141,7 +142,9 @@
return ''.join(result)
def str__RopeUnicode(space, w_uni):
- return space.call_method(w_uni, 'encode')
+ if type(w_uni) is W_RopeUnicodeObject:
+ return w_uni
+ return W_RopeUnicodeObject(w_uni._node)
def lt__RopeUnicode_RopeUnicode(space, w_str1, w_str2):
n1 = w_str1._node
@@ -334,6 +337,31 @@
else:
previous_is_cased = False
+def unicode_isidentifier__RopeUnicode(space, w_unicode):
+ if w_unicode._node.length() == 0:
+ return space.w_False
+ iter = rope.ItemIterator(w_unicode._node)
+
+ # PEP 3131 says that the first character must be in XID_Start and
+ # subsequent characters in XID_Continue, and for the ASCII range,
+ # the 2.x rules apply (i.e start with letters and underscore,
+ # continue with letters, digits, underscore). However, given the
+ # current definition of XID_Start and XID_Continue, it is
+ # sufficient to check just for these, except that _ must be
+ # allowed as starting an identifier.
+ first = iter.nextint()
+ if not (unicodedb.isxidstart(first) or first == ord('_')):
+ return space.w_False
+
+ while 1:
+ try:
+ ch = iter.nextint()
+ except StopIteration:
+ break
+ if not unicodedb.isxidcontinue(ch):
+ return space.w_False
+ return space.w_True
+
def _contains(i, uni):
return unichr(i) in uni
@@ -782,102 +810,11 @@
# Move this into the _codecs module as 'unicodeescape_string (Remember to
cater for quotes)'
def repr__RopeUnicode(space, w_unicode):
- hexdigits = "0123456789abcdef"
node = w_unicode._node
+ chars = node.flatten_unicode()
size = node.length()
-
- singlequote = doublequote = False
- iter = rope.ItemIterator(node)
- for i in range(size):
- c = iter.nextunichar()
- if singlequote and doublequote:
- break
- if c == u'\'':
- singlequote = True
- elif c == u'"':
- doublequote = True
- if singlequote and not doublequote:
- quote = '"'
- else:
- quote = '\''
- result = ['u', quote]
- iter = rope.ItemIterator(node)
- j = 0
- while j < size:
- code = iter.nextint()
- if code >= 0x10000:
- result.extend(['\\', "U",
- hexdigits[(code >> 28) & 0xf],
- hexdigits[(code >> 24) & 0xf],
- hexdigits[(code >> 20) & 0xf],
- hexdigits[(code >> 16) & 0xf],
- hexdigits[(code >> 12) & 0xf],
- hexdigits[(code >> 8) & 0xf],
- hexdigits[(code >> 4) & 0xf],
- hexdigits[(code >> 0) & 0xf],
- ])
- j += 1
- continue
- if code >= 0xD800 and code < 0xDC00:
- if j < size - 1:
- code2 = iter.nextint()
- # XXX this is wrong: if the next if is false,
- # code2 is lost
- if code2 >= 0xDC00 and code2 <= 0xDFFF:
- code = (((code & 0x03FF) << 10) | (code2 & 0x03FF)) +
0x00010000
- result.extend(['\\', "U",
- hexdigits[(code >> 28) & 0xf],
- hexdigits[(code >> 24) & 0xf],
- hexdigits[(code >> 20) & 0xf],
- hexdigits[(code >> 16) & 0xf],
- hexdigits[(code >> 12) & 0xf],
- hexdigits[(code >> 8) & 0xf],
- hexdigits[(code >> 4) & 0xf],
- hexdigits[(code >> 0) & 0xf],
- ])
- j += 2
- continue
-
- if code >= 0x100:
- result.extend(['\\', "u",
- hexdigits[(code >> 12) & 0xf],
- hexdigits[(code >> 8) & 0xf],
- hexdigits[(code >> 4) & 0xf],
- hexdigits[(code >> 0) & 0xf],
- ])
- j += 1
- continue
- if code == ord('\\') or code == ord(quote):
- result.append('\\')
- result.append(chr(code))
- j += 1
- continue
- if code == ord('\t'):
- result.append('\\')
- result.append('t')
- j += 1
- continue
- if code == ord('\r'):
- result.append('\\')
- result.append('r')
- j += 1
- continue
- if code == ord('\n'):
- result.append('\\')
- result.append('n')
- j += 1
- continue
- if code < ord(' ') or code >= 0x7f:
- result.extend(['\\', "x",
- hexdigits[(code >> 4) & 0xf],
- hexdigits[(code >> 0) & 0xf],
- ])
- j += 1
- continue
- result.append(chr(code))
- j += 1
- result.append(quote)
- return W_RopeObject(rope.rope_from_charlist(result))
+ s = unicode_encode_unicode_escape(chars, size, "strict", quotes=True)
+ return space.wrap(s)
def mod__RopeUnicode_ANY(space, w_format, w_values):
return mod_format(space, w_format, w_values, do_unicode=True)
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit