Author: fijal
Branch: unicode-utf8
Changeset: r90404:737c72b15c6d
Date: 2017-02-27 16:22 +0100
http://bitbucket.org/pypy/pypy/changeset/737c72b15c6d/
Log: fix and a workaround

diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -303,6 +303,13 @@
             return self.newlist(list_u)
         return W_ListObject.newlist_unicode(self, list_u)
 
+    def newlist_from_unicode(self, lst):
+        res_w = []
+        for u in lst:
+            assert u is not None
+            res_w.append(self.newutf8(u, -1))
+        return self.newlist(res_w)
+
     def newlist_int(self, list_i):
         return W_ListObject.newlist_int(self, list_i)
 
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -109,7 +109,7 @@
         if self._len() != 1:
             raise oefmt(space.w_TypeError,
                         "ord() expected a character, but string of length %d "
-                        "found", len(self._value))
+                        "found", self._len())
         return space.newint(rutf8.codepoint_at_pos(self._utf8, 0))
 
     def _new(self, value):
@@ -126,6 +126,9 @@
             self._length = self._compute_length()
         return self._length
 
+    def _compute_length(self):
+        return rutf8.compute_length_utf8(self._utf8)
+
     def _val(self, space):
         return self._utf8.decode('utf8')
 
@@ -156,7 +159,7 @@
     @specialize.argtype(1)
     def _chr(self, char):
         assert len(char) == 1
-        return char[0]
+        return unichr(ord(char[0]))
 
     def _multi_chr(self, unichar):
         return unichar
@@ -513,7 +516,7 @@
     def descr_zfill(self, space, width):
         selfval = self._utf8
         if len(selfval) == 0:
-            return W_UnicodeObject(self._chr('0') * width, width)
+            return W_UnicodeObject('0' * width, width)
         num_zeros = width - self._len()
         if num_zeros <= 0:
             # cannot return self, in case it is a subclass of str
@@ -525,7 +528,7 @@
             start = 1
         else:
             start = 0
-        builder.append_multiple_char(self._chr('0'), num_zeros)
+        builder.append_multiple_char('0', num_zeros)
         builder.append_slice(selfval, start, len(selfval))
         return W_UnicodeObject(builder.build(), width)
 
@@ -536,14 +539,14 @@
         value = self._utf8
         if space.is_none(w_sep):
             res = split(value, maxsplit=maxsplit)
-            return space.newlist([W_UnicodeObject(s, -1) for s in res])
+            return space.newlist_from_unicode(res)
 
         by = self.convert_arg_to_w_unicode(space, w_sep)._utf8
         if len(by) == 0:
             raise oefmt(space.w_ValueError, "empty separator")
         res = split(value, by, maxsplit)
 
-        return space.newlist([W_UnicodeObject(s, -1) for s in res])
+        return space.newlist_from_unicode(res)
 
     @unwrap_spec(maxsplit=int)
     def descr_rsplit(self, space, w_sep=None, maxsplit=-1):
@@ -551,14 +554,14 @@
         value = self._utf8
         if space.is_none(w_sep):
             res = rsplit(value, maxsplit=maxsplit)
-            return space.newlist([W_UnicodeObject(s, -1) for s in res])
+            return space.newlist_from_unicode(res)
 
         by = self.convert_arg_to_w_unicode(space, w_sep)._utf8
         if len(by) == 0:
             raise oefmt(space.w_ValueError, "empty separator")
         res = rsplit(value, by, maxsplit)
 
-        return space.newlist([W_UnicodeObject(s, -1) for s in res])
+        return space.newlist_from_unicode(res)
 
     @unwrap_spec(width=int, w_fillchar=WrappedDefault(' '))
     def descr_center(self, space, width, w_fillchar):
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -66,6 +66,14 @@
         return pos + 1
     return pos + ord(runicode._utf8_code_length[chr1 - 0x80])
 
+def compute_length_utf8(s):
+    pos = 0
+    lgt = 0
+    while pos < len(s):
+        pos = next_codepoint_pos(s, pos)
+        lgt += 1
+    return lgt
+
 def codepoint_at_pos(code, pos):
     """ Give a codepoint in code at pos - assumes valid utf8, no checking!
     """
diff --git a/rpython/rtyper/rmodel.py b/rpython/rtyper/rmodel.py
--- a/rpython/rtyper/rmodel.py
+++ b/rpython/rtyper/rmodel.py
@@ -359,6 +359,10 @@
     def ll_str(self, nothing): raise AssertionError("unreachable code")
 impossible_repr = VoidRepr()
 
+class __extend__(pairtype(Repr, VoidRepr)):
+    def convert_from_to((r_from, r_to), v, llops):
+        return inputconst(lltype.Void, None)
+
 class SimplePointerRepr(Repr):
     "Convenience Repr for simple ll pointer types with no operation on them."
 
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit