Author: fijal
Branch: unicode-utf8
Changeset: r92613:15eb01ac7f57
Date: 2017-10-05 17:14 +0200
http://bitbucket.org/pypy/pypy/changeset/15eb01ac7f57/
Log: whack whack whack;
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -126,7 +126,7 @@
self.orig = handler
def handle(self, errors, encoding, msg, s, pos, endpos):
- s, p, lgt = self.orig(errors, encoding, msg, s, pos, endpos)
+ s, p = self.orig(errors, encoding, msg, s, pos, endpos)
return s.decode("utf8"), p
class EncodeWrapper(object):
@@ -134,8 +134,7 @@
self.orig = handler
def handle(self, errors, encoding, msg, s, pos, endpos):
- s, rs, p, lgt = self.orig(errors, encoding, msg, s.encode("utf8"),
pos, endpos)
- return s, rs, p
+ return self.orig(errors, encoding, msg, s.encode("utf8"), pos, endpos)
# some irregular interfaces
def str_decode_utf8(s, slen, errors, final, errorhandler):
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -66,7 +66,7 @@
"position %d from error handler out of bounds",
newpos)
w_replace = space.convert_to_w_unicode(w_replace)
- return w_replace._utf8, newpos, w_replace._length
+ return w_replace._utf8, newpos
return call_errorhandler
def make_decode_errorhandler(self, space):
@@ -443,8 +443,7 @@
# "allow_surrogates=True"
@unwrap_spec(utf8='utf8', errors='text_or_none')
def utf_8_encode(space, utf8, errors="strict"):
- raise Exception('foo')
- return space.newtuple([space.newbytes(utf8), space.newint(utf8len)])
+ return space.newtuple([space.newbytes(utf8),
space.newint(rutf8.check_utf8(utf8))])
#@unwrap_spec(uni=unicode, errors='text_or_none')
#def utf_8_encode(space, uni, errors="strict"):
# if errors is None:
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -776,31 +776,28 @@
def encode_object(space, w_object, encoding, errors):
+ w_encoder = None
if encoding is None:
# Get the encoder functions as a wrapped object.
# This lookup is cached.
w_encoder = space.sys.get_w_default_encoder()
- else:
- if errors is None or errors == 'strict':
- if encoding == 'ascii':
- s = space.utf8_w(w_object)
- try:
- rutf8.check_ascii(s)
- except rutf8.CheckError as a:
- eh = unicodehelper.encode_error_handler(space)
- u_len = w_object._len()
- eh(None, "ascii", "ordinal not in range(128)", s, u_len,
- a.pos, a.pos + 1)
- assert False, "always raises"
- return space.newbytes(s)
- if encoding == 'utf-8':
- u = space.utf8_w(w_object)
- return space.newbytes(u)
- # XXX is this enough?
- #eh = unicodehelper.raise_unicode_exception_encode
- #return space.newbytes(unicode_encode_utf_8(
- # u, len(u), None, errorhandler=eh,
- # allow_surrogates=True))
+ if errors is None or errors == 'strict':
+ if ((encoding is None and space.sys.defaultencoding == 'ascii') or
+ encoding == 'ascii'):
+ s = space.utf8_w(w_object)
+ try:
+ rutf8.check_ascii(s)
+ except rutf8.CheckError as a:
+ eh = unicodehelper.encode_error_handler(space)
+ u_len = w_object._len()
+ eh(None, "ascii", "ordinal not in range(128)", s, u_len,
+ a.pos, a.pos + 1)
+ assert False, "always raises"
+ return space.newbytes(s)
+ if ((encoding is None and space.sys.defaultencoding == 'utf8') or
+ encoding == 'utf-8'):
+ return space.newbytes(space.utf8_w(w_object))
+ if w_encoder is None:
from pypy.module._codecs.interp_codecs import lookup_codec
w_encoder = space.getitem(lookup_codec(space, encoding),
space.newint(0))
if errors is None:
@@ -821,7 +818,6 @@
encoding = getdefaultencoding(space)
if errors is None or errors == 'strict':
if encoding == 'ascii':
- # XXX error handling
s = space.charbuf_w(w_obj)
try:
rutf8.check_ascii(s)
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -1280,8 +1280,9 @@
collend = pos+1
while collend < len(p) and ord(p[collend]) >= limit:
collend += 1
- ru, rs, pos = errorhandler(errors, encoding, reason, p,
+ ru, pos = errorhandler(errors, encoding, reason, p,
collstart, collend)
+ rs = None
if rs is not None:
# py3k only
result.append(rs)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit