Hi,
I'm experiencing strange behavior with attached code, that differs depending
on sys.setdefaultencoding being set or not. If it is set, the code works as
expected, if not - what should be the usual case - the code fails with some
non-sensible traceback.
I tried to boil it down to a comprehensible state, but some LOC are still
involved. I use similar code to represent database records, where the repr's
appear in user visible logs for example.
It is greatly appreciated, if some kind soul could shed some light on this
ungrateful behavior.
Thanks in advance,
Pete
#!/usr/bin/env python
# -*- coding: utf-8 -*-
u"""
# this code will usually crash with python 2.7.5, except some poor soul has thrown
# a file called sitecustomize.py into systems site-packages containing:
import sys
sys.setdefaultencoding('utf-8')
# question is, why does it crash without defining a default encoding?
# these are basically two classes with a __repr__ method, where one class
# refers to the other. On a repr, a two level recursion should happen.
# from the log traces, one can see, that in the good case, this is dealt
# well, in the normal case, the recursion isn't handled well: when it
# should continue, it restarts the loop for some reason.
# with default encoding set (comments starting the #):
DEBUG: ['a', 'b', 'c', 'd', 'e', 'f'] # iterate over these elements of D2Rec
DEBUG: a
DEBUG: b
DEBUG: c
DEBUG: d
DEBUG: ['bla', 'ho', 'hu', 'pi', 'tup'] # iterate over these elements of D1Rec
DEBUG: bla
DEBUG: ho
DEBUG: hu
DEBUG: pi
DEBUG: tup # D1Rec exhausted
DEBUG: e # continue with rest of D2Rec
DEBUG: f # finish
DEBUG: D2Rec( # dump structure
a: 1
b: 2
c: 3
d: [D1Rec(
bla: []
ho: u'äöü'
hu: 'hu'
pi: 3.14
tup: ()
)]
e: u'fünf'
f: 'sechs'
)
# all is good, without (the normal, but failing way):
DEBUG: ['a', 'b', 'c', 'd', 'e', 'f'] # iterate over these elements of D2Rec
DEBUG: a
DEBUG: b
DEBUG: c
DEBUG: d
DEBUG: ['bla', 'ho', 'hu', 'pi', 'tup'] # iterate over these elements of D1Rec
DEBUG: bla
DEBUG: ho
DEBUG: hu
DEBUG: pi
DEBUG: tup # D1Rec exhausted
DEBUG: ['a', 'b', 'c', 'd', 'e', 'f'] # it should continue with rest of D1Rec
DEBUG: a # but restarts the process for unknown reasons
DEBUG: b # effectively processing these items twice
DEBUG: c # only to commit suicide thereafter
DEBUG: d
DEBUG: ['bla', 'ho', 'hu', 'pi', 'tup']
DEBUG: bla
DEBUG: ho
DEBUG: hu
DEBUG: pi
DEBUG: tup
Traceback (most recent call last):
File "/usr/lib64/python2.7/logging/__init__.py", line 851, in emit
msg = self.format(record)
File "/usr/lib64/python2.7/logging/__init__.py", line 724, in format
return fmt.format(record)
File "/usr/lib64/python2.7/logging/__init__.py", line 467, in format
s = self._fmt % record.__dict__
File "reprtest.py", line 80, in __repr__
return u'%s(\n%s\n)' % (self.__class__.__name__, frec(self.__dict__))
File "reprtest.py", line 66, in frec
ret.append(u'%*s: %s' % (maxklen, key, reprstr(rec[key])))
File "reprtest.py", line 53, in reprstr
s = repr(s)
UnicodeEncodeError: 'ascii' codec can't encode characters in position 22-24: ordinal not in range(128)
Logged from file reprtest.py, line 108
# Consequently, the traceback doesn't make any sense..
"""
import sys
import logging
logconfig = {
'level': logging.DEBUG,
'format': '%(levelname)s: %(message)s',
'encoding': 'utf8',
}
logging.basicConfig(**logconfig)
log = logging.getLogger(__name__)
def isascii(s):
"""tests a string, if it can be represented as pure ascii"""
return all(ord(c) < 128 for c in s)
def reprstr(s):
"""helper to format values in a python 2 compatible way,
using unicode only, where necessary, and quote strings
"""
if isinstance(s, basestring):
if isascii(s):
s = repr(str(s))
else:
assert isinstance(s, unicode), "only unicode for non ascii strings allowed: %r" % s
s = "u'%s'" % s.replace("'", "\\'")
else:
s = repr(s)
return s
def frec(rec):
'''format a dict in a easy to read sorted record presentation
'''
ret = []
keys = [key for key in rec]
maxklen = len(keys) and max([len(key) for key in keys]) or 0
log.debug(sorted(keys))
for key in sorted(keys):
log.debug(key)
ret.append(u'%*s: %s' % (maxklen, key, reprstr(rec[key])))
return u'\n'.join(ret)
def recordfactory(classname, **kwargs):
"""record factory, returning a class name classname,
and keyword args assigned as class members
"""
class Record(object):
"""represent a Record, carrying its attributes as class members"""
def __init__(self, **kwargs):
self.__dict__.update(kwargs)
def __repr__(self):
return u'%s(\n%s\n)' % (self.__class__.__name__, frec(self.__dict__))
record = Record(**kwargs)
record.__class__.__name__ = classname
return record
d1rec = recordfactory('D1Rec', **dict(hu = 'hu', ho = u'äöü', pi = 3.14, bla = [], tup = ()))
d2rec = recordfactory('D2Rec', **dict(a = 1, b = 2, c = 3, d = [d1rec], e = u'fünf', f = 'sechs'))
log.debug(d2rec)
--
https://mail.python.org/mailman/listinfo/python-list