Hi,

I'm experiencing strange behavior with attached code, that differs depending 
on sys.setdefaultencoding being set or not. If it is set, the code works as 
expected, if not - what should be the usual case - the code fails with some 
non-sensible traceback.

I tried to boil it down to a comprehensible state, but some LOC are still 
involved. I use similar code to represent database records, where the repr's 
appear in user visible logs for example.

It is greatly appreciated, if some kind soul could shed some light on this 
ungrateful behavior.

Thanks in advance,
Pete
#!/usr/bin/env python
# -*- coding: utf-8 -*-
u"""
# this code will usually crash with python 2.7.5, except some poor soul has thrown
# a file called sitecustomize.py into systems site-packages containing:
import sys
sys.setdefaultencoding('utf-8')

# question is, why does it crash without defining a default encoding?
# these are basically two classes with a __repr__ method, where one class
# refers to the other. On a repr, a two level recursion should happen.
# from the log traces, one can see, that in the good case, this is dealt
# well, in the normal case, the recursion isn't handled well: when it
# should continue, it restarts the loop for some reason.

# with default encoding set (comments starting the #):

DEBUG: ['a', 'b', 'c', 'd', 'e', 'f']           # iterate over these elements of D2Rec
DEBUG: a
DEBUG: b
DEBUG: c
DEBUG: d
DEBUG: ['bla', 'ho', 'hu', 'pi', 'tup']         # iterate over these elements of D1Rec
DEBUG: bla
DEBUG: ho
DEBUG: hu
DEBUG: pi
DEBUG: tup                                      # D1Rec exhausted
DEBUG: e                                        # continue with rest of D2Rec
DEBUG: f                                        # finish
DEBUG: D2Rec(                                   # dump structure
a: 1
b: 2
c: 3
d: [D1Rec(
bla: []
 ho: u'äöü'
 hu: 'hu'
 pi: 3.14
tup: ()
)]
e: u'fünf'
f: 'sechs'
)

# all is good, without (the normal, but failing way):

DEBUG: ['a', 'b', 'c', 'd', 'e', 'f']           # iterate over these elements of D2Rec
DEBUG: a
DEBUG: b
DEBUG: c
DEBUG: d
DEBUG: ['bla', 'ho', 'hu', 'pi', 'tup']         # iterate over these elements of D1Rec
DEBUG: bla
DEBUG: ho
DEBUG: hu
DEBUG: pi
DEBUG: tup                                      # D1Rec exhausted
DEBUG: ['a', 'b', 'c', 'd', 'e', 'f']           # it should continue with rest of D1Rec
DEBUG: a                                        # but restarts the process for unknown reasons
DEBUG: b                                        # effectively processing these items twice
DEBUG: c                                        # only to commit suicide thereafter
DEBUG: d
DEBUG: ['bla', 'ho', 'hu', 'pi', 'tup']
DEBUG: bla
DEBUG: ho
DEBUG: hu
DEBUG: pi
DEBUG: tup
Traceback (most recent call last):
  File "/usr/lib64/python2.7/logging/__init__.py", line 851, in emit
    msg = self.format(record)
  File "/usr/lib64/python2.7/logging/__init__.py", line 724, in format
    return fmt.format(record)
  File "/usr/lib64/python2.7/logging/__init__.py", line 467, in format
    s = self._fmt % record.__dict__
  File "reprtest.py", line 80, in __repr__
    return u'%s(\n%s\n)' % (self.__class__.__name__, frec(self.__dict__))
  File "reprtest.py", line 66, in frec
    ret.append(u'%*s: %s' % (maxklen, key, reprstr(rec[key])))
  File "reprtest.py", line 53, in reprstr
    s = repr(s)
UnicodeEncodeError: 'ascii' codec can't encode characters in position 22-24: ordinal not in range(128)
Logged from file reprtest.py, line 108

# Consequently, the traceback doesn't make any sense..
"""

import sys
import logging

logconfig = {
    'level': logging.DEBUG,
    'format': '%(levelname)s: %(message)s',
    'encoding': 'utf8',
}
logging.basicConfig(**logconfig)

log = logging.getLogger(__name__)


def isascii(s):
    """tests a string, if it can be represented as pure ascii"""
    return all(ord(c) < 128 for c in s)


def reprstr(s):
    """helper to format values in a python 2 compatible way,
       using unicode only, where necessary, and quote strings
    """
    if isinstance(s, basestring):
        if isascii(s):
            s = repr(str(s))
        else:
            assert isinstance(s, unicode), "only unicode for non ascii strings allowed: %r" % s
            s = "u'%s'" % s.replace("'", "\\'")
    else:
        s = repr(s)
    return s


def frec(rec):
    '''format a dict in a easy to read sorted record presentation
    '''
    ret = []
    keys = [key for key in rec]
    maxklen = len(keys) and max([len(key) for key in keys]) or 0
    log.debug(sorted(keys))
    for key in sorted(keys):
        log.debug(key)
        ret.append(u'%*s: %s' % (maxklen, key, reprstr(rec[key])))
    return u'\n'.join(ret)


def recordfactory(classname, **kwargs):
    """record factory, returning a class name classname,
       and keyword args assigned as class members
    """
    class Record(object):
        """represent a Record, carrying its attributes as class members"""
        def __init__(self, **kwargs):
            self.__dict__.update(kwargs)

        def __repr__(self):
            return u'%s(\n%s\n)' % (self.__class__.__name__, frec(self.__dict__))

    record = Record(**kwargs)
    record.__class__.__name__ = classname
    return record


d1rec = recordfactory('D1Rec', **dict(hu = 'hu', ho = u'äöü', pi = 3.14, bla = [], tup = ()))
d2rec = recordfactory('D2Rec', **dict(a = 1, b = 2, c = 3, d = [d1rec], e = u'fünf', f = 'sechs'))
log.debug(d2rec)
-- 
https://mail.python.org/mailman/listinfo/python-list

Reply via email to