#!/usr/bin/python
# Various approaches to a "safe" repr that never generates too much text.
# Demonstrates various approaches to laziness in a Python setting.
# In order to be more comprehensible, doesn't cover every Python type,
# leaving out new-style classes in particular; and it doesn't even try
# to be object-oriented.
# I want to emphasize that this code is not part of any production
# code, and several of the implementations use a number of techniques
# that make it unnecessarily hard to understand. There may be places
# where these techniques are called for, but they should be used very
# sparingly!
# (If you're programming in Haskell, you're probably using these
# techniques without even knowing it. They cause less trouble in
# Haskell due to being implicit in the language.)
# I am writing this warning because in my early days as a programmer,
# I was enthusiastic about learning advanced new algorithms and ways
# of structuring programs, and I would often use them in contexts
# where they weren't really helpful. In this case, the entire point
# of this module is to demonstrate different approaches to structuring
# a problem and understand their strengths and weaknesses; most
# programs have some more practical purpose that includes
# maintainability, which conflicts with complexity and unfamiliar
# program structures.
# If you need something like this module in Python in real life, you
# might try the TextRepr class inside pydoc. It works reasonably well
# for most things, even though it's not too hard to get it to produce
# uselessly voluminous output:
# >>> yy = []
# >>> for ii in range(4): yy = [yy] * 20
# ...
# >>> xx = pydoc.TextRepr().repr(yy)
# Or take several hours to run; just change the 4 to a larger number,
# such as 7.
import types
class UnhandledType(Exception): pass
### Dumb simple version: generate whole thing, then truncate it. 22 lines.
# Doesn't "work" in the sense of "never generating too much text", and
# it's trivial to make it painfully slow; for example:
# >>> x = 'x' * 10485760
# >>> for ii in range(100): x = [x]
# ...
# >>> safe_repr.safe_repr_1(x, 200)
#
"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[['xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
# It takes a few seconds to generate that result, as does repr(x)[:200].
# All the other implementations (except for safe_repr_2, which doesn't handle
# strings) generate it instantly.
def _safe_repr_1(val):
if isinstance(val, types.ListType):
return '[%s]' % ', '.join(map(_safe_repr_1, val))
elif isinstance(val, types.TupleType):
return '(%s)' % ', '.join(map(_safe_repr_1, val))
elif isinstance(val, types.IntType):
return '%d' % val
elif isinstance(val, types.DictType):
return '{%s}' % ', '.join(['%s: %s' % (_safe_repr_1(k), _safe_repr_1(v))
for k, v in val.items()])
elif isinstance(val, types.InstanceType):
# note that this is very different behavior from the standard
# Python repr!
return '%s %s' % (val.__class__.__name__, _safe_repr_1(val.__dict__))
elif isinstance(val, types.StringType):
return repr(val) # I'm not going to bother writing out how that works
else:
raise UnhandledType(val)
def safe_repr_1(val, length):
"A dumb version that generates it all, then truncates. For comparison."
return _safe_repr_1(val)[:length]
### Badly programmed version: never append more than you
# need to. 24 lines, incomplete.
def safe_repr_2(val, length):
"""A partial version that never appends more than it needs to.
Too painful to complete."""
rv = []
if isinstance(val, types.ListType):
if length > 0:
rv.append('[')
length -= 1
for ii in range(len(val)):
item = val[ii]
if length <= 0: break
rv.append(safe_repr_2(item, length))
length -= len(rv[-1])
if ii < len(val) - 1:
rv.append(', '[:length])
length -= len(rv[-1])
if length > 0:
rv.append(']')
length -= 1
elif isinstance(val, types.IntType):
rv.append(('%d' % val)[:length])
else:
raise UnhandledType(val)
return ''.join(rv)
### Refactored version: never append more than you need to, and use an
# exception to quit when you've generated enough. 54 lines.
def _safe_repr_3_items(xform, items, buf):
for ii in range(len(items)):
xform(items[ii], buf)
if ii < len(items) - 1: buf.append(', ')
def _safe_repr_3_dict_item((k, v), buf):
_safe_repr_3(k, buf)
buf.append(': ')
_safe_repr_3(v, buf)
def _safe_repr_3(val, buf):
if isinstance(val, types.ListType):
buf.append('[')
_safe_repr_3_items(_safe_repr_3, val, buf)
buf.append(']')
elif isinstance(val, types.TupleType):
buf.append('(')
_safe_repr_3_items(_safe_repr_3, val, buf)
buf.append(')')
elif isinstance(val, types.IntType):
buf.append('%d' % val)
elif isinstance(val, types.DictType):
buf.append('{')
_safe_repr_3_items(_safe_repr_3_dict_item, val.items(), buf)
buf.append('}')
elif isinstance(val, types.InstanceType):
buf.append(val.__class__.__name__ + ' ')
_safe_repr_3(val.__dict__, buf)
elif isinstance(val, types.StringType):
buf.append(repr(val))
else:
raise UnhandledType(val)
class BufferFull(Exception): pass
class Buf:
def __init__(self, maxlen):
self.maxlen = maxlen
self.buf = []
self.curlen = 0
def append(self, astr):
astr = astr[:self.maxlen - self.curlen]
self.buf.append(astr)
self.curlen += len(astr)
if self.curlen == self.maxlen:
raise BufferFull()
def contents(self):
return ''.join(self.buf)
def safe_repr_3(val, length):
"A version that uses an exception to break out of recursion."
buf = Buf(length)
try: _safe_repr_3(val, buf)
except BufferFull: pass
return buf.contents()
### Recursive Generator Version, in which we simply stop iterating
# when we have enough. 49 lines.
# This one is able to handle deeply nested data structures, as long as
# you don't ask it to traverse the whole thing; on my Python, that
# means you're safe as long as you don't want more than 499
# characters.
def _safe_repr_4_items(xform, items):
for ii in range(len(items)):
for item in xform(items[ii]): yield item
if ii < len(items) - 1: yield (', ')
def _safe_repr_4_dict_item((k, v)):
for item in _safe_repr_4(k): yield item
yield ': '
for item in _safe_repr_4(v): yield item
def _safe_repr_4(val):
if isinstance(val, types.ListType):
yield '['
for item in _safe_repr_4_items(_safe_repr_4, val): yield item
yield ']'
elif isinstance(val, types.TupleType):
yield '('
for item in _safe_repr_4_items(_safe_repr_4, val): yield item
yield ')'
elif isinstance(val, types.IntType):
yield '%d' % val
elif isinstance(val, types.DictType):
yield '{'
for item in _safe_repr_4_items(_safe_repr_4_dict_item, val.items()):
yield item
yield '}'
elif isinstance(val, types.InstanceType):
yield val.__class__.__name__ + ' '
for item in _safe_repr_4(val.__dict__): yield item
elif isinstance(val, types.StringType):
# This time I will bother to write it out.
yield "'"
for each_char in val:
if each_char == '\\': yield '\\\\'
elif each_char == "'": yield "\\'"
else: yield each_char
yield "'"
else:
raise UnhandledType(val)
def safe_repr_4(val, length):
"A version using recursive generators."
rv = []
for chunk in _safe_repr_4(val):
chunk = chunk[:length]
length -= len(chunk)
rv.append(chunk)
if length == 0: break
return ''.join(rv)
### Stream version without generators. 64 lines, plus 11 lines of streams.
# Here we see how we could do the same thing more painfully in a
# language with closures but without generators. Surprisingly, this
# is only about 20% more code, if you discount the first four
# functions whose sole reason for being is to manipulate streams.
# However, it did have several bugs in it at first, so I think it's
# clearly harder to understand.
# At present, it dies with a stack overflow on deeply nested data
# structures, even when it shouldn't have to be traversing all the
# levels in order to produce the requested number of characters. In
# that sense, it's "more eager" and "less lazy" than the generator
# version. It's possible to make it lazier to solve that problem, but
# I haven't figured out how yet.
def stream_append(a, b):
afirst, arest = a
if afirst is None: return b
return (afirst, lambda: stream_append(arest(), b))
empty_stream = (None, None)
def one_item_stream(x): return (x, lambda: empty_stream)
def stream_append_n(streams):
streams.reverse()
rv = empty_stream
for stream in streams: rv = stream_append(stream, rv)
return rv
def _safe_repr_5_map(xform, val, ii):
if ii == len(val): return empty_stream
else:
return (xform(val[ii]), lambda: _safe_repr_5_map(xform, val, ii+1))
def _safe_repr_5_items(xform, val, ii):
if ii == len(val): return empty_stream
elif ii == len(val) - 1: return xform(val[ii])
else:
# we explicitly cons up the second stream here to maintain laziness
# in case there are a lot of items
return stream_append(xform(val[ii]),
(", ", lambda: _safe_repr_5_items(xform, val, ii+1)))
def _safe_repr_5_dict_item((k, v)):
return stream_append_n([_safe_repr_5(k), one_item_stream(": "),
_safe_repr_5(v)])
def _safe_repr_5(val):
"A stream is a (first_chunk, stream) pair, or (None, dontcare) for empty."
if isinstance(val, types.ListType):
return stream_append_n([one_item_stream('['),
_safe_repr_5_items(_safe_repr_5, val, 0),
one_item_stream(']')])
elif isinstance(val, types.TupleType):
return stream_append_n([one_item_stream('('),
_safe_repr_5_items(_safe_repr_5, val, 0),
one_item_stream(')')])
elif isinstance(val, types.IntType):
return one_item_stream('%d' % val)
elif isinstance(val, types.DictType):
return stream_append_n([
one_item_stream('{'),
# note that we may lose some laziness here building a big list
_safe_repr_5_items(_safe_repr_5_dict_item, val.items(), 0),
one_item_stream('}')])
elif isinstance(val, types.InstanceType):
return stream_append(one_item_stream(val.__class__.__name__ + ' '),
_safe_repr_5(val.__dict__))
elif isinstance(val, types.StringType):
def chartrans(each_char):
if each_char == '\\': return '\\\\'
elif each_char == "'": return "\\'"
else: return each_char
return stream_append_n([one_item_stream("'"),
_safe_repr_5_map(chartrans, val, 0),
one_item_stream("'")])
return one_item_stream(repr(val))
else:
raise UnhandledType(val)
def safe_repr_5(val, length):
"A version using lazy streams."
rv = []
stream = _safe_repr_5(val)
while 1:
chunk, tail = stream
if chunk is None: break
chunk = chunk[:length]
length -= len(chunk)
rv.append(chunk)
if length == 0: break
stream = tail()
return ''.join(rv)
### Explicit stack version, eschewing recursion and even functions. 61 lines.
# Unsurprisingly, this is nearly the longest version of all, although
# it had surprisingly few bugs.
# In a sense, it's "safer" than the other implementations because it
# can handle data structures that are nested thousands of levels deep
# without trouble.
def safe_repr_6(val, length):
"Explicit stack version, eschewing recursion and functions entirely."
stack = [('val', val)]
rv = []
while stack:
tag, val = stack.pop()
to_add = None
if tag == 'val':
if isinstance(val, types.ListType):
to_add = '['
stack.append(('literal', ']'))
stack.append(('list_contents', (val, 0)))
elif isinstance(val, types.TupleType):
to_add = '('
stack.append(('literal', ')'))
stack.append(('list_contents', (val, 0)))
elif isinstance(val, types.IntType):
to_add = '%d' % val
elif isinstance(val, types.DictType):
to_add = '{'
stack.append(('literal', '}'))
stack.append(('dict_contents', (val.items(), 0)))
elif isinstance(val, types.InstanceType):
to_add = val.__class__.__name__ + ' '
stack.append(('val', val.__dict__))
elif isinstance(val, types.StringType):
to_add = "'"
stack.append(('literal', "'"))
stack.append(('string', (val, 0)))
else:
raise UnhandledType(val)
elif tag == 'literal':
to_add = val
elif tag == 'list_contents' or tag == 'dict_contents':
val, ii = val
if len(val) > 1 + ii:
stack.append((tag, (val, ii + 1)))
stack.append(('literal', ', '))
if len(val) > ii:
item = val[ii]
if tag == 'list_contents':
stack.append(('val', item))
else: # tag == 'dict_contents'
k, v = item
stack.append(('val', v))
stack.append(('literal', ': '))
stack.append(('val', k))
elif tag == 'string':
val, ii = val
if len(val) > ii:
stack.append((tag, (val, ii + 1)))
each_char = val[ii]
if each_char == '\\': to_add = '\\\\'
elif each_char == "'": to_add = "\\'"
else: to_add = each_char
if to_add is not None:
to_add = to_add[:length]
length -= len(to_add)
rv.append(to_add)
if length == 0: break
return ''.join(rv)
### Unit tests, to verify that these functions all work.
def test_fun_on(fun, val):
for length in range(40):
rstr = fun(val, length)
assert len(rstr) <= length, (rstr, val, length, len(rstr))
assert rstr == safe_repr_1(val, length), (rstr,
safe_repr_1(val, length))
def test_fun(fun):
test_fun_on(fun, range(11))
y = []
for ii in range(10): y = [y]
test_fun_on(fun, y)
def test_fun_harder(fun):
class Foo: pass
x = Foo()
x.blarg = 37
test_fun_on(fun, (x, {}, [(131, 'foo')]))
def test_all():
test_fun(safe_repr_1)
test_fun_harder(safe_repr_1)
test_fun(safe_repr_2)
test_fun(safe_repr_3)
test_fun_harder(safe_repr_3)
test_fun(safe_repr_4)
test_fun_harder(safe_repr_4)
test_fun(safe_repr_5)
test_fun_harder(safe_repr_5)
test_fun(safe_repr_6)
test_fun_harder(safe_repr_6)
test_all()