Like other things posted here without notices to the contrary, this
code is in the public domain.

#!/usr/bin/python
# I'd use the MetaPy dict mixin if I had one handy
"""A crude dictionary backed up by log-structured file storage.

This hack is just a proof of concept.  The concept is that it's really
easy to store data structures as the sequence of updates to the data
structure --- "logical logging" in OLTP/RDBMS-speak.  In theory this
could be the fastest possible way to store things, because your
storing is all sequential --- only your retrieval needs to be random.
In this implementation, it's far from fast, because it flushes the
file every time it writes a record.  It's also far from robust,
because it doesn't fsync().

"""


from __future__ import generators, nested_scopes
import os, urllib

# Short aliases for the escaping functions used to encode keys and values
# as single-line log records.
quote = urllib.quote_plus
unquote = urllib.unquote_plus

def ok(a, b):
    """Assert that a equals b; on failure the pair (a, b) is the message."""
    assert a == b, (a, b)

class wrong_exception(AssertionError):
    """Raised by assert_raises when the expected exception did not occur."""


def assert_raises(subr, exc):
    """Call subr() and check that it raises exc.

    subr is a zero-argument callable; exc is an exception class or a tuple
    of exception classes, exactly as accepted by an ``except`` clause.

    Returns 1 when subr() raises exc.  Raises wrong_exception when subr()
    raises some other exception (the original exception is the argument of
    the wrong_exception and its traceback is preserved) or when subr()
    returns without raising at all.
    """
    # Imported here because the module never imports sys at top level.
    import sys
    try:
        subr()
    except exc:
        return 1
    except Exception:
        # Only ordinary errors are converted; KeyboardInterrupt and
        # SystemExit are left to propagate untouched.
        exc_value, tb = sys.exc_info()[1:]
        wrapped = wrong_exception(exc_value)
        if hasattr(wrapped, 'with_traceback'):
            # Python 3 spelling of "re-raise with the original traceback".
            raise wrapped.with_traceback(tb)
        # Python 2 spelling.  The three-expression raise is a syntax error
        # on Python 3, so it is hidden inside a constant string.
        exec('raise wrong_exception, wrapped, tb')
    else:
        # (exc,) keeps the formatting correct when exc is itself a tuple.
        raise wrong_exception("no %s exception" % (exc,))

class logdict:
    """A crude dictionary whose updates are persisted to an append-only log.

    Every mutation is appended to the backing file as one line:
    ``name=value`` for a store, ``del name`` for a removal.  Names and
    values are passed through ``quote`` before being written and through
    ``unquote`` when read back.  Opening a logdict replays the whole file
    to rebuild the in-memory ``contents`` dict; lookups are then served
    from memory only.
    """

    # Prefix that marks a removal record in the log.
    _deltoken = 'del '

    def __init__(self, filename):
        """Open the log at filename (creating it if missing) and replay it.

        Raises AssertionError if a line is not newline-terminated or is
        neither a store nor a removal record.
        """
        # 'a+' creates a missing file and lets it be read back as well as
        # appended to.
        self.file = open(filename, 'a+')
        self.contents = {}
        try:
            self._replay()
        except:
            # Do not leak the handle when the log turns out to be unusable.
            self.file.close()
            raise

    def _replay(self):
        """Rebuild self.contents by applying every record in the file."""
        self.file.seek(0)
        deltoken = self._deltoken
        for line in self.file:
            # Raised explicitly rather than with an assert statement so a
            # truncated or foreign line is still rejected under -O.
            if not line.endswith('\n'):
                raise AssertionError(line)
            line = line[:-1]
            if '=' in line:
                (name, value) = line.split('=')
                self.contents[unquote(name)] = unquote(value)
            elif line.startswith(deltoken):
                del self.contents[unquote(line[len(deltoken):])]
            else:
                raise AssertionError(line)

    def log(self, msg):
        """Append msg to the log file and flush it (no fsync is done)."""
        self.file.write(msg)
        self.file.flush()

    def close(self):
        """Close the backing file; the object must not be written to after."""
        self.file.close()

    def __getitem__(self, name): return self.contents[name]

    def __setitem__(self, name, value):
        """Store value under name, logging the update first.

        The record is encoded and written before the in-memory dict is
        touched, so a name or value that cannot be encoded, or a failed
        write, leaves memory and log in agreement.
        """
        record = '%s=%s\n' % (quote(name), quote(value))
        self.log(record)
        self.contents[name] = value

    def __delitem__(self, name):
        """Remove name, raising KeyError (and logging nothing) if absent."""
        if name not in self.contents:
            raise KeyError(name)
        self.log('%s%s\n' % (self._deltoken, quote(name)))
        del self.contents[name]

    def __contains__(self, name): return name in self.contents

    def __len__(self): return len(self.contents)

    def __iter__(self): return iter(self.contents)

    def iterkeys(self): return iter(self.contents)

def test_basics(dicttype):
    """Exercise store, lookup and delete on a fresh dicttype('tmp.hash').

    dicttype is called with a filename and must return a dict-like object.
    The file 'tmp.hash' in the current directory is removed first and is
    left behind afterwards with 'glork' and 'del \\n=' stored in it.
    """
    # Start from an empty log.  The file does not exist on a first run, and
    # an unconditional unlink would fail before any test ran.
    if os.path.exists('tmp.hash'):
        os.unlink('tmp.hash')
    foo = dicttype('tmp.hash')
    assert_raises(lambda: foo['bar'], KeyError)
    foo['bar'] = 'baz'
    ok(foo['bar'], 'baz')

    foo['glork'] = 'glick'
    ok(foo['glork'], 'glick')
    ok(foo['bar'], 'baz')

    del foo['bar']
    ok(foo['glork'], 'glick')
    assert_raises(lambda: foo['bar'], KeyError)
    # A key and value made of the characters the log format itself uses.
    foo['del \n='] = 'x=y\nz'
    ok(foo['del \n='], 'x=y\nz')
    
def test_persistence(dicttype):
    """Reopen 'tmp.hash' and check that previously stored entries are there.

    Expects 'glork' and 'del \\n=' to already be stored in the file.
    """
    reopened = dicttype('tmp.hash')
    expected_entries = (
        ('glork', 'glick'),
        ('del \n=', 'x=y\nz'),
    )
    for key, expected in expected_entries:
        ok(reopened[key], expected)

def test_dicttype(dicttype):
    """Run every check against dicttype: basics first, then persistence."""
    for check in (test_basics, test_persistence):
        check(dicttype)
def test():
    """Self-test entry point: run the dictionary checks against logdict."""
    implementation = logdict
    test_dicttype(implementation)
# Run the self-test unconditionally, whether this file is executed or imported.
test()


Reply via email to