Hi,

I'm looking at various options for implementing a high throughput database logger that will work with Twisted.

My requirements, listed by importance:

1) small memory footprint
2) high speed
3) low garbage generation

The application I'm working on runs continuously (24/7). I've experimented a bit with pysqlite and Twisted to see which approach is better suited (see attached example).

----

Question 1: I noticed that all of the Twisted based versions are very slow compared to the plain sqlite3 test. This seems to be caused by atomic transaction management, namely a commit after each insert.

I would be interested to know whether there is a simple way to avoid this and do my own transaction management (i.e. batched commits).

One other thing is the greatly varying amounts of garbage generated (peak memory) and memory usage between the Twisted variants.

----

Question 2: I would have expected B (Twisted ADBAPI) to behave very similarly to C/E, since I'm using a connection pool of size 1 and all requests are queued and handled sequentially.

Could any of you please give me some pointers as to why this is happening?

----

Question 3: Even though objgraph lists exactly the same object counts once the code has run, the amount of memory used differs greatly. Any ideas what might be causing this?

Any suggestions and/or pointers on how to improve/do this are more than welcome.

Thank you for your time,
Adrian
import gc
import objgraph
import os
import sqlite3
import sys

from time import sleep
from twisted.enterprise.adbapi import ConnectionPool
from twisted.internet import defer, task, reactor


def _removeFile(path):
    """Delete the file at ``path``, treating an already-missing file as success.

    Args:
        path: filesystem path of the file to remove.

    Raises:
        OSError: if the file could not be removed for any reason other than
            it not existing (for example a permission problem). Swallowing
            those errors would leave the old file in place without any
            indication that the removal failed.
    """
    import errno  # local import keeps this helper self-contained

    try:
        os.unlink(path)
    except OSError as exc:
        # "No such file" is the expected, harmless case; re-raise the rest.
        if exc.errno != errno.ENOENT:
            raise


def plain_sqlite3(conn, rows):
    """Insert ``rows`` identical rows into table ``t`` and commit once.

    All inserts go through a single cursor; the only commit happens after
    the last insert, so the whole batch is one transaction.

    Args:
        conn: an open DB-API connection whose database has a table ``t``
            with a ``value`` column.
        rows: number of rows to insert.
    """
    insert_sql = 'INSERT INTO t (value) VALUES (1)'

    cur = conn.cursor()
    execute = cur.execute
    for _ in range(rows):
        execute(insert_sql)
    cur.close()

    # Single commit for the whole batch.
    conn.commit()
    


def adbapi(pool, rows):
    """Issue ``rows`` inserts on ``pool`` back to back, without waiting.

    Every ``runOperation`` call is made before any of the resulting
    Deferreds is waited on, so all of them exist at the same time.

    Args:
        pool: object exposing ``runOperation(sql)`` that returns a Deferred
            (an adbapi ``ConnectionPool`` in this script).
        rows: number of insert operations to issue.

    Returns:
        The Deferred of the last insert issued, or ``None`` if ``rows`` is 0.
    """
    insert_sql = 'INSERT INTO tw (value) VALUES (2)'

    pending = None
    for _ in range(rows):
        # The callback replaces the operation's result with None.
        pending = pool.runOperation(insert_sql).addCallback(
            lambda _result: None)

    return pending


def inline_callbacks(pool, rows):
    """Insert ``rows`` rows one at a time from an ``inlineCallbacks`` generator.

    Each insert's Deferred is yielded before the next ``runOperation`` call
    is made, so at most one insert issued by this function is outstanding.

    Args:
        pool: object exposing ``runOperation(sql)`` that returns a Deferred.
        rows: number of rows to insert.

    Returns:
        The Deferred produced by the ``inlineCallbacks``-decorated generator.
    """
    insert_sql = 'INSERT INTO tw (value) VALUES (3)'

    @defer.inlineCallbacks
    def insert_one_at_a_time():
        for _ in range(rows):
            # The callback replaces the operation's result with None.
            yield pool.runOperation(insert_sql).addCallback(
                lambda _result: None)

    return insert_one_at_a_time()


def semaphore(pool, rows):
    """Queue ``rows`` inserts behind a one-token ``DeferredSemaphore``.

    All ``run`` calls are made up front; the semaphore is created with a
    single token and each ``pool.runOperation`` call is routed through it.

    Args:
        pool: object exposing ``runOperation(sql)`` that returns a Deferred.
        rows: number of insert operations to queue.

    Returns:
        The Deferred of the last queued insert, or ``None`` if ``rows`` is 0.
    """
    insert_sql = 'INSERT INTO tw (value) VALUES (4)'

    gate = defer.DeferredSemaphore(1)
    final = None
    for _ in range(rows):
        # The callback replaces the operation's result with None.
        final = gate.run(pool.runOperation, insert_sql).addCallback(
            lambda _result: None)

    return final


def cooperator(pool, rows):
    """Insert ``rows`` rows by handing a lazy stream of inserts to a Cooperator.

    The inserts are produced by a generator, so each ``runOperation`` call
    is only made when the Cooperator asks for the next item.

    Args:
        pool: object exposing ``runOperation(sql)`` that returns a Deferred.
        rows: number of rows to insert.

    Returns:
        The Deferred returned by ``Cooperator.coiterate``.
    """
    insert_sql = 'INSERT INTO tw (value) VALUES (5)'

    # Lazy: nothing is issued until the Cooperator pulls from the generator.
    inserts = (
        pool.runOperation(insert_sql).addCallback(lambda _result: None)
        for _ in range(rows)
    )

    return task.Cooperator().coiterate(inserts)



def run(callable, repeats):
    """Benchmark a synchronous inserter against a fresh SQLite database.

    Removes any previous ``test-sq3.db3``, creates table ``t`` and then
    calls ``callable(conn, 2000)`` ``repeats`` times.

    Args:
        callable: function taking ``(conn, rows)``, e.g. ``plain_sqlite3``.
            (The parameter name shadows the builtin but is kept for
            compatibility with existing callers.)
        repeats: number of times to invoke ``callable``.
    """
    _removeFile('test-sq3.db3')
    conn = sqlite3.connect('./test-sq3.db3')
    try:
        cursor = conn.cursor()
        cursor.execute('CREATE TABLE t (id ROWID, value INTEGER)')
        cursor.close()

        for step in range(repeats):
            # Report the inserter being run; the original referenced an
            # undefined name here and raised NameError on the first pass.
            # Parenthesised single-argument print works on Python 2 and 3.
            print("Run #%d %s..." % (step, callable))
            callable(conn, 2000)
    finally:
        # Close the connection even if the inserter raises.
        conn.close()


def run_twisted(callable, repeats):
    """Benchmark a Twisted-based inserter against a fresh SQLite database.

    Removes any previous ``test-twisted.db3``, opens a one-connection adbapi
    pool on it, creates table ``tw`` and then calls ``callable(pool, 2000)``
    ``repeats`` times, waiting for each returned Deferred before starting
    the next round. Blocks in ``reactor.run()`` until the benchmark has
    finished, successfully or not.

    Args:
        callable: function taking ``(pool, rows)`` and returning a Deferred,
            e.g. ``adbapi`` or ``cooperator``. (The parameter name shadows
            the builtin but is kept for compatibility with existing callers.)
        repeats: number of rounds to run; 0 just creates the table and stops.
    """
    _removeFile('test-twisted.db3')
    # cp_min == cp_max == 1 restricts the pool to a single connection.
    pool = ConnectionPool('sqlite3', cp_min=1, cp_max=1,
                          database='test-twisted.db3',
                          check_same_thread=False)

    @defer.inlineCallbacks
    def execute():
        # Wait for the table so a failure here surfaces immediately.
        yield pool.runOperation('CREATE TABLE tw (id ROWID, value INTEGER)')

        for step in range(repeats):
            # Parenthesised single-argument print works on Python 2 and 3.
            print("Run #%d %s..." % (step, callable))
            yield callable(pool, 2000)

    def shutdown(result):
        # Runs on success and on failure; otherwise an error in any round
        # would leave the reactor running forever.
        try:
            pool.close()
        finally:
            reactor.stop()
        # Pass the result (or Failure) through so errors are not swallowed.
        return result

    def start():
        execute().addBoth(shutdown)

    reactor.callWhenRunning(start)
    reactor.run()


# Collect garbage and take a first objgraph snapshot before the benchmark,
# so the second show_growth() call below reports what changed in between.
gc.collect()
objgraph.show_growth()

# Benchmark selection: exactly one variant is enabled per process run.
# NOTE(review): run_twisted() runs and stops the global reactor, so enabling
# several Twisted variants in one process presumably will not work - confirm.
#run(plain_sqlite3, 100)
#run_twisted(adbapi, 100)
#run_twisted(inline_callbacks, 100)
#run_twisted(semaphore, 100)
run_twisted(cooperator, 100)

# Wait for input before the final snapshot (presumably so the process's
# memory usage can be inspected externally while it is still alive).
print "Press ENTER to exit..."
sys.stdin.read(1)

# Second snapshot: object-count growth since the first show_growth() call.
gc.collect()
objgraph.show_growth()
A. Plain SQLite3
----------------

Memory: 17 Mb
Peak memory: 19 Mb


B. Twisted ADBAPI
-----------------

Memory: 36 Mb
Peak memory: 240 Mb

wrapper_descriptor     1326       +15
function               2716       +13
dict                   1895        +8
getset_descriptor       444        +5
weakref                1067        +4
member_descriptor       374        +3
list                    331        +3
method_descriptor       700        +1
classobj                108        +1
module                  165        +1

C. Twisted Inline Callbacks
---------------------------

Memory: 21 Mb
Peak memory: 23 Mb

wrapper_descriptor     1326       +15
function               2716       +13
dict                   1895        +8
getset_descriptor       444        +5
weakref                1067        +4
member_descriptor       374        +3
list                    331        +3
method_descriptor       700        +1
classobj                108        +1
module                  165        +1


D. Twisted Deferred Semaphore
-----------------------------

Memory: 67 Mb
Peak memory: 288 Mb

wrapper_descriptor     1326       +15
function               2716       +13
dict                   1895        +8
getset_descriptor       444        +5
weakref                1067        +4
member_descriptor       374        +3
list                    331        +3
method_descriptor       700        +1
classobj                108        +1
module                  165        +1

E. Twisted Cooperator
---------------------

Memory: 18 Mb
Peak memory: 20 Mb

wrapper_descriptor     1326       +15
function               2706       +13
dict                   1890        +8
getset_descriptor       434        +5
weakref                1062        +4
member_descriptor       374        +3
list                    331        +3
method_descriptor       700        +1
classobj                108        +1
module                  165        +1
_______________________________________________
Twisted-Python mailing list
Twisted-Python@twistedmatrix.com
http://twistedmatrix.com/cgi-bin/mailman/listinfo/twisted-python

Reply via email to