So I went ahead and assumed the type may change in between rows. Attach and available here:
http://distfiles.scode.org/mlref/pygresql-3.8.2-typecastperf.patch This is against the 2.8.2 beta tarball. Am I being thick or is the CVS repository not usefully available for checkouts? I just find the web interface. First of all, in testing my changes I discovered another bug which is that for bool columns, we no longer receive a True/False from pgdb. This is because 'bool' was added as mapping to the STRING pgdbType(). Was this done intentionally to make sure that all pg types map to some form of standard DB API type? If so, the correct fix, as I see it, is to retain the 'bool' mapping for both STRING and BOOL, but to make sure type casting prefers treating it as a BOOL above treating it as a STRING (to test for BOOL first). Currently, the patch just removes the 'bool' mapping, pending discussion here. With respect to the actual performance fix, the type cache now maintains its own dict of type casters. No thread-safety is required since operation on cursors/connections is not declared thread-safe. I could have made the cache global, but that would require introducing dependencies on the threading module and start maintaining global state, which was a bigger divergence from the style/general tendency of the current code. I tested type casting of all types by the following test (I can produce an actual runnable test case without external dependencies if someone asks for it): txn.cursor.execute("CREATE TABLE tctest ( " " stringtest VARCHAR, " " binarytest BYTEA, " " booltest BOOL, " " integertest INT4, " " longtest INT8, " " floattest FLOAT8, " " numerictest NUMERIC, " " moneytest MONEY, " " datetimetest TIMESTAMP, " " rowidtest oid)") # fake oid, but what the heck vals = [ 'test', 'test', True, 5, 6L, 5.7, Decimal('234.234234'), Decimal('75.45'), '2008-10-21 15:00:00', 7897234L ] txn.cursor.execute("INSERT INTO tctest VALUES (%s, %s, %s, %s, %s, %s, %s, '%s'::MONEY, %s, %s)", vals) txn.cursor.execute("INSERT INTO tctest VALUES (%s, %s, %s, %s, %s, %s, %s, '%s'::MONEY, %s, %s)", vals) txn.cursor.execute("SELECT * FROM tctest") rows = txn.cursor.fetchall() print vals print rows[0] assert len(rows) == 2 assert rows[0] == vals -- / Peter Schuller PGP userID: 0xE9758B7D or 'Peter Schuller <[EMAIL PROTECTED]>' Key retrieval: Send an E-Mail to [EMAIL PROTECTED] E-Mail: [EMAIL PROTECTED] Web: http://www.scode.org
--- /tmp/PyGreSQL-3.8.2-pre080916/pgdb.py 2008-09-17 05:41:38.000000000 +0200
+++ pgdb.py 2008-10-21 11:01:45.000000000 +0200
@@ -97,34 +97,60 @@
def __init__(self, cnx):
self._src = cnx.source()
self._cache = {}
+ self._casters = {}
def typecast(self, typ, value):
- if value is None:
- # for NULL values, no typecast is necessary
+ return self._typecaster(typ)(value)
+
+ def _typecaster(self, typ):
+ # Type comparisons are very expensive because of the
+ # shear amount of string comparisons it will
+ # generate. Cache type casters for much better
+ # performance when fetching many values.
+ try:
+ return self._casters[typ]
+ except KeyError:
+ caster = self._create_typecaster(typ)
+ self._casters[typ] = caster
+
+ return caster
+
+ def _create_typecaster(self, typ):
+ def noop_caster(value):
return value
+
+ def no_none(fn):
+ def caster(value):
+ if value is None:
+ # NULL/None are equivalent regardless of type
+ return None
+ else:
+ return fn(value)
+ return caster
+
if typ == STRING:
- pass
+ return noop_caster
elif typ == BINARY:
- pass
+ return noop_caster
elif typ == BOOL:
- value = (value[:1] in ['t','T'])
+ return no_none(lambda value: (value[:1] in ['t','T']))
elif typ == INTEGER:
- value = int(value)
+ return no_none(lambda value: int(value))
elif typ == LONG:
- value = long(value)
+ return no_none(lambda value: long(value))
elif typ == FLOAT:
- value = float(value)
+ return no_none(lambda value: float(value))
elif typ == NUMERIC:
- value = Decimal(value)
+ return no_none(lambda value: Decimal(value))
elif typ == MONEY:
- value = ''.join(filter(lambda v: v in '0123456789.-', value))
- value = Decimal(value)
+ return no_none(lambda value: Decimal(''.join(filter(lambda v: v in '0123456789.-', value))))
elif typ == DATETIME:
# format may differ ... we'll give string
- pass
+ return noop_caster
elif typ == ROWID:
- value = long(value)
- return value
+ return no_none(lambda value: long(value))
+ else:
+ return noop_caster
def getdescr(self, oid):
try:
@@ -444,7 +470,7 @@
# Mandatory type objects defined by DB-API 2 specs:
-STRING = pgdbType('char', 'bpchar', 'name', 'text', 'varchar', 'bool')
+STRING = pgdbType('char', 'bpchar', 'name', 'text', 'varchar')
BINARY = pgdbType() # BLOB support is pg specific
NUMBER = pgdbType('int2', 'int4', 'serial', 'int8',
'float4', 'float8', 'numeric', 'money')
pgp0BBWtoqSnu.pgp
Description: PGP signature
_______________________________________________ PyGreSQL mailing list [email protected] http://mailman.vex.net/mailman/listinfo/pygresql
