dabo Commit
Revision 7108
Date: 2012-03-10 12:52:43 -0800 (Sat, 10 Mar 2012)
Author: Paul
Trac: http://trac.dabodev.com/changeset/7108
Changed:
U trunk/dabo/dConstants.py
U trunk/dabo/db/dBackend.py
U trunk/dabo/db/dCursorMixin.py
U trunk/dabo/db/dbSQLite.py
Log:
Optimized the requery() cycle so that it no longer corrects the field types
immediately. Instead, the field types are corrected as needed during the
getFieldVal() cycle. In my testing this yields a 7x speedup when requerying
1000 rows in a table with 30 fields. We pay a small cost later, when the
field type gets corrected and a flag is set during getFieldVal(), but the
key is that we don't pay it all at once, and we don't pay for rows we may
never use.
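Roughly, the idea is the pattern below: a minimal sketch with hypothetical
names (CORRECTED_FLAG, correct_record_if_needed, get_field_val), not the
actual dCursorMixin code. Each record dict carries a "corrected" flag, and
conversions run only the first time that record is read.

# Sketch only; names are illustrative, not the real dabo identifiers.
CORRECTED_FLAG = "types-corrected"  # stands in for kons.CURSOR_FIELD_TYPES_CORRECTED

def correct_record_if_needed(rec, field_types):
    # Convert raw backend values to their declared Python types, but only
    # the first time this particular record is touched.
    if rec.get(CORRECTED_FLAG, False):
        return
    for name, target in field_types.items():
        val = rec.get(name)
        if val is not None and not isinstance(val, target):
            rec[name] = target(val)
    rec[CORRECTED_FLAG] = True

def get_field_val(records, row, fld, field_types):
    # The conversion cost is paid here, on first access, not during requery().
    rec = records[row]
    correct_record_if_needed(rec, field_types)
    return rec[fld]

# Example: only the row actually read gets its types corrected.
rows = [{"id": "1", "amount": "2.50"}, {"id": "2", "amount": "3.75"}]
print(get_field_val(rows, 0, "amount", {"id": int, "amount": float}))  # 2.5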
Optimized some use cases of getDataSet(). Previously, we used a subtractive
approach: copy each record dict and delete the fields we weren't interested
in. Now we use an additive approach, which yields a 3.25x speedup when
calling getDataSet() on 500 rows and asking for only 2 fields out of 30.
Other use cases run at roughly the old speed, or slightly slower, since we
may need to correct the field types while iterating over the requested rows.
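For reference, the difference between the two approaches looks roughly like
this (a simplified sketch with made-up helper names, not the actual
getDataSet() code):

def dataset_subtractive(records, flds):
    # Old approach: copy every record, then delete the unwanted keys.
    out = []
    for rec in records:
        tmp = rec.copy()
        for k in list(tmp):
            if k not in flds:
                del tmp[k]
        out.append(tmp)
    return out

def dataset_additive(records, flds):
    # New approach: build each result dict from only the requested fields.
    return [dict((k, rec[k]) for k in flds) for rec in records]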
Tested on MySQL and SQLite. Please let me know if you notice any trouble.
Diff:
Modified: trunk/dabo/dConstants.py
===================================================================
--- trunk/dabo/dConstants.py 2012-03-10 20:50:15 UTC (rev 7107)
+++ trunk/dabo/dConstants.py 2012-03-10 20:52:43 UTC (rev 7108)
@@ -25,6 +25,7 @@
CURSOR_MEMENTO = "dabo-memento"
CURSOR_NEWFLAG = "dabo-newrec"
CURSOR_TMPKEY_FIELD = "dabo-tmpKeyField"
+CURSOR_FIELD_TYPES_CORRECTED = "dabo-fieldTypesCorrected"
DLG_OK = 0
DLG_CANCEL = -1
Modified: trunk/dabo/db/dBackend.py
===================================================================
--- trunk/dabo/db/dBackend.py 2012-03-10 20:50:15 UTC (rev 7107)
+++ trunk/dabo/db/dBackend.py 2012-03-10 20:52:43 UTC (rev 7108)
@@ -30,12 +30,6 @@
super(dBackend, self).__init__()
self.dbModuleName = None
self._connection = None
- # If the db module is set to hook into dCursor to correct the field
- # types and convert the records to dict inline, then dCursorMixin doesn't
- # have to reiterate the records to do those tasks. Set the following to
- # True in the given db module to tell dCursorMixin not to bother. As of this
- # writing, only dbSQLite is set up for this.
- self._alreadyCorrectedFieldTypes = False
# Reference to the cursor that is using this object
self._cursor = None
Modified: trunk/dabo/db/dCursorMixin.py
===================================================================
--- trunk/dabo/db/dCursorMixin.py 2012-03-10 20:50:15 UTC (rev 7107)
+++ trunk/dabo/db/dCursorMixin.py 2012-03-10 20:52:43 UTC (rev 7108)
@@ -5,6 +5,7 @@
import time
import re
from decimal import Decimal
+import functools
import dabo
import dabo.dConstants as kons
from dabo.dLocalize import _
@@ -16,13 +17,15 @@
from dabo.lib.utils import noneSortKey, caseInsensitiveSortKey
from dabo.lib.utils import ustr
+cursor_flags = (kons.CURSOR_MEMENTO, kons.CURSOR_NEWFLAG,
+ kons.CURSOR_TMPKEY_FIELD, kons.CURSOR_FIELD_TYPES_CORRECTED)
+
class dCursorMixin(dObject):
"""Dabo's cursor class, representing the lowest tier."""
_call_initProperties = False
# Make these class attributes, so that they are shared among all instances
_fieldStructure = {}
- _fieldsToAlwaysCorrectType = []
def __init__(self, sql="", *args, **kwargs):
self._convertStrToUnicode = True
@@ -185,6 +188,7 @@
rec = self._records[self.RowNumber]
except IndexError:
rec = {}
+ self._correctFieldTypesIfNeeded(rec)
if isinstance(self.KeyField, tuple):
if rec:
pk = tuple([rec[kk] for kk in self.KeyField])
@@ -207,7 +211,15 @@
return pkField
- def _correctFieldType(self, field_val, field_name, _newQuery=False):
+ def _correctFieldTypesIfNeeded(self, rec):
+ if not rec.get(kons.CURSOR_FIELD_TYPES_CORRECTED, False):
+ _correctFieldType = self._correctFieldType
+ for fld_name in (i for i in rec if i not in cursor_flags):
+ rec[fld_name] = _correctFieldType(rec[fld_name], fld_name)
+ rec[kons.CURSOR_FIELD_TYPES_CORRECTED] = True
+
+
+ def _correctFieldType(self, field_val, field_name):
"""
Correct the type of the passed field_val, based on
self.DataStructure.
@@ -218,65 +230,48 @@
"""
if field_val is None:
return field_val
- ret = field_val
- if _newQuery or (field_name in self._fieldsToAlwaysCorrectType):
- pythonType = self._types.get(field_name, None)
- if pythonType is None or pythonType == type(None):
- pythonType = self._types[field_name] = dabo.db.getDataType(type(field_val))
+ pythonType = self._types.get(field_name, None)
+ if pythonType is None or pythonType == type(None):
+ pythonType = self._types[field_name] = dabo.db.getDataType(type(field_val))
- if isinstance(field_val, str) and self._convertStrToUnicode:
- # convert to unicode
- pass
- elif pythonType is None or isinstance(field_val, pythonType):
- # No conversion needed.
- return ret
- else:
- self._fieldsToAlwaysCorrectType.append(field_name)
+ if isinstance(field_val, pythonType):
+ # No conversion needed.
+ return field_val
- if pythonType in (unicode,):
- # Unicode conversion happens below.
- pass
- elif pythonType in (datetime.datetime,) and isinstance(field_val, basestring):
- ret = dates.getDateTimeFromString(field_val)
- if ret is None:
- ret = field_val
- else:
- return ret
- elif pythonType in (datetime.date,) and isinstance(field_val, basestring):
- ret = dates.getDateFromString(field_val)
- if ret is None:
- ret = field_val
- else:
- return ret
- elif pythonType in (Decimal,):
- ds = self.DataStructure
- ret = None
- _field_val = field_val
- if type(field_val) in (float,):
- # Can't convert to decimal directly from float
- _field_val = ustr(_field_val)
- # Need to convert to the correct scale:
- scale = None
- for s in ds:
- if s[0] == field_name:
- if len(s) > 5:
- scale = s[5]
- if scale is None:
- scale = 2
- return Decimal(_field_val).quantize(Decimal("0.%s" % (scale * "0",)))
- else:
- try:
- return pythonType(field_val)
- except Exception, e:
- tfv = type(field_val)
- dabo.log.info(_("_correctFieldType()
failed for field: '%(field_name)s'; value: '%(field_val)s'; type: '%(tfv)s'")
- % locals())
+ if pythonType in (unicode,):
+ # Unicode conversion happens below.
+ pass
+ elif pythonType in (datetime.datetime,) and isinstance(field_val, basestring):
+ return dates.getDateTimeFromString(field_val)
+ elif pythonType in (datetime.date,) and isinstance(field_val, basestring):
+ return dates.getDateFromString(field_val)
+ elif pythonType in (Decimal,):
+ ds = self.DataStructure
+ _field_val = field_val
+ if type(field_val) in (float,):
+ # Can't convert to decimal directly from float
+ _field_val = ustr(_field_val)
+ # Need to convert to the correct scale:
+ scale = None
+ for s in ds:
+ if s[0] == field_name:
+ if len(s) > 5:
+ scale = s[5]
+ if scale is None:
+ scale = 2
+ return Decimal(_field_val).quantize(Decimal("0.%s" % (scale * "0",)))
+ else:
+ try:
+ return pythonType(field_val)
+ except Exception, e:
+ tfv = type(field_val)
+ dabo.log.info(_("_correctFieldType() failed for
field: '%(field_name)s'; value: '%(field_val)s'; type: '%(tfv)s'")
+ % locals())
# Do the unicode conversion last:
if isinstance(field_val, str) and self._convertStrToUnicode:
try:
- decoded = field_val.decode(self.Encoding)
- return decoded
+ return field_val.decode(self.Encoding)
except UnicodeDecodeError, e:
# Try some common encodings:
ok = False
@@ -301,15 +296,15 @@
% {'fname':field_name, 'enc':enc})
return ret
else:
- raise e
+ raise
rfv = repr(field_val)
dabo.log.error(_("%(rfv)s couldn't be converted to
%(pythonType)s (field %(field_name)s)")
% locals())
- return ret
+ return field_val
- def execute(self, sql, params=None, _newQuery=False, errorClass=None, convertQMarks=False):
+ def execute(self, sql, params=None, errorClass=None, convertQMarks=False):
"""Execute the sql, and populate the DataSet if it is a select
statement."""
# The idea here is to let the super class do the actual work in
# retrieving the data. However, many cursor classes can only return
@@ -404,24 +399,16 @@
errMsg = ustr(e)
dabo.log.error("Error fetching records: (%s, %s)" %
(type(e), errMsg))
- if _records and not self.BackendObject._alreadyCorrectedFieldTypes:
- if isinstance(_records[0], (tuple, list)):
- # Need to convert each row to a Dict, since the backend didn't do it.
- tmpRows = []
- fldNames = [f[0] for f in self.FieldDescription]
- for row in _records:
- dic = {}
- for idx, fldName in enumerate(fldNames):
- dic[fldName] = self._correctFieldType(field_val=row[idx],
- field_name=fldName, _newQuery=_newQuery)
- tmpRows.append(dic)
- _records = tmpRows
- else:
- # Already a DictCursor, but we still need to correct the field types.
- for row in _records:
- for fld, val in row.items():
- row[fld] = self._correctFieldType(field_val=val,
- field_name=fld, _newQuery=_newQuery)
+ if _records and isinstance(_records[0], (tuple, list)):
+ # Need to convert each row to a Dict, since the backend didn't do it.
+ tmpRows = []
+ fldNames = [f[0] for f in self.FieldDescription]
+ for row in _records:
+ dic = {}
+ for idx, fldName in enumerate(fldNames):
+ dic[fldName] = row[idx]
+ tmpRows.append(dic)
+ _records = tmpRows
self._records = dDataSet(_records)
# This will handle bounds issues
@@ -492,7 +479,7 @@
self.lastParams = params
self._savedStructureDescription = []
- self.execute(currSQL, params, _newQuery=newQuery)
+ self.execute(currSQL, params)
# clear mementos and new record flags:
self._mementos = {}
@@ -892,52 +879,49 @@
def getFieldVal(self, fld, row=None, _rowChangeCallback=None):
"""Return the value of the specified field in the current or
specified row."""
- if self.RowCount <= 0:
+ _records = self._records
+ if not _records:
raise dException.NoRecordsException(
_("No records in dataset '%s'.") %
self.Table)
if row is None:
- row = self.RowNumber
+ row = self._getRowNumber()
try:
- rec = self._records[row]
+ rec = _records[row]
except IndexError:
- cnt = len(self._records)
+ cnt = len(_records)
raise dException.RowNotFoundException(
_("Row #%(row)s requested, but the data
set has only %(cnt)s row(s),") % locals())
+ self._correctFieldTypesIfNeeded(rec)
if isinstance(fld, (tuple, list)):
- ret = []
- for xFld in fld:
- ret.append(self.getFieldVal(xFld, row=row))
- return tuple(ret)
- else:
- try:
- return rec[fld]
- except KeyError:
- try:
- vf = self.VirtualFields[fld]
- if not isinstance(vf, dict):
- vf = {"func": vf}
+ return map(functools.partial(self.getFieldVal, row=row), fld)
+ if fld in rec:
+ return rec[fld]
+ elif fld in self.VirtualFields:
+ vf = self.VirtualFields[fld]
+ if not isinstance(vf, dict):
+ vf = {"func": vf}
- requery_children = (vf.get("requery_children", False) and bool(_rowChangeCallback))
+ requery_children = (vf.get("requery_children", False) and bool(_rowChangeCallback))
- # Move to specified row if necessary, and then call the VirtualFields
- # function, which expects to be on the correct row.
- if not requery_children:
- # The VirtualFields 'requery_children' key is False, or
- # we aren't being called by a bizobj, so there aren't child bizobjs.
- _oldrow = self.RowNumber
- self.RowNumber = row
- ret = vf["func"]()
- self.RowNumber = _oldrow
- return ret
- else:
- # The VirtualFields definition's 'requery_children' key is True, so
- # we need to request a row change and requery of any child bizobjs
- # as necessary, before executing the virtual field function.
- _rowChangeCallback(row)
- return vf["func"]()
- except KeyError:
- raise dException.FieldNotFoundException("%s '%s' %s" % (
- _("Field"), fld, _("does not exist in the data set")))
+ # Move to specified row if necessary, and then call the VirtualFields
+ # function, which expects to be on the correct row.
+ if not requery_children:
+ # The VirtualFields 'requery_children' key is False, or
+ # we aren't being called by a bizobj, so there aren't child bizobjs.
+ _oldrow = self.RowNumber
+ self.RowNumber = row
+ ret = vf["func"]()
+ self.RowNumber = _oldrow
+ return ret
+ else:
+ # The VirtualFields definition's 'requery_children' key is True, so
+ # we need to request a row change and requery of any child bizobjs
+ # as necessary, before executing the virtual field function.
+ _rowChangeCallback(row)
+ return vf["func"]()
+ else:
+ raise dException.FieldNotFoundException("%s '%s' %s" % (
+ _("Field"), fld, _("does not exist in
the data set")))
def _fldTypeFromDB(self, fld):
@@ -1345,7 +1329,7 @@
pk = self.pkExpression(rec)
for k, v in rec.items():
- if k not in (kons.CURSOR_TMPKEY_FIELD,):
+ if k not in cursor_flags:
ret[k] = (None, v)
return ret
@@ -1382,31 +1366,35 @@
to only include the specified fields. rowStart specifies the starting row
to include, and rows is the number of rows to return.
"""
- ds = []
- internals = (kons.CURSOR_TMPKEY_FIELD,)
rowCount = self.RowCount
if rows is None:
rows = rowCount
else:
rows = min(rowStart + rows, rowCount)
+ if rows < 1 or rowStart > self.RowCount:
+ return dDataSet()
+
+ getFieldVal = self.getFieldVal
+ _records = self._records
+ vFieldKeys = self.VirtualFields.keys()
+ _correctFieldTypesIfNeeded = self._correctFieldTypesIfNeeded
+
+ if not flds:
+ vflds = vFieldKeys
+ flds = [f for f in _records[rowStart] if returnInternals or f not in cursor_flags]
+ else:
+ vflds = [f for f in _records if f in vFieldKeys]
+ flds = [f for f in flds if f not in vFieldKeys]
+
+ ds = []
for row in xrange(rowStart, rows):
- tmprec = self._records[row].copy()
- for k, v in self.VirtualFields.items():
- # only calc requested virtualFields
- if (flds and k in flds) or not flds:
- tmprec.update({k: self.getFieldVal(k, row)})
- if flds:
- # user specified specific fields - get rid of all others
- for k in tmprec.keys():
- if k not in flds:
- del tmprec[k]
- if not flds and not returnInternals:
- # user didn't specify explicit fields and doesn't want internals
- for internal in internals:
- tmprec.pop(internal, None)
+ rec = _records[row]
+ _correctFieldTypesIfNeeded(rec)
+ tmprec = dict([(k, rec[k]) for k in flds])
+ for v in vflds:
+ tmprec.update({v: self.getFieldVal(v, row)})
ds.append(tmprec)
-
return dDataSet(ds)
Modified: trunk/dabo/db/dbSQLite.py
===================================================================
--- trunk/dabo/db/dbSQLite.py 2012-03-10 20:50:15 UTC (rev 7107)
+++ trunk/dabo/db/dbSQLite.py 2012-03-10 20:52:43 UTC (rev 7108)
@@ -22,7 +22,6 @@
except ImportError:
import sqlite3 as dbapi
self.dbapi = dbapi
- self._alreadyCorrectedFieldTypes = True
def getConnection(self, connectInfo, forceCreate=False, **kwargs):
@@ -35,10 +34,7 @@
ret = {}
fieldNames = (fld[0] for fld in cursor.description)
for idx, field_name in enumerate(fieldNames):
- if _types:
- ret[field_name] = cursor._correctFieldType(row[idx], field_name, _newQuery=True)
- else:
- ret[field_name] = row[idx]
+ ret[field_name] = row[idx]
return ret
class DictCursor(self.dbapi.Cursor):
@@ -180,10 +176,10 @@
def getFields(self, tableName, cursor):
cursor.execute("pragma table_info('%s')" % tableName)
- rs = cursor.getDataSet()
fields = []
- for rec in rs:
- typ = rec["type"].lower()
+ getFieldVal = cursor.getFieldVal
+ for rec_idx in range(cursor.RowCount):
+ typ = getFieldVal("type", rec_idx).lower()
if typ[:3] == "int":
fldType = "I"
elif typ[:3] == "dec" or typ[:4] == "real":
@@ -202,7 +198,7 @@
# Adi J. Sieker pointed out that the 'pk' column of the pragma command
# returns a value indicating whether the field is the PK or not. This simplifies
# the routine over having to parse the CREATE TABLE code.
- fields.append((rec["name"], fldType, bool(rec['pk'])))
+ fields.append((getFieldVal("name", rec_idx), fldType,
bool(getFieldVal('pk', rec_idx))))
return tuple(fields)