[dabo-dev] dabo Commit 7108

Paul McNett Sat, 10 Mar 2012 12:52:54 -0800

dabo Commit
Revision 7108
Date: 2012-03-10 12:52:43 -0800 (Sat, 10 Mar 2012)
Author: Paul
Trac: http://trac.dabodev.com/changeset/7108


Changed:
U   trunk/dabo/dConstants.py
U   trunk/dabo/db/dBackend.py
U   trunk/dabo/db/dCursorMixin.py
U   trunk/dabo/db/dbSQLite.py

Log:
Optimized the requery() cycle to not correct the field types immediately. 
Instead, the field types are corrected as needed during the getFieldVal()
cycle. In my testing this yields a 7x speedup when requerying 1000 rows
in a table with 30 fields. We pay for it a little bit later, when the
field types get corrected and a flag is set during getFieldVal(), but the
key is that we don't pay for it all at once, and we don't pay for something
we may never use.

Optimized some use-cases of getDataSet(). Previously, we used a subtractive 
approach by copying each record dict and deleting the fields we weren't
interested in. Now, we use an additive approach which results in a 3.25x
speedup when calling getDataSet() on 500 rows and only asking for 2 fields
out of 30. Other use cases are comparable to the old speed - a little
slower, since we need to correct the field types, if needed, while looping
over the desired rows.

Tested on MySQL and SQLite. Please let me know if you notice any trouble.
 


Diff:
Modified: trunk/dabo/dConstants.py
===================================================================
--- trunk/dabo/dConstants.py    2012-03-10 20:50:15 UTC (rev 7107)
+++ trunk/dabo/dConstants.py    2012-03-10 20:52:43 UTC (rev 7108)
@@ -25,6 +25,7 @@
 CURSOR_MEMENTO = "dabo-memento"
 CURSOR_NEWFLAG = "dabo-newrec"
 CURSOR_TMPKEY_FIELD = "dabo-tmpKeyField"
+CURSOR_FIELD_TYPES_CORRECTED = "dabo-fieldTypesCorrected"
 
 DLG_OK = 0
 DLG_CANCEL = -1

Modified: trunk/dabo/db/dBackend.py
===================================================================
--- trunk/dabo/db/dBackend.py   2012-03-10 20:50:15 UTC (rev 7107)
+++ trunk/dabo/db/dBackend.py   2012-03-10 20:52:43 UTC (rev 7108)
@@ -30,12 +30,6 @@
                super(dBackend, self).__init__()
                self.dbModuleName = None
                self._connection = None
-               # If the db module is set to hook into dCursor to correct the 
field
-               # types and convert the records to dict inline, then 
dCursorMixin doesn't
-               # have to reiterate the records to do those tasks. Set the 
following to
-               # True in the given db module to tell dCursorMixin not to 
bother. As of this
-               # writing, only dbSQLite is set up for this.
-               self._alreadyCorrectedFieldTypes = False
                # Reference to the cursor that is using this object
                self._cursor = None
 

Modified: trunk/dabo/db/dCursorMixin.py
===================================================================
--- trunk/dabo/db/dCursorMixin.py       2012-03-10 20:50:15 UTC (rev 7107)
+++ trunk/dabo/db/dCursorMixin.py       2012-03-10 20:52:43 UTC (rev 7108)
@@ -5,6 +5,7 @@
 import time
 import re
 from decimal import Decimal
+import functools
 import dabo
 import dabo.dConstants as kons
 from dabo.dLocalize import _
@@ -16,13 +17,15 @@
 from dabo.lib.utils import noneSortKey, caseInsensitiveSortKey
 from dabo.lib.utils import ustr
 
+cursor_flags = (kons.CURSOR_MEMENTO, kons.CURSOR_NEWFLAG,
+               kons.CURSOR_TMPKEY_FIELD, kons.CURSOR_FIELD_TYPES_CORRECTED)
 
+
 class dCursorMixin(dObject):
        """Dabo's cursor class, representing the lowest tier."""
        _call_initProperties = False
        # Make these class attributes, so that they are shared among all 
instances
        _fieldStructure = {}
-       _fieldsToAlwaysCorrectType = []
 
        def __init__(self, sql="", *args, **kwargs):
                self._convertStrToUnicode = True
@@ -185,6 +188,7 @@
                                rec = self._records[self.RowNumber]
                        except IndexError:
                                rec = {}
+               self._correctFieldTypesIfNeeded(rec)
                if isinstance(self.KeyField, tuple):
                        if rec:
                                pk = tuple([rec[kk] for kk in self.KeyField])
@@ -207,7 +211,15 @@
                return pkField
 
 
-       def _correctFieldType(self, field_val, field_name, _newQuery=False):
+       def _correctFieldTypesIfNeeded(self, rec):
+               if not rec.get(kons.CURSOR_FIELD_TYPES_CORRECTED, False):
+                       _correctFieldType = self._correctFieldType
+                       for fld_name in (i for i in rec if i not in 
cursor_flags):
+                               rec[fld_name] = 
_correctFieldType(rec[fld_name], fld_name)
+                       rec[kons.CURSOR_FIELD_TYPES_CORRECTED] = True
+
+
+       def _correctFieldType(self, field_val, field_name):
                """
                Correct the type of the passed field_val, based on 
self.DataStructure.
 
@@ -218,65 +230,48 @@
                """
                if field_val is None:
                        return field_val
-               ret = field_val
-               if _newQuery or (field_name in self._fieldsToAlwaysCorrectType):
-                       pythonType = self._types.get(field_name, None)
-                       if pythonType is None or pythonType == type(None):
-                               pythonType = self._types[field_name] = 
dabo.db.getDataType(type(field_val))
+               pythonType = self._types.get(field_name, None)
+               if pythonType is None or pythonType == type(None):
+                       pythonType = self._types[field_name] = 
dabo.db.getDataType(type(field_val))
 
-                       if isinstance(field_val, str) and 
self._convertStrToUnicode:
-                               # convert to unicode
-                               pass
-                       elif pythonType is None or isinstance(field_val, 
pythonType):
-                               # No conversion needed.
-                               return ret
-                       else:
-                               
self._fieldsToAlwaysCorrectType.append(field_name)
+               if isinstance(field_val, pythonType):
+                       # No conversion needed.
+                       return field_val
 
-                       if pythonType in (unicode,):
-                               # Unicode conversion happens below.
-                               pass
-                       elif pythonType in (datetime.datetime,) and 
isinstance(field_val, basestring):
-                               ret = dates.getDateTimeFromString(field_val)
-                               if ret is None:
-                                       ret = field_val
-                               else:
-                                       return ret
-                       elif pythonType in (datetime.date,) and 
isinstance(field_val, basestring):
-                               ret = dates.getDateFromString(field_val)
-                               if ret is None:
-                                       ret = field_val
-                               else:
-                                       return ret
-                       elif pythonType in (Decimal,):
-                               ds = self.DataStructure
-                               ret = None
-                               _field_val = field_val
-                               if type(field_val) in (float,):
-                                       # Can't convert to decimal directly 
from float
-                                       _field_val = ustr(_field_val)
-                               # Need to convert to the correct scale:
-                               scale = None
-                               for s in ds:
-                                       if s[0] == field_name:
-                                               if len(s) > 5:
-                                                       scale = s[5]
-                               if scale is None:
-                                       scale = 2
-                               return 
Decimal(_field_val).quantize(Decimal("0.%s" % (scale * "0",)))
-                       else:
-                               try:
-                                       return pythonType(field_val)
-                               except Exception, e:
-                                       tfv = type(field_val)
-                                       dabo.log.info(_("_correctFieldType() 
failed for field: '%(field_name)s'; value: '%(field_val)s'; type: '%(tfv)s'")
-                                                       % locals())
+               if pythonType in (unicode,):
+                       # Unicode conversion happens below.
+                       pass
+               elif pythonType in (datetime.datetime,) and 
isinstance(field_val, basestring):
+                       return dates.getDateTimeFromString(field_val)
+               elif pythonType in (datetime.date,) and isinstance(field_val, 
basestring):
+                       return dates.getDateFromString(field_val)
+               elif pythonType in (Decimal,):
+                       ds = self.DataStructure
+                       _field_val = field_val
+                       if type(field_val) in (float,):
+                               # Can't convert to decimal directly from float
+                               _field_val = ustr(_field_val)
+                       # Need to convert to the correct scale:
+                       scale = None
+                       for s in ds:
+                               if s[0] == field_name:
+                                       if len(s) > 5:
+                                               scale = s[5]
+                       if scale is None:
+                               scale = 2
+                       return Decimal(_field_val).quantize(Decimal("0.%s" % 
(scale * "0",)))
+               else:
+                       try:
+                               return pythonType(field_val)
+                       except Exception, e:
+                               tfv = type(field_val)
+                               dabo.log.info(_("_correctFieldType() failed for 
field: '%(field_name)s'; value: '%(field_val)s'; type: '%(tfv)s'")
+                                               % locals())
 
                # Do the unicode conversion last:
                if isinstance(field_val, str) and self._convertStrToUnicode:
                        try:
-                               decoded = field_val.decode(self.Encoding)
-                               return decoded
+                               return field_val.decode(self.Encoding)
                        except UnicodeDecodeError, e:
                                # Try some common encodings:
                                ok = False
@@ -301,15 +296,15 @@
                                                                % 
{'fname':field_name, 'enc':enc})
                                                        return ret
                                else:
-                                       raise e
+                                       raise 
 
                        rfv = repr(field_val)
                        dabo.log.error(_("%(rfv)s couldn't be converted to 
%(pythonType)s (field %(field_name)s)")
                                        % locals())
-               return ret
+               return field_val
 
 
-       def execute(self, sql, params=None, _newQuery=False, errorClass=None, 
convertQMarks=False):
+       def execute(self, sql, params=None, errorClass=None, 
convertQMarks=False):
                """Execute the sql, and populate the DataSet if it is a select 
statement."""
                # The idea here is to let the super class do the actual work in
                # retrieving the data. However, many cursor classes can only 
return
@@ -404,24 +399,16 @@
                                errMsg = ustr(e)
                        dabo.log.error("Error fetching records: (%s, %s)" % 
(type(e), errMsg))
 
-               if _records and not 
self.BackendObject._alreadyCorrectedFieldTypes:
-                       if isinstance(_records[0], (tuple, list)):
-                               # Need to convert each row to a Dict, since the 
backend didn't do it.
-                               tmpRows = []
-                               fldNames = [f[0] for f in self.FieldDescription]
-                               for row in _records:
-                                       dic = {}
-                                       for idx, fldName in enumerate(fldNames):
-                                               dic[fldName] = 
self._correctFieldType(field_val=row[idx],
-                                                               
field_name=fldName, _newQuery=_newQuery)
-                                       tmpRows.append(dic)
-                               _records = tmpRows
-                       else:
-                               # Already a DictCursor, but we still need to 
correct the field types.
-                               for row in _records:
-                                       for fld, val in row.items():
-                                               row[fld] = 
self._correctFieldType(field_val=val,
-                                                               field_name=fld, 
_newQuery=_newQuery)
+               if _records and isinstance(_records[0], (tuple, list)):
+                       # Need to convert each row to a Dict, since the backend 
didn't do it.
+                       tmpRows = []
+                       fldNames = [f[0] for f in self.FieldDescription]
+                       for row in _records:
+                               dic = {}
+                               for idx, fldName in enumerate(fldNames):
+                                       dic[fldName] = row[idx]
+                               tmpRows.append(dic)
+                       _records = tmpRows
 
                self._records = dDataSet(_records)
                # This will handle bounds issues
@@ -492,7 +479,7 @@
                self.lastParams = params
                self._savedStructureDescription = []
 
-               self.execute(currSQL, params, _newQuery=newQuery)
+               self.execute(currSQL, params)
 
                # clear mementos and new record flags:
                self._mementos = {}
@@ -892,52 +879,49 @@
 
        def getFieldVal(self, fld, row=None, _rowChangeCallback=None):
                """Return the value of the specified field in the current or 
specified row."""
-               if self.RowCount <= 0:
+               _records = self._records
+               if not _records:
                        raise dException.NoRecordsException(
                                        _("No records in dataset '%s'.") % 
self.Table)
                if row is None:
-                       row = self.RowNumber
+                       row = self._getRowNumber()
                try:
-                       rec = self._records[row]
+                       rec = _records[row]
                except IndexError:
-                       cnt = len(self._records)
+                       cnt = len(_records)
                        raise dException.RowNotFoundException(
                                        _("Row #%(row)s requested, but the data 
set has only %(cnt)s row(s),") % locals())
+               self._correctFieldTypesIfNeeded(rec)
                if isinstance(fld, (tuple, list)):
-                       ret = []
-                       for xFld in fld:
-                               ret.append(self.getFieldVal(xFld, row=row))
-                       return tuple(ret)
-               else:
-                       try:
-                               return rec[fld]
-                       except KeyError:
-                               try:
-                                       vf = self.VirtualFields[fld]
-                                       if not isinstance(vf, dict):
-                                               vf = {"func": vf}
+                       return map(functools.partial(self.getFieldVal, 
row=row), fld)
+               if fld in rec:
+                       return rec[fld]
+               elif fld in self.VirtualFields:
+                       vf = self.VirtualFields[fld]
+                       if not isinstance(vf, dict):
+                               vf = {"func": vf}
 
-                                       requery_children = 
(vf.get("requery_children", False) and bool(_rowChangeCallback))
+                       requery_children = (vf.get("requery_children", False) 
and bool(_rowChangeCallback))
 
-                                       # Move to specified row if necessary, 
and then call the VirtualFields
-                                       # function, which expects to be on the 
correct row.
-                                       if not requery_children:
-                                               # The VirtualFields 
'requery_children' key is False, or
-                                               # we aren't being called by a 
bizobj, so there aren't child bizobjs.
-                                               _oldrow = self.RowNumber
-                                               self.RowNumber = row
-                                               ret = vf["func"]()
-                                               self.RowNumber = _oldrow
-                                               return ret
-                                       else:
-                                               # The VirtualFields 
definition's 'requery_children' key is True, so
-                                               # we need to request a row 
change and requery of any child bizobjs
-                                               # as necessary, before 
executing the virtual field function.
-                                               _rowChangeCallback(row)
-                                               return vf["func"]()
-                               except KeyError:
-                                       raise 
dException.FieldNotFoundException("%s '%s' %s" % (
-                                                       _("Field"), fld, 
_("does not exist in the data set")))
+                       # Move to specified row if necessary, and then call the 
VirtualFields
+                       # function, which expects to be on the correct row.
+                       if not requery_children:
+                               # The VirtualFields 'requery_children' key is 
False, or
+                               # we aren't being called by a bizobj, so there 
aren't child bizobjs.
+                               _oldrow = self.RowNumber
+                               self.RowNumber = row
+                               ret = vf["func"]()
+                               self.RowNumber = _oldrow
+                               return ret
+                       else:
+                               # The VirtualFields definition's 
'requery_children' key is True, so
+                               # we need to request a row change and requery 
of any child bizobjs
+                               # as necessary, before executing the virtual 
field function.
+                               _rowChangeCallback(row)
+                               return vf["func"]()
+               else:
+                       raise dException.FieldNotFoundException("%s '%s' %s" % (
+                                       _("Field"), fld, _("does not exist in 
the data set")))
 
 
        def _fldTypeFromDB(self, fld):
@@ -1345,7 +1329,7 @@
                        pk = self.pkExpression(rec)
 
                for k, v in rec.items():
-                       if k not in (kons.CURSOR_TMPKEY_FIELD,):
+                       if k not in cursor_flags:
                                ret[k] = (None, v)
                return ret
 
@@ -1382,31 +1366,35 @@
                to only include the specified fields. rowStart specifies the 
starting row
                to include, and rows is the number of rows to return.
                """
-               ds = []
-               internals = (kons.CURSOR_TMPKEY_FIELD,)
                rowCount = self.RowCount
 
                if rows is None:
                        rows = rowCount
                else:
                        rows = min(rowStart + rows, rowCount)
+               if rows < 1 or rowStart > self.RowCount:
+                       return dDataSet()
+
+               getFieldVal = self.getFieldVal
+               _records = self._records
+               vFieldKeys = self.VirtualFields.keys()
+               _correctFieldTypesIfNeeded = self._correctFieldTypesIfNeeded
+
+               if not flds:
+                       vflds = vFieldKeys
+                       flds = [f for f in _records[rowStart] if 
returnInternals or f not in cursor_flags]
+               else:
+                       vflds = [f for f in _records if f in vFieldKeys]
+                       flds = [f for f in flds if f not in vFieldKeys]
+
+               ds = []
                for row in xrange(rowStart, rows):
-                       tmprec = self._records[row].copy()
-                       for k, v in self.VirtualFields.items():
-                               # only calc requested virtualFields
-                               if (flds and k in flds) or not flds:
-                                       tmprec.update({k: self.getFieldVal(k, 
row)})
-                       if flds:
-                               # user specified specific fields - get rid of 
all others
-                               for k in tmprec.keys():
-                                       if k not in flds:
-                                               del tmprec[k]
-                       if not flds and not returnInternals:
-                               # user didn't specify explicit fields and 
doesn't want internals
-                               for internal in internals:
-                                       tmprec.pop(internal, None)
+                       rec = _records[row]
+                       _correctFieldTypesIfNeeded(rec)
+                       tmprec = dict([(k, rec[k]) for k in flds])
+                       for v in vflds:
+                               tmprec.update({v: self.getFieldVal(v, row)})
                        ds.append(tmprec)
-
                return dDataSet(ds)
 
 

Modified: trunk/dabo/db/dbSQLite.py
===================================================================
--- trunk/dabo/db/dbSQLite.py   2012-03-10 20:50:15 UTC (rev 7107)
+++ trunk/dabo/db/dbSQLite.py   2012-03-10 20:52:43 UTC (rev 7108)
@@ -22,7 +22,6 @@
                except ImportError:
                        import sqlite3 as dbapi
                self.dbapi = dbapi
-               self._alreadyCorrectedFieldTypes = True
 
 
        def getConnection(self, connectInfo, forceCreate=False, **kwargs):
@@ -35,10 +34,7 @@
                        ret = {}
                        fieldNames = (fld[0] for fld in cursor.description)
                        for idx, field_name in enumerate(fieldNames):
-                               if _types:
-                                       ret[field_name] = 
cursor._correctFieldType(row[idx], field_name, _newQuery=True)
-                               else:
-                                       ret[field_name] = row[idx]
+                               ret[field_name] = row[idx]
                        return ret
 
                class DictCursor(self.dbapi.Cursor):
@@ -180,10 +176,10 @@
 
        def getFields(self, tableName, cursor):
                cursor.execute("pragma table_info('%s')" % tableName)
-               rs = cursor.getDataSet()
                fields = []
-               for rec in rs:
-                       typ = rec["type"].lower()
+               getFieldVal = cursor.getFieldVal
+               for rec_idx in range(cursor.RowCount):
+                       typ = getFieldVal("type", rec_idx).lower()
                        if typ[:3] == "int":
                                fldType = "I"
                        elif typ[:3] == "dec" or typ[:4] == "real":
@@ -202,7 +198,7 @@
                        # Adi J. Sieker pointed out that the 'pk' column of the 
pragma command
                        # returns a value indicating whether the field is the 
PK or not. This simplifies
                        # the routine over having to parse the CREATE TABLE 
code.
-                       fields.append((rec["name"], fldType, bool(rec['pk'])))
+                       fields.append((getFieldVal("name", rec_idx), fldType, 
bool(getFieldVal('pk', rec_idx))))
                return tuple(fields)
 
 



_______________________________________________
Post Messages to: [email protected]
Subscription Maintenance: http://leafe.com/mailman/listinfo/dabo-dev
Searchable Archives: http://leafe.com/archives/search/dabo-dev
This message: 
http://leafe.com/archives/byMID/[email protected]

[dabo-dev] dabo Commit 7108

Reply via email to