Sigh. I initially sent this Jan 2 to "pyg...@telsasoft.com" ..and didn't notice the bounce back.
I was going to do a performance test for prepared queries, specifically for INSERTs: simple queries , query_formatted , inline=True/False , SQL EXECUTE , pgdb , dicts ,and PQexecPrepared. I didn't get far before needing to step back, as I'm shocked by how slow parameter formatting is, just to turn %s into $1. It's nearly 10x slower than query() with $1 params. That's true for pg (which can send params out of line) and pgdb (which always inlines params). If I'd noticed 2 years ago, I might have avoided using DB.query_formatted() just to let developers use %s formats. Perhaps the only reason why this isn't causing issues for our data loaders is that each CPU heavy formatted INSERT is partially balanced with I/O on the DB server. But I can easily imagine that being a deal-breaker. Maybe I'm wrong, but I'd have expected the additional processing cost to be within 10% or 20% of query, and certainly within a factor of two. BTW, prepared query completely avoids this by avoiding all the isinstance which appears to be most of the expense. The high performance cost and mitigation (prepares) should be documented. Also, I realized that SQL EXECUTE is a bad way to do it, and maybe shouldn't be documented, since it requires quoting each parameter (which is slow and thereby defeats itself). I tried to use cProfile to find hotspots and guessed at the rest until it worked. My tweaks make it 3x faster - however, it's still 3x slower than query() ! My changes are not very "disciplined" so please consider this a proof of concept written by a non pytho-phone. I didn't touch pgdb and I haven't tested with dict formats. I imagine you may have a better idea how/what to cache. 3 runs on a semi-idle server with checkpoint_interval=60sec. 100000rows and 100cols (I believe this scales roughly as rows*cols) method unpatched | patched time (sec) ------------------------------------------+------------------- pg simple insert with txn: (105, 98, 91) | pg query() $1 params: (169 172 178) | 174 pg query_formatted %s params: (745) | 314 pg insert using inline=True: (386) | 187 pg prepare_query + execute_prepared (n/a) / (90, 88, 94) - note, that's not included in this patch pgdb insert with/out params: TODO SQL EXECUTE + $1 + inline=True: TODO dicts TODO psycopg TODO Find attached my patch and test, which was meant to approximate our loader processes, which I'd like to convert to use prepared query. Justin
diff --git a/pg.py b/pg.py index e830a66..a7758a7 100644 --- a/pg.py +++ b/pg.py @@ -351,7 +351,9 @@ class _ParameterList(list): placeholder will be returned and the parameter list will be augmented. """ value = self.adapt(value, typ) - if isinstance(value, Literal): + #if value.__class__.__name__==Literal or isinstance(value, Literal): + if type(value).__name__==Literal or isinstance(value, Literal): + #XXX if _typecache.get(type(value).__name__)=='Literal' or isinstance(value, Literal): return value self.append(value) return '$%d' % len(self) @@ -429,9 +431,7 @@ class Adapter: @staticmethod def _adapt_num(v): """Adapt a numeric parameter.""" - if not v and v != 0: - return None - return v + return v if v or v==0 else None _adapt_int = _adapt_float = _adapt_money = _adapt_num @@ -537,29 +537,58 @@ class Adapter: value.append(v) return '(%s)' % ','.join(value) + # This is to avoid calling isinstance() numerous times for each row*col + # Should just cache the result of adapt() ? + _typdict=dict(Bytea='bytea', basestring='text', bool='bool', int='int', long='int', float='float', Decimal='int', # Decimal='num', + date='date', time='date', datetime='date', timedelta='date') + # Literal='Literal' ? + + # Map from type(value) to class of which it's an instance + # to avoid repeated, slow lookups by dynamic attribute + # Could be static, like in pgdb. + #self.adapt_dict = dict([(a[7:],Adapter.__dict__[a]) for a in Adapter.__dict__ if a.startswith('_adapt_')]) + _typecache={} def adapt(self, value, typ=None): """Adapt a value with known database type.""" - if value is not None and not isinstance(value, Literal): + + typval0 = typval = type(value).__name__ #.__class__.__name__ + try: # XXX: should index on "simple" ? + if not typ: + #print ('hit cache') + return self._typecache[typval](value) + except KeyError: + pass + + if value is None: + pass + elif self._typdict.get(typval)=='Literal': + pass + else: if typ: simple = self.get_simple_name(typ) else: - typ = simple = self.guess_simple_type(value) or 'text' + typ = simple = self._typdict.get(typval) or self.guess_simple_type(value) or 'text' try: value = value.__pg_str__(typ) + typval = type(value).__name__ except AttributeError: pass + if simple == 'text': pass elif simple == 'record': if isinstance(value, tuple): value = self._adapt_record(value, typ) - elif simple.endswith('[]'): + elif simple[-2:]=='[]': if isinstance(value, list): adapt = getattr(self, '_adapt_%s_array' % simple[:-2]) value = adapt(value) else: - adapt = getattr(self, '_adapt_%s' % simple) - value = adapt(value) + _adapt = getattr(self, '_adapt_%s' % simple) + value = _adapt(value) + if typval0==typval and typ==simple: + #print ('caching val:%s simp:%s typ:%s cache:%s'%(value,simple,typval,self._typecache)) + self._typecache[simple] = _adapt return value @staticmethod @@ -625,33 +654,63 @@ class Adapter: def adapt_inline(self, value, nested=False): """Adapt a value that is put into the SQL and needs to be quoted.""" + if value is None: return 'NULL' - if isinstance(value, Literal): - return value - if isinstance(value, Bytea): + + typval=type(value).__name__ + + if typval not in self._typdict: + #sys.stderr.write ('failed lookup4 for %s %s\n'%(typval,value)) + if isinstance(value, Literal): + self._typdict[typval]='Literal' + if isinstance(value, Bytea): + self._typdict[typval]='Bytea' + elif isinstance(value, Json): + self._typdict[typval]='Json' + elif isinstance(value, (datetime, date, time, timedelta)): # datetime.date??? + self._typdict[typval]='date' + #elif isinstance(value, basestring): + #self._typdict[typval]='basestring' + elif isinstance(value, bool): + self._typdict[typval]='bool' + elif isinstance(value, float): + self._typdict[typval]='float' + elif isinstance(value, (int, long, Decimal)): + self._typdict[typval]='int' + + typ=self._typdict.get(typval) + if typ== 'Bytea': value = self.db.escape_bytea(value) if bytes is not str: # Python >= 3.0 value = value.decode('ascii') - elif isinstance(value, Json): + elif typ== 'Json': if value.encode: return value.encode() value = self.db.encode_json(value) - elif isinstance(value, (datetime, date, time, timedelta)): + elif typ== 'date': #, date, time, timedelta)): value = str(value) - if isinstance(value, basestring): - value = self.db.escape_string(value) - return "'%s'" % value - if isinstance(value, bool): + + typval=type(value).__name__ + + if typ=='Literal': + return value + if typ== 'bool': return 'true' if value else 'false' - if isinstance(value, float): + if typ== 'float': if isinf(value): return "'-Infinity'" if value < 0 else "'Infinity'" if isnan(value): return "'NaN'" return value - if isinstance(value, (int, long, Decimal)): + if typ== 'int': #, long, Decimal): return value + +#... + + if isinstance(value, basestring): + value = self.db.escape_string(value) + return "'%s'" % value if isinstance(value, list): q = self.adapt_inline s = '[%s]' if nested else 'ARRAY[%s]' @@ -663,7 +722,7 @@ class Adapter: value = value.__pg_repr__() except AttributeError: raise InterfaceError( - 'Do not know how to adapt type %s' % type(value)) + 'Do not know how to adapt type %s %s %s %s %s' % (type(value), value, typval, repr(self._typdict.get(typval)), self._typdict.get(typval)=='basestring')) if isinstance(value, (tuple, list)): value = self.adapt_inline(value) return value @@ -685,7 +744,8 @@ class Adapter: if inline and types: raise ValueError('Typed parameters must be sent separately') params = self.parameter_list() - if isinstance(values, (list, tuple)): + valtyp = type(values).__name__ #values.__class__.__name__ + if valtyp in (list,tuple) or isinstance(values, (list, tuple)): if inline: adapt = self.adapt_inline literals = [adapt(value) for value in values] @@ -700,8 +760,13 @@ class Adapter: for value, typ in zip(values, types): append(add(value, typ)) else: - for value in values: - append(add(value)) + _adapt = self.adapt + # Optimizations: + # - Use 2nd list comprehension hack to assign to w to avoid calling adapt twice + # - Short circuit isinstance + params = [w for v in values for w in (_adapt(v),) if self._typdict.get(type(w).__name__)!='Literal'] + literals = ['$%d' % i for i in range(1,1+len(values))] + command %= tuple(literals) elif isinstance(values, dict): # we want to allow extra keys in the dictionary, diff --git a/pgdb.py b/pgdb.py index fe52df4..88a314d 100644 --- a/pgdb.py +++ b/pgdb.py @@ -883,6 +883,8 @@ class Cursor(object): """Exit the runtime context for the cursor object.""" self.close() + # TODO: avoid dozens of isinstance calls same as pgdb quote_inline + def _quote(self, value): """Quote value depending on its type.""" if value is None: -- 2.12.2
_______________________________________________ PyGreSQL mailing list PyGreSQL@Vex.Net https://mail.vex.net/mailman/listinfo/pygresql