Log message for revision 115349: Merge in the essential parts of unimr's queryplan
Changed: U Zope/trunk/src/Products/ZCatalog/Catalog.py U Zope/trunk/src/Products/ZCatalog/report.py U Zope/trunk/src/Products/ZCatalog/tests/test_catalog.py -=- Modified: Zope/trunk/src/Products/ZCatalog/Catalog.py =================================================================== --- Zope/trunk/src/Products/ZCatalog/Catalog.py 2010-08-01 19:17:49 UTC (rev 115348) +++ Zope/trunk/src/Products/ZCatalog/Catalog.py 2010-08-01 20:26:37 UTC (rev 115349) @@ -482,7 +482,7 @@ continue order.append((ILimitedResultIndex.providedBy(index), name)) order.sort() - return order + return [i[1] for i in order] def search(self, query, sort_index=None, reverse=0, limit=None, merge=1): """Iterate through the indexes, applying the query to each one. If @@ -506,17 +506,22 @@ # Canonicalize the request into a sensible query before passing it on query = self.make_query(query) + cr = self.getCatalogReport(query) cr.start() - for limit_result, i in self._sorted_search_indexes(query): + plan = cr.plan() + if not plan: + plan = self._sorted_search_indexes(query) + + for i in plan: index = self.getIndex(i) _apply_index = getattr(index, "_apply_index", None) if _apply_index is None: continue - cr.split(i) - if limit_result: + cr.start_split(i) + if ILimitedResultIndex.providedBy(index): r = _apply_index(query, rs) else: r = _apply_index(query) @@ -528,14 +533,15 @@ # once we don't need to support the "return everything" case # anymore if r is not None and not r: - cr.split(i, None) + cr.stop_split(i, None) return LazyCat([]) - cr.split(i, r) + + cr.stop_split(i, r) w, rs = weightedIntersection(rs, r) if not rs: break else: - cr.split(i, None) + cr.stop_split(i, None) cr.stop() Modified: Zope/trunk/src/Products/ZCatalog/report.py =================================================================== --- Zope/trunk/src/Products/ZCatalog/report.py 2010-08-01 19:17:49 UTC (rev 115348) +++ Zope/trunk/src/Products/ZCatalog/report.py 2010-08-01 20:26:37 UTC (rev 115349) @@ -25,6 +25,7 @@ value_indexes = frozenset() MAX_DISTINCT_VALUES = 10 +REFRESH_RATE = 100 def determine_value_indexes(indexes): @@ -101,67 +102,121 @@ return key -class StopWatch(object): - """ Simple stopwatch class """ +class CatalogReport(object): + """Catalog report class to meassure and identify catalog queries. + """ - def __init__(self): + def __init__(self, catalog, query=None, threshold=0.1): self.init() + self.catalog = catalog + self.query = query + self._key = None + self.threshold = threshold + parent = aq_parent(catalog) + path = getattr(aq_base(parent), 'getPhysicalPath', None) + if path is None: + path = ('', 'NonPersistentCatalog') + else: + path = tuple(parent.getPhysicalPath()) + self.cid = path + def init(self): self.res = [] self.start_time = None self.interim = {} self.stop_time = None + self.duration = None + def prioritymap(self): + # holds the benchmark of each index + prioritymap = getattr(self.catalog, '_v_prioritymap', None) + if prioritymap is None: + prioritymap = self.catalog._v_prioritymap = {} + return prioritymap + + def benchmark(self): + # holds the benchmark of each index + return self.prioritymap().get(self.key(), None) + + def plan(self): + benchmark = self.benchmark() + if not benchmark: + return None + + # sort indexes on (mean hits, mean search time) + ranking = [((v[0], v[1]), k) for k, v in benchmark.items()] + ranking.sort() + return [i[1] for i in ranking] + def start(self): self.init() self.start_time = time.time() + benchmark = self.benchmark() + if benchmark is None: + self.prioritymap()[self.key()] = {} - def split(self, label, result=None): + def start_split(self, label, result=None): + self.interim[label] = (time.time(), None) + + def stop_split(self, name, result=None): current = time.time() - start_time, stop_time = self.interim.get(label, (None, None)) - - if start_time is None: - self.interim[label] = (current, None) - return - + start_time, stop_time = self.interim.get(name, (None, None)) length = 0 if result is not None: # TODO: calculating the length can be expensive length = len(result) - self.interim[label] = (start_time, current) - self.res.append((label, current - start_time, length)) + self.interim[name] = (start_time, current) + dt = current - start_time + self.res.append((name, current - start_time, length)) + # remember index's hits, search time and calls + benchmark = self.benchmark() + if name not in benchmark: + benchmark[name] = (length, dt, 1) + else: + n, t, c = benchmark[name] + n = int(((n*c) + length) / float(c + 1)) + t = ((t*c) + dt) / float(c + 1) + # reset adaption + if c % REFRESH_RATE == 0: + c = 0 + c += 1 + benchmark[name] = (n, t, c) + def stop(self): self.end_time = time.time() + self.duration = self.end_time - self.start_time - def result(self): - return (self.end_time - self.start_time, tuple(self.res)) + key = self.key() + benchmark = self.benchmark() + prioritymap = self.prioritymap() + prioritymap[key] = benchmark + # calculate mean time of search + stats = getattr(self.catalog, '_v_stats', None) + if stats is None: + stats = self.catalog._v_stats = {} -class CatalogReport(StopWatch): - """Catalog report class to meassure and identify catalog queries. - """ + if key not in stats: + mt = self.duration + c = 1 + else: + mt, c = stats[key] + mt = ((mt * c) + self.duration) / float(c + 1) + c += 1 - def __init__(self, catalog, query=None, threshold=0.1): - super(CatalogReport, self).__init__() + stats[key] = (mt, c) + self.log() - self.catalog = catalog - self.query = query - self.threshold = threshold + def result(self): + return (self.duration, tuple(self.res)) - parent = aq_parent(catalog) - path = getattr(aq_base(parent), 'getPhysicalPath', None) - if path is None: - path = ('', 'NonPersistentCatalog') - else: - path = tuple(parent.getPhysicalPath()) - self.cid = path + def key(self): + if not self._key: + self._key = make_key(self.catalog, self.query) + return self._key - def stop(self): - super(CatalogReport, self).stop() - self.log() - def log(self): # result of stopwatch res = self.result() @@ -171,7 +226,7 @@ # The key calculation takes a bit itself, we want to avoid that for # any fast queries. This does mean that slow queries get the key # calculation overhead added to their runtime. - key = make_key(self.catalog, self.query) + key = self.key() reports_lock.acquire() try: Modified: Zope/trunk/src/Products/ZCatalog/tests/test_catalog.py =================================================================== --- Zope/trunk/src/Products/ZCatalog/tests/test_catalog.py 2010-08-01 19:17:49 UTC (rev 115348) +++ Zope/trunk/src/Products/ZCatalog/tests/test_catalog.py 2010-08-01 20:26:37 UTC (rev 115349) @@ -290,19 +290,18 @@ def test_sorted_search_indexes_one(self): result = self._catalog._sorted_search_indexes({'att1': 'a'}) - self.assertEquals(result, [(True, 'att1')]) + self.assertEquals(result, ['att1']) def test_sorted_search_indexes_many(self): query = {'att1': 'a', 'att2': 'b', 'num': 1} result = self._catalog._sorted_search_indexes(query) - indexes = [r[1] for r in result] - self.assertEquals(set(indexes), set(['att1', 'att2', 'num'])) + self.assertEquals(set(result), set(['att1', 'att2', 'num'])) def test_sorted_search_indexes_priority(self): # att2 and col2 don't support ILimitedResultIndex, att1 does query = {'att1': 'a', 'att2': 'b', 'col2': 'c'} result = self._catalog._sorted_search_indexes(query) - self.assertEquals(result.index((True, 'att1')), 2) + self.assertEquals(result.index('att1'), 2) # search # sortResults _______________________________________________ Zope-Checkins maillist - Zope-Checkins@zope.org https://mail.zope.org/mailman/listinfo/zope-checkins