Log message for revision 117419: new procedure for adding components Changed: U Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/CompositeIndex.py U Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/dtml/addCompositeIndex.dtml U Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/tests/testCompositeIndex.py
-=- Modified: Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/CompositeIndex.py =================================================================== --- Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/CompositeIndex.py 2010-10-10 21:15:16 UTC (rev 117418) +++ Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/CompositeIndex.py 2010-10-10 22:25:29 UTC (rev 117419) @@ -15,34 +15,70 @@ import logging from Acquisition import aq_parent +from Persistence import PersistentMapping + from App.special_dtml import DTMLFile from BTrees.IIBTree import IIBTree, IITreeSet, IISet, union, intersection, difference from BTrees.OOBTree import OOBTree from BTrees.IOBTree import IOBTree -import BTrees.Length +from BTrees.Length import Length + from zope.interface import implements from ZODB.POSException import ConflictError from Products.PluginIndexes.interfaces import ITransposeQuery from Products.PluginIndexes.interfaces import IUniqueValueIndex -from Products.PluginIndexes.KeywordIndex.KeywordIndex import KeywordIndex - +from Products.PluginIndexes.common.UnIndex import UnIndex from Products.PluginIndexes.common.util import parseIndexRequest +from Products.PluginIndexes.common import safe_callable from util import PermuteKeywordList +QUERY_OPTIONS = { 'FieldIndex' : ["query","range"] , + 'KeywordIndex' : ["query","operator","range"] } - _marker = [] logger = logging.getLogger('CompositeIndex') -class CompositeIndex(KeywordIndex): +class Component: + + def __init__(self,id,type,attributes): + + self._id = id + self._type = type + + if isinstance(attributes, str): + self._attributes = attributes.split(',') + else: + self._attributes = list(attributes) + + self._attributes = [ attr.strip() for attr in self._attributes if attr ] + + + @property + def id(self): + return self._id + + @property + def type(self): + return self._type + + @property + def attributes(self): + if not self._attributes: + return [self._id] + return self._attributes + + + +class CompositeIndex(UnIndex): + """Index for composition of simple fields. or sequences of items """ @@ -60,8 +96,80 @@ 'help': ('CompositeIndex','CompositeIndex_Settings.stx')}, ) + query_options = ("query","operator", "range") + + def __init__( + self, id, ignore_ex=None, call_methods=None, extra=None, caller=None): + """Create an unindex + + UnIndexes are indexes that contain two index components, the + forward index (like plain index objects) and an inverted + index. The inverted index is so that objects can be unindexed + even when the old value of the object is not known. + + e.g. + + self._index = {datum:[documentId1, documentId2]} + self._unindex = {documentId:datum} + + If any item in self._index has a length-one value, the value is an + integer, and not a set. There are special cases in the code to deal + with this. + + The arguments are: + + 'id' -- the name of the item attribute to index. This is + either an attribute name or a record key. + + 'ignore_ex' -- should be set to true if you want the index + to ignore exceptions raised while indexing instead of + propagating them. + + 'call_methods' -- should be set to true if you want the index + to call the attribute 'id' (note: 'id' should be callable!) + You will also need to pass in an object in the index and + uninded methods for this to work. + + 'extra' -- a mapping object that keeps additional + index-related parameters - subitem 'indexed_attrs' + can be list of dicts with following keys { id, type, attributes } + + 'caller' -- reference to the calling object (usually + a (Z)Catalog instance + """ + + def _get(o, k, default): + """ return a value for a given key of a dict/record 'o' """ + if isinstance(o, dict): + return o.get(k, default) + else: + return getattr(o, k, default) + + self.id = id + self.ignore_ex=ignore_ex # currently unimplimented + self.call_methods=call_methods + + self.operators = ('or', 'and') + self.useOperator = 'or' + + # set components + self._components = PersistentMapping() + for cdata in extra: + c_id = cdata['id'] + c_type = cdata['type'] + c_attributes = cdata['attributes'] + self._components[c_id] = Component(c_id,c_type,c_attributes) + + if not self._components: + self._components[id] = Component(id,'KeywordIndex',None) + + self._length = Length() + self.clear() + + + def clear(self): - self._length = BTrees.Length.Length() + self._length = Length() self._index = IOBTree() self._unindex = IOBTree() @@ -90,7 +198,6 @@ operator = self.useOperator rank=[] - for c, rec in record.keys: # experimental code for specifing the operator if operator == self.useOperator: @@ -98,14 +205,14 @@ if not operator in self.operators : raise RuntimeError,"operator not valid: %s" % escape(operator) - + res = self._apply_component_index(rec,c) - + if res is None: continue res, dummy = res - + rank.append((len(res),res)) @@ -113,10 +220,11 @@ rank.sort() k = None + for l,res in rank: k = intersection(k, res) - + if not k: break @@ -124,44 +232,49 @@ # switch to intersecton mode if operator == 'or': + res = None set_func = union else: + res = resultset set_func = intersection + + rank=[] if set_func == intersection: - res = None for key in k: - set=self._index.get(key, IISet()) - rank.append((len(set),key)) + + s=self._index.get(key, IISet()) + if isinstance(s, int): + rank.append((1,key)) + else: + rank.append((len(s),key)) # sort from short to long sets rank.sort() - + else: - res = None # dummy length if k: rank = enumerate(k) - # collect docIds for l,key in rank: - set=self._index.get(key, None) - if set is None: - set = IISet(()) - elif isinstance(set, int): - set = IISet((set,)) - res = set_func(res, set) + s=self._index.get(key, None) + if s is None: + s = IISet(()) + elif isinstance(s, int): + s = IISet((s,)) + res = set_func(res, s) if not res and set_func is intersection: break - if isinstance(res, int): r=IISet((res,)) + if isinstance(res, int): res = IISet((res,)) if res is None: - return IISet(),(self.id,) + res = IISet(),(self.id,) return res, (self.id,) @@ -201,19 +314,20 @@ else: setlist = index.items(lo) - for k, set in setlist: - if isinstance(set, tuple): - set = IISet((set,)) + for k, s in setlist: + if isinstance(s, tuple): + s = IISet((s,)) r = union(r, set) else: # not a range search for key in record.keys: - set=index.get(key, None) - if set is None: - set = IISet(()) - elif isinstance(set, int): - set = IISet((set,)) - r = union(r, set) + s=index.get(key, None) + if s is None: + s = IISet(()) + elif isinstance(s, int): + s = IISet((s,)) + r = union(r, s) + if isinstance(r, int): r=IISet((r,)) @@ -232,7 +346,7 @@ return res - def _index_object(self, documentId, obj, threshold=None, attr=''): + def _index_object(self, documentId, obj, threshold=None): """ index an object 'obj' with integer id 'i' Ideally, we've been passed a sequence of some sort that we @@ -246,17 +360,14 @@ # we'll do so. # unhashed keywords - newUKeywords = self._get_object_keywords(obj, attr) - - + newUKeywords = self._get_permuted_keywords(obj) + # hashed keywords newKeywords = map(lambda x: hash(x),newUKeywords) for i, kw in enumerate(newKeywords): if not self._tindex.get(kw,None): self._tindex[kw]=newUKeywords[i] - - newKeywords = map(lambda x: hash(x),newUKeywords) @@ -299,6 +410,25 @@ return 1 + def unindex_objectKeywords(self, documentId, keywords): + """ carefully unindex the object with integer id 'documentId'""" + + if keywords is not None: + for kw in keywords: + self.removeForwardIndexEntry(kw, documentId) + + def unindex_object(self, documentId): + """ carefully unindex the object with integer id 'documentId'""" + + keywords = self._unindex.get(documentId, None) + self.unindex_objectKeywords(documentId, keywords) + try: + del self._unindex[documentId] + except KeyError: + logger.debug('Attempt to unindex nonexistent' + ' document id %s' % documentId) + + def insertForwardIndexEntry(self, entry, documentId): """Take the entry provided and put it in the correct place in the forward index. @@ -382,49 +512,78 @@ 'should not happen.' % (self.__class__.__name__, repr(components),str(self.id),str(c))) - def _get_object_keywords(self, obj, attr): - """ composite keyword lists """ + def _get_permuted_keywords(self, obj): + """ returns permutation list of object keywords """ - fields = self.getComponentIndexAttributes() + components = self.getIndexComponents() kw_list = [] - - for attributes in fields: - kw = [] - for attr in attributes: - kw.extend(list(super(CompositeIndex,self)._get_object_keywords(obj, attr))) + + for c in components: + kw=self._get_keywords(obj, c) kw_list.append(kw) pkl = PermuteKeywordList(kw_list) return pkl.keys + + def _get_keywords(self,obj,component): + + if component.type == 'FieldIndex': + attr = component.attributes[-1] + try: + datum = getattr(obj, attr) + if safe_callable(datum): + datum = datum() + except (AttributeError, TypeError): + datum = _marker + if isinstance(datum,list): + datum = tuple(datum) + return (datum,) + + elif component.type == 'KeywordIndex': + for attr in component.attributes: + datum = [] + newKeywords = getattr(obj, attr, ()) + if safe_callable(newKeywords): + try: + newKeywords = newKeywords() + except AttributeError: + continue + if not newKeywords and newKeywords is not False: + continue + elif isinstance(newKeywords, basestring): #Python 2.1 compat isinstance + datum.append(newKeywords) + else: + unique = {} + try: + for k in newKeywords: + unique[k] = None + except TypeError: + # Not a sequence + datum.append(newKeywords) + else: + datum.extend(unique.keys()) + return datum + else: + raise KeyError + + def getIndexComponents(self): + """ return sequence of indexed attributes """ + return self._components.values() + + def getComponentIndexNames(self): """ returns component index names to composite """ - ids = [] + return self._components.keys() - fields = self.getIndexSourceNames() - for attr in fields: - c = attr.split(':') - ids.append(c.pop()) - - return tuple(ids) - def getComponentIndexAttributes(self): """ returns list of attributes of each component index to composite""" - attributes=[] - - fields = self.getIndexSourceNames() - for idx in fields: - attr = idx.split(':') - if len(attr) == 1: - attributes.append(attr) - else: - attributes.append(attr[1:]) + return tuple([a.attributes for a in self._components.values()]) - return tuple(attributes) def getEntryForObject(self, documentId, default=_marker): """Takes a document ID and returns all the information we have @@ -461,8 +620,9 @@ # default: return unique values from first component - if name is None: - name = self.getComponentIndexNames()[0] + if name is None: + return super(CompositeIndex,self).uniqueValues( name=name, withLengths=withLengths) + if self._cindexes.has_key(name): index = self._cindexes[name] @@ -503,36 +663,26 @@ cquery = query.copy() - cIdxs = self.getComponentIndexNames() + components = self.getIndexComponents() records=[] - for name in cIdxs: - abort = False - - #TODO query_options - # if intex_type == "FieldIndex": - # query_options = ["query","range"] - # elif intex_type == "KeywordIndex": - # query_options = ["query","operator","range"] + + for c in components: + query_options = QUERY_OPTIONS[c.type] + rec = parseIndexRequest(query, c.id, query_options) - query_options = ["query","range"] - rec = parseIndexRequest(query, name, query_options) - - - if rec.keys is None: continue - - records.append((name, rec)) - + records.append((c.id, rec)) + if not records: + return query cquery.update( { self.id: { 'query': records }} ) - # delete obsolete query attributes from request - for i in cIdxs[:len(records)+1]: + for i in [ r[0] for r in records ]: if cquery.has_key(i): del cquery[i] @@ -540,8 +690,6 @@ return cquery - - manage = manage_main = DTMLFile('dtml/manageCompositeIndex', globals()) manage_main._setName('manage_main') manage_browse = DTMLFile('dtml/browseIndex', globals()) Modified: Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/dtml/addCompositeIndex.dtml =================================================================== --- Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/dtml/addCompositeIndex.dtml 2010-10-10 21:15:16 UTC (rev 117418) +++ Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/dtml/addCompositeIndex.dtml 2010-10-10 22:25:29 UTC (rev 117419) @@ -26,7 +26,7 @@ </div> </td> <td align="left" valign="top"> - <input type="text" name="id" size="40" /> + <input type="text" name="id" size="10" /> </td> </tr> @@ -38,9 +38,13 @@ </div> </td> <td align="left" valign="top"> - <input type="text" name="extra.indexed_attrs:record:string" size="40" /><br/> - <em>indexId1,indexId2,...</em> or<br/> - <em>indexId1:attribute11:attribute12:...,indexId2:attribute21,...</em> + <input type="text" name="extra.id:records:string" size="10" /> + <select name="extra.type:records:string"> + <option value="">FieldIndex</option> + <option value="">KeywordIndex</option> + </select> + <input type="text" name="extra.attributes:records:string" size="40" /><br/> + </td> </tr> Modified: Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/tests/testCompositeIndex.py =================================================================== --- Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/tests/testCompositeIndex.py 2010-10-10 21:15:16 UTC (rev 117418) +++ Zope/branches/andig-compositeindex/src/Products/PluginIndexes/CompositeIndex/tests/testCompositeIndex.py 2010-10-10 22:25:29 UTC (rev 117419) @@ -54,7 +54,10 @@ def setUp(self): - self._index = CompositeIndex('comp01',extra = {'indexed_attrs': 'is_default_page,review_state,portal_type'}) + self._index = CompositeIndex('comp01', + extra = [ { 'id': 'is_default_page' ,'type': 'FieldIndex','attributes':''}, + { 'id': 'review_state' ,'type': 'FieldIndex','attributes':''}, + { 'id': 'portal_type' ,'type': 'FieldIndex','attributes':''}]) self._field_indexes = ( FieldIndex('review_state'), FieldIndex('portal_type'), FieldIndex('is_default_page')) @@ -67,20 +70,25 @@ r = index._apply_index(req) if r is not None: r, u = r - w, rs = weightedIntersection(rs, r) - if not rs: - break - return rs + w, rs = weightedIntersection(rs, r) + if not rs: + break + if not rs: + return set() + return set(rs) def _compositeSearch(self, req, expectedValues=None): + query = self._index.make_query(req) rs = None r = self._index._apply_index(query) if r is not None: r, u = r - w, rs = weightedIntersection(rs, r) - return rs + w, rs = weightedIntersection(rs, r) + if not rs: + return set() + return set(rs) def _populateIndexes(self, k , v): @@ -89,6 +97,7 @@ index.index_object( k, v ) + def _clearIndexes(self): self._index.clear() for index in self._field_indexes: @@ -96,9 +105,10 @@ def testPerformance(self): - lengths = [10,100,1000,10000,100000] + lengths = [1000,10000,100000] - queries = [{ 'portal_type' : { 'query': 'Document' } , + queries = [{ 'portal_type' : { 'query': 'Document' }} , + { 'portal_type' : { 'query': 'Document' } , 'review_state' : { 'query': 'pending' } } ,\ { 'is_default_page': { 'query' : True }, 'portal_type' : { 'query': 'Document' } , @@ -110,18 +120,16 @@ st = time() res1 = self._defaultSearch(*args, **kw) - print list(res1) print "atomic: %s hits in %3.2fms" % (len(res1), (time() -st)*1000) st = time() res2 = self._compositeSearch(*args, **kw) - print list(res2) print "composite: %s hits in %3.2fms" % (len(res2), (time() -st)*1000) self.assertEqual(len(res1),len(res2)) + + self.assertEqual(res1,res2) - for i,v in enumerate(res1): - self.assertEqual(res1[i], res2[i]) @@ -130,13 +138,11 @@ print "************************************" print "indexed objects: %s" % l for i in range(l): - name = 'dummy%s' % i + name = '%s' % i obj = RandomTestObject(name) - print obj self._populateIndexes(i,obj) for query in queries: - print query profileSearch(query) _______________________________________________ Zope-Checkins maillist - Zope-Checkins@zope.org https://mail.zope.org/mailman/listinfo/zope-checkins