Mikkel Kamstrup Erlandsen has proposed merging lp:~kamstrup/zeitgeist/query-expansion into lp:zeitgeist.
Requested reviews: Zeitgeist Framework Team (zeitgeist) Huzzah! Smackeroo! I have query expansion fully working now all unit tests passing. Both on the SQL level and on our template matching level. So what does "query expansion" mean. Consider a query for subject with interp. nfo:Media. Right that would only match stuff that has been explicitly identified as nfo:Media (which is not much since we usually can identify whther stuff is Audio, Image, or Video data). With query expansion we'll also match any children of nfo:Media. Ie also nfo:Image, nfo:Audio, and nfo:Video. Also recursively matching children of these like nfo:RasterImage and nfo:Vector image. The way it's implemented is really simple. We simply expand the tree of children and compile a big OR query with everything. -- https://code.launchpad.net/~kamstrup/zeitgeist/query-expansion/+merge/25000 Your team Zeitgeist Framework Team is requested to review the proposed merge of lp:~kamstrup/zeitgeist/query-expansion into lp:zeitgeist.
=== modified file '_zeitgeist/engine/main.py' --- _zeitgeist/engine/main.py 2010-05-01 22:18:55 +0000 +++ _zeitgeist/engine/main.py 2010-05-10 14:47:20 +0000 @@ -32,7 +32,7 @@ from collections import defaultdict from zeitgeist.datamodel import Event as OrigEvent, StorageState, TimeRange, \ - ResultType, get_timestamp_for_now, Interpretation + ResultType, get_timestamp_for_now, Interpretation, Symbol from _zeitgeist.engine.datamodel import Event, Subject from _zeitgeist.engine.extension import ExtensionsCollection, load_class from _zeitgeist.engine import constants @@ -163,16 +163,51 @@ for (event_template, subject_template) in self._build_templates(templates): subwhere = WhereClause(WhereClause.AND) try: - for key in ("interpretation", "manifestation", "actor"): - value = getattr(event_template, key) - if value: - subwhere.add("%s = ?" % key, - getattr(self, "_" + key).id(value)) - for key in ("interpretation", "manifestation", "mimetype"): - value = getattr(subject_template, key) - if value: - subwhere.add("subj_%s = ?" % key, - getattr(self, "_" + key).id(value)) + # Expand event interpretation children + event_interp_where = WhereClause(WhereClause.OR) + for child_interp in (Symbol.find_child_uris_extended(event_template.interpretation)): + if child_interp: + event_interp_where.add("interpretation = ?", + self._interpretation.id(child_interp)) + if event_interp_where: + subwhere.extend(event_interp_where) + + # Expand event manifestation children + event_manif_where = WhereClause(WhereClause.OR) + for child_manif in (Symbol.find_child_uris_extended(event_template.manifestation)): + if child_manif: + event_manif_where.add("manifestation = ?", + self._manifestation.id(child_manif)) + if event_manif_where: + subwhere.extend(event_manif_where) + + # Expand subject interpretation children + su_interp_where = WhereClause(WhereClause.OR) + for child_interp in (Symbol.find_child_uris_extended(subject_template.interpretation)): + if child_interp: + su_interp_where.add("subj_interpretation = ?", + self._interpretation.id(child_interp)) + if su_interp_where: + subwhere.extend(su_interp_where) + + # Expand subject manifestation children + su_manif_where = WhereClause(WhereClause.OR) + for child_manif in (Symbol.find_child_uris_extended(subject_template.manifestation)): + if child_manif: + su_manif_where.add("subj_manifestation = ?", + self._manifestation.id(child_manif)) + if su_manif_where: + subwhere.extend(su_manif_where) + + # FIXME: Expand mime children as well. + # Right now we only do exact matching for mimetypes + if subject_template.mimetype: + subwhere.add("subj_mimetype = ?", + self._mimetype.id(subject_tempalte.mimetype)) + + if event_template.actor: + subwhere.add("actor = ?", + self._actor.id(event_template.actor)) except KeyError: # Value not in DB where_or.register_no_result() @@ -183,6 +218,7 @@ subwhere.add("subj_%s = ?" % key, value) where_or.extend(subwhere) + print "SQL: ", where_or.sql, where_or.arguments return where_or def _build_sql_event_filter(self, time_range, templates, storage_state): === modified file 'test/datamodel-test.py' --- test/datamodel-test.py 2010-04-26 19:42:07 +0000 +++ test/datamodel-test.py 2010-05-10 14:47:20 +0000 @@ -51,6 +51,47 @@ self.assertTrue(f.display_name != None) self.assertTrue(f.doc != None) +class RelationshipTest (unittest.TestCase): + """ + Tests for parent/child relationships in the loaded ontologies + """ + + def testDirectParents (self): + """ + Tests relationship tracking for immediate parents + """ + self.assertTrue(Interpretation.AUDIO.is_a(Interpretation.MEDIA)) + + def testSecondLevelParents (self): + """ + Tests relationship tracking for second level parents + """ + self.assertTrue(Interpretation.VECTOR_IMAGE.is_a(Interpretation.MEDIA)) + self.assertTrue(Interpretation.VECTOR_IMAGE.is_a(Interpretation.IMAGE)) + + def testRootParents (self): + """ + Tests relationship tracking for root nodes, ie Interpretation + and Manifestation + """ + self.assertTrue(Interpretation.VECTOR_IMAGE.is_a(Interpretation)) + self.assertTrue(Manifestation.FILE_DATA_OBJECT.is_a(Manifestation)) + self.assertTrue(Manifestation.USER_ACTIVITY.is_a(Manifestation)) + + def testReflecsive (self): + """ + Assert that a symbol is a child of itself + """ + self.assertTrue(Manifestation.USER_ACTIVITY.is_a(Manifestation.USER_ACTIVITY)) + + def testFindExtendedChildren (self): + self.assertEquals(["foo://bar"], Symbol.find_child_uris_extended("foo://bar")) + self.assertEquals(["http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Icon", + "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#VectorImage", + "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Cursor", + "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#RasterImage", + "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Image"], + Symbol.find_child_uris_extended("http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Image")) class EventTest (unittest.TestCase): def setUp(self): @@ -116,6 +157,17 @@ e.manifestation="ILLEGAL SNAFU" self.assertFalse(e.matches_template(template)) + def testTemplateParentMatching(self): + template = Event.new_for_values( + manifestation=Manifestation.EVENT_MANIFESTATION, + subject_interpretation=Interpretation) + + e = Event.new_for_values( + manifestation=Manifestation.USER_ACTIVITY, + subject_interpretation=Interpretation.TEXT_DOCUMENT, + subject_text="Foo") + self.assertTrue(e.matches_template(template)) + def testTemplateFiltering(self): template = Event.new_for_values(interpretation="stfu:OpenEvent") events = parse_events("test/data/five_events.js") === added file 'test/test-sql.py' --- test/test-sql.py 1970-01-01 00:00:00 +0000 +++ test/test-sql.py 2010-05-10 14:47:20 +0000 @@ -0,0 +1,51 @@ +#! /usr/bin/python +# -.- coding: utf-8 -.- + +# Zeitgeist +# +# Copyright © 2010 Mikkel Kamstrup Erlandsen <mikkel.kamst...@gmail.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import sys, os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +import unittest +from _zeitgeist.engine.sql import * + +class SQLTest (unittest.TestCase): + + def testFlat (self): + where = WhereClause(WhereClause.AND) + where.add ("foo = %s", 10) + where.add ("bar = %s", 27) + self.assertEquals(where.sql % tuple(where.arguments), + "(foo = 10 AND bar = 27)") + + def testNested (self): + where = WhereClause(WhereClause.AND) + where.add ("foo = %s", 10) + + subwhere = WhereClause(WhereClause.OR) + subwhere.add ("subfoo = %s", 68) + subwhere.add ("subbar = %s", 69) + where.extend(subwhere) + where.add ("bar = %s", 11) + + self.assertEquals(where.sql % tuple(where.arguments), + "(foo = 10 AND (subfoo = 68 OR subbar = 69) AND bar = 11)") + +if __name__ == "__main__": + unittest.main() === modified file 'zeitgeist/datamodel.py' --- zeitgeist/datamodel.py 2010-04-29 08:28:44 +0000 +++ zeitgeist/datamodel.py 2010-05-10 14:47:20 +0000 @@ -185,6 +185,22 @@ dikt[self.name] = self for child in self._children.itervalues(): child._visit(dikt) + + @staticmethod + def find_child_uris_extended (uri): + """ + Creates a list of all known child URIs of `uri`, including + `uri` itself in the list. Hence the "extended". If `uri` + is unknown a list containing only `uri` is returned. + """ + try: + symbol = _SYMBOLS_BY_URI[uri] + children = [child.uri for child in symbol.get_all_children()] + children.append(uri) + return children + except KeyError, e: + return [uri] + @property def uri(self): @@ -236,7 +252,51 @@ Returns a list of immediate parent symbols """ return frozenset(self._parents.itervalues()) - + + def is_a (self, parent): + """ + Returns True if this symbol is a child of `parent`. + """ + if not isinstance (parent, Symbol): + try: + parent = _SYMBOLS_BY_URI[parent] + except KeyError, e: + # Parent is not a known URI + print 11111111111, self.uri, parent + return self.uri == parent + + # Invariant: parent is a Symbol + if self.uri == parent.uri : return True + + parent._ensure_all_children() + + # FIXME: We should really check that child.uri is in there, + # but that is not fast with the current code layout + return self.name in parent._all_children + + @staticmethod + def uri_is_a (child, parent): + """ + Returns True if `child` is a child of `parent`. Both `child` + and `parent` arguments must be any combination of + :class:`Symbol` and/or string. + """ + if isinstance (child, basestring): + try: + child = _SYMBOLS_BY_URI[child] + except KeyError, e: + # Child is not a know URI + if isinstance (parent, basestring): + return child == parent + elif isinstance (parent, Symbol): + return child == parent.uri + else: + return False + + if not isinstance (child, Symbol): + raise ValueError("Child argument must be a Symbol or string. Got %s" % type(child)) + + return child.is_a(parent) class TimeRange(list): """ @@ -463,11 +523,13 @@ """ Return True if this Subject matches *subject_template*. Empty fields in the template are treated as wildcards. + Interpretations and manifestations are also matched if they are + children of the types specified in `subject_template`. See also :meth:`Event.matches_template` """ for m in Subject.Fields: - if subject_template[m] and subject_template[m] != self[m] : + if subject_template[m] and not Symbol.uri_is_a (self[m], subject_template[m]): return False return True @@ -693,7 +755,9 @@ """ Return True if this event matches *event_template*. The matching is done where unset fields in the template is - interpreted as wild cards. If the template has more than one + interpreted as wild cards. Interpretations and manifestations + are also matched if they are children of the types specified + in `event_template`. If the template has more than one subject, this event matches if at least one of the subjects on this event matches any single one of the subjects on the template. @@ -707,7 +771,7 @@ tdata = event_template[0] for m in Event.Fields: if m == Event.Timestamp : continue - if tdata[m] and tdata[m] != data[m] : return False + if tdata[m] and not Symbol.uri_is_a (data[m], tdata[m]) : return False # If template has no subjects we have a match if len(event_template[1]) == 0 : return True
_______________________________________________ Mailing list: https://launchpad.net/~zeitgeist Post to : zeitgeist@lists.launchpad.net Unsubscribe : https://launchpad.net/~zeitgeist More help : https://help.launchpad.net/ListHelp