Seif Lotfy has proposed merging lp:~seif/zeitgeist/memory into lp:zeitgeist.
Requested reviews: Zeitgeist Framework Team (zeitgeist). For more details, see: https://code.launchpad.net/~seif/zeitgeist/memory/+merge/63848 Reduce memory consumption by: 1) using generators, 2) disabling the SQL cache (no real performance decline), 3) using arrays for storing ids instead of lists, and 4) using tuples instead of lists where possible. The result is lower memory consumption. I think more can be done if we start using __slots__, but this is a clean change without messing up the API/ABI. -- https://code.launchpad.net/~seif/zeitgeist/memory/+merge/63848 Your team Zeitgeist Framework Team is requested to review the proposed merge of lp:~seif/zeitgeist/memory into lp:zeitgeist.
=== modified file '_zeitgeist/engine/datamodel.py' --- _zeitgeist/engine/datamodel.py 2011-01-17 15:54:47 +0000 +++ _zeitgeist/engine/datamodel.py 2011-06-08 11:26:25 +0000 @@ -78,4 +78,4 @@ }.iteritems(): for prop in props: datasource[prop] = plaintype(datasource[prop]) - return list(datasource) + return tuple(datasource) === modified file '_zeitgeist/engine/main.py' --- _zeitgeist/engine/main.py 2011-06-04 14:49:19 +0000 +++ _zeitgeist/engine/main.py 2011-06-08 11:26:25 +0000 @@ -29,6 +29,7 @@ import os import logging from collections import defaultdict +from array import array from zeitgeist.datamodel import Event as OrigEvent, StorageState, TimeRange, \ ResultType, get_timestamp_for_now, Interpretation, Symbol, NEGATION_OPERATOR, WILDCARD @@ -199,8 +200,8 @@ return [] # Split ids into cached and uncached - uncached_ids = [] - cached_ids = [] + uncached_ids = array("i") + cached_ids = array("i") # If ids batch greater than MAX_CACHE_BATCH_SIZE ids ignore cache use_cache = True @@ -238,19 +239,19 @@ sorted_events[n] = event # Get uncached events - rows = tuple(row for row in self._cursor.execute(""" - SELECT * FROM event_view - WHERE id IN (%s) - """ % ",".join("%d" % id for id in uncached_ids))) + rows = self._cursor.execute(""" SELECT * FROM event_view WHERE id IN (%s) + """ % ",".join("%d" % id for id in uncached_ids)) - log.debug("Got %d raw events in %fs" % (len(rows), time.time()-t)) + time_get_uncached = time.time() - t t = time.time() t_get_event = 0 t_get_subject = 0 t_apply_get_hooks = 0 + row_counter = 0 for row in rows: + row_counter += 1 # Assumption: all rows of a same event for its different # subjects are in consecutive order. 
t_get_event -= time.time() @@ -286,6 +287,7 @@ # at a decent level + log.debug("Got %d raw events in %fs" % (row_counter, time_get_uncached)) log.debug("Got %d events in %fs" % (len(sorted_events), time.time()-t)) log.debug(" Where time spent in _get_event_from_row in %fs" % (t_get_event)) log.debug(" Where time spent in _get_subject_from_row in %fs" % (t_get_subject)) @@ -561,13 +563,12 @@ if max_events > 0: sql += " LIMIT %d" % max_events - - result = tuple(r[0] for r in self._cursor.execute(sql, where.arguments)) + result = array("i", self._cursor.execute(sql, where.arguments).fetch(0)) if return_mode == 0: log.debug("Found %d event IDs in %fs" % (len(result), time.time()- t)) elif return_mode == 1: - log.debug("Found %d events IDs in %fs" % (len(result), time.time()- t)) + log.debug("Found %d events in %fs" % (len(result), time.time()- t)) result = self.get_events(ids=result, sender=sender) else: raise Exception("%d" % return_mode) === modified file '_zeitgeist/engine/remote.py' --- _zeitgeist/engine/remote.py 2011-06-02 20:15:11 +0000 +++ _zeitgeist/engine/remote.py 2011-06-08 11:26:25 +0000 @@ -77,7 +77,7 @@ for event in events: if event is not None: event._make_dbus_sendable() - return [NULL_EVENT if event is None else event for event in events] + return tuple(NULL_EVENT if event is None else event for event in events) # Reading stuff === modified file '_zeitgeist/engine/sql.py' --- _zeitgeist/engine/sql.py 2011-05-18 20:48:13 +0000 +++ _zeitgeist/engine/sql.py 2011-06-08 11:26:25 +0000 @@ -75,6 +75,14 @@ explain_query(super(UnicodeCursor, self), statement, parameters) return super(UnicodeCursor, self).execute(statement, parameters) + def fetch(self, index=-1): + if index >= 0: + for row in self: + yield row[index] + else: + for row in self: + yield row + def _get_schema_version (cursor, schema_name): """ Returns the schema version for schema_name or returns 0 in case @@ -206,6 +214,8 @@ # we decided to set locking_mode to EXCLUSIVE, from now on only # one 
connection to the database is allowed to revert this setting set locking_mode to NORMAL. cursor.execute("PRAGMA locking_mode = EXCLUSIVE") + # Seif: Disable cache since we already kinda support our own cache (LRUCache) + cursor.execute("PRAGMA cache_size = 0") # thekorn: as part of the workaround for (LP: #598666) we need to # create the '_fix_cache' TEMP table on every start, === modified file 'test/engine-test.py' --- test/engine-test.py 2011-05-07 12:00:54 +0000 +++ test/engine-test.py 2011-06-08 11:26:25 +0000 @@ -446,7 +446,7 @@ event = Event.new_for_values(subjects=[subj1, subj2]) orig_ids = self.engine.insert_events([event]) result_ids = self.engine.find_eventids(TimeRange.always(), [Event()], StorageState.Any, 0, 1) - self.assertEquals(orig_ids, result_ids) + self.assertEquals(orig_ids, list(result_ids)) def testFindEventsEventTemplate(self): import_events("test/data/five_events.js", self.engine) @@ -603,7 +603,7 @@ [tmpl], StorageState.Any, 10, ResultType.MostRecentEvents) self.assertEquals(1, len(ids)) - self.assertEquals(_ids, ids) + self.assertEquals(_ids, list(ids)) def testNegation(self): import_events("test/data/five_events.js", self.engine) @@ -1035,7 +1035,7 @@ reverse=True ) ] - self.assertEquals(ids, sorted_event_ids) + self.assertEquals(list(ids), sorted_event_ids) def testResultTypesLeastRecentEvents(self): import_events("test/data/five_events.js", self.engine) @@ -1049,7 +1049,7 @@ event.id for event in sorted(events, cmp=lambda x, y: cmp(int(x.timestamp), int(y.timestamp))) ] - self.assertEquals(ids, sorted_event_ids) + self.assertEquals(list(ids), sorted_event_ids) def testResultTypesMostPopularActor(self): import_events("test/data/twenty_events.js", self.engine) @@ -1185,20 +1185,20 @@ # Get the least recent actors ids = self.engine.find_eventids(TimeRange.always(), [], StorageState.Any, 0, ResultType.OldestActor) - self.assertEquals(ids, [1, 3, 4]) + self.assertEquals(list(ids), [1, 3, 4]) # Get the least recent actors for "home/boo" 
template = Event.new_for_values(subject_uri="home/boo") ids = self.engine.find_eventids(TimeRange.always(), [template], StorageState.Any, 0, ResultType.OldestActor) - self.assertEquals(ids, [2]) + self.assertEquals(list(ids), [2]) # Let's also try the same with MostRecentActor... Although there # should be no problem here. template = Event.new_for_values(subject_uri="home/boo") ids = self.engine.find_eventids(TimeRange.always(), [template], StorageState.Any, 0, ResultType.OldestActor) - self.assertEquals(ids, [2]) + self.assertEquals(list(ids), [2]) def testResultTypesOldestActor(self): import_events("test/data/twenty_events.js", self.engine)
_______________________________________________ Mailing list: https://launchpad.net/~zeitgeist Post to : zeitgeist@lists.launchpad.net Unsubscribe : https://launchpad.net/~zeitgeist More help : https://help.launchpad.net/ListHelp