[Zeitgeist] [Bug 948794] [NEW] Current URI field not used
Public bug reported: Keeping this as a master bug to remind us that we should be using the current uri field in many places now that there's actually something pushing MOVE_EVENTs to Zeitgeist. First place to fix would be FTS. ** Affects: zeitgeist Importance: Undecided Status: New -- You received this bug notification because you are a member of Zeitgeist Framework Team, which is subscribed to Zeitgeist Framework. https://bugs.launchpad.net/bugs/948794 Title: Current URI field not used Status in Zeitgeist Framework: New Bug description: Keeping this as a master bug to remind us that we should be using the current uri field in many places now that there's actually something pushing MOVE_EVENTs to Zeitgeist. First place to fix would be FTS. To manage notifications about this bug go to: https://bugs.launchpad.net/zeitgeist/+bug/948794/+subscriptions ___ Mailing list: https://launchpad.net/~zeitgeist Post to : zeitgeist@lists.launchpad.net Unsubscribe : https://launchpad.net/~zeitgeist More help : https://help.launchpad.net/ListHelp
[Zeitgeist] [Branch ~zeitgeist/zeitgeist/bluebird] Rev 414: Use a const int for relevancy result type
revno: 414 committer: Michal Hruby michal@gmail.com branch nick: zeitgeist timestamp: Wed 2012-03-07 16:56:30 +0100 message: Use a const int for relevancy result type modified: extensions/fts++/indexer.cpp -- lp:zeitgeist https://code.launchpad.net/~zeitgeist/zeitgeist/bluebird Your team Zeitgeist Framework Team is subscribed to branch lp:zeitgeist. To unsubscribe from this branch go to https://code.launchpad.net/~zeitgeist/zeitgeist/bluebird/+edit-subscription === modified file 'extensions/fts++/indexer.cpp' --- extensions/fts++/indexer.cpp 2012-03-06 22:03:17 + +++ extensions/fts++/indexer.cpp 2012-03-07 15:56:30 + @@ -49,6 +49,7 @@ Xapian::QueryParser::FLAG_WILDCARD const std::string FTS_MAIN_DIR = fts.index; +const int RELEVANCY_RESULT_TYPE = 100; void Indexer::Initialize (GError **error) { @@ -725,7 +726,7 @@ // from the Xapian index because the final result set will be coalesced // on some property of the event guint maxhits; -if (result_type == 100 || +if (result_type == RELEVANCY_RESULT_TYPE || result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS || result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_EVENTS) { @@ -736,7 +737,7 @@ maxhits = count * 3; } -if (result_type == 100) +if (result_type == RELEVANCY_RESULT_TYPE) { enquire-set_sort_by_relevance (); } @@ -750,7 +751,7 @@ Xapian::MSet hits (enquire-get_mset (offset, maxhits)); Xapian::doccount hitcount = hits.get_matches_estimated (); -if (result_type == 100) +if (result_type == RELEVANCY_RESULT_TYPE) { std::vectorunsigned event_ids; for (Xapian::MSetIterator iter = hits.begin (); iter != hits.end (); ++iter) @@ -840,7 +841,7 @@ guint maxhits = count; -if (result_type == 100) +if (result_type == RELEVANCY_RESULT_TYPE) { enquire-set_sort_by_relevance (); } @@ -863,7 +864,7 @@ Xapian::MSet hits (enquire-get_mset (offset, maxhits)); Xapian::doccount hitcount = hits.get_matches_estimated (); -if (result_type == 100) +if (result_type == RELEVANCY_RESULT_TYPE) { std::vectorunsigned event_ids; 
std::vector<gdouble> relevancy_arr; ___ Mailing list: https://launchpad.net/~zeitgeist Post to : zeitgeist@lists.launchpad.net Unsubscribe : https://launchpad.net/~zeitgeist More help : https://help.launchpad.net/ListHelp
[Zeitgeist] [Branch ~zeitgeist/zeitgeist/bluebird] Rev 415: FTS++: Save hashes of URIs and use Xapian's collapse option to group by them
revno: 415 author: Siegfried-Angel Gevatter Pujals siegfr...@gevatter.com committer: Michal Hruby michal@gmail.com branch nick: bluebird timestamp: Mon 2012-03-05 21:40:24 +0100 message: FTS++: Save hashes of URIs and use Xapian's collapse option to group by them when querying with *_SUBJECT result types. modified: extensions/fts++/indexer.cpp extensions/fts++/indexer.h -- lp:zeitgeist https://code.launchpad.net/~zeitgeist/zeitgeist/bluebird Your team Zeitgeist Framework Team is subscribed to branch lp:zeitgeist. To unsubscribe from this branch go to https://code.launchpad.net/~zeitgeist/zeitgeist/bluebird/+edit-subscription === modified file 'extensions/fts++/indexer.cpp' --- extensions/fts++/indexer.cpp 2012-03-07 15:56:30 + +++ extensions/fts++/indexer.cpp 2012-03-05 20:40:24 + @@ -23,6 +23,7 @@ #include xapian.h #include queue #include vector +#include cassert #include gio/gio.h #include gio/gdesktopappinfo.h @@ -42,6 +43,7 @@ const Xapian::valueno VALUE_EVENT_ID = 0; const Xapian::valueno VALUE_TIMESTAMP = 1; +const Xapian::valueno VALUE_URI_HASH = 2; #define QUERY_PARSER_FLAGS \ Xapian::QueryParser::FLAG_PHRASE | Xapian::QueryParser::FLAG_BOOLEAN | \ @@ -101,6 +103,11 @@ this-query_parser-set_database (*this-db); this-enquire = new Xapian::Enquire (*this-db); + +assert (g_checksum_type_get_length (G_CHECKSUM_MD5) == 16); +this-checksum = g_checksum_new (G_CHECKSUM_MD5); +if (!this-checksum) +g_critical (GChecksum initialization failed.); } catch (const Xapian::Error xp_error) @@ -728,7 +735,11 @@ guint maxhits; if (result_type == RELEVANCY_RESULT_TYPE || result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS || -result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_EVENTS) +result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_EVENTS || +result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_SUBJECTS || +result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_SUBJECTS || +result_type == ZEITGEIST_RESULT_TYPE_MOST_POPULAR_SUBJECTS || +result_type == 
ZEITGEIST_RESULT_TYPE_LEAST_POPULAR_SUBJECTS) { maxhits = count; } @@ -746,6 +757,14 @@ enquire->set_sort_by_value (VALUE_TIMESTAMP, true); } +if (result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_SUBJECTS || +result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_SUBJECTS || +result_type == ZEITGEIST_RESULT_TYPE_MOST_POPULAR_SUBJECTS || +result_type == ZEITGEIST_RESULT_TYPE_LEAST_POPULAR_SUBJECTS) +{ +enquire->set_collapse_key (VALUE_URI_HASH); +} + Xapian::Query q(query_parser->parse_query (query_string, QUERY_PARSER_FLAGS)); enquire->set_query (q); Xapian::MSet hits (enquire->get_mset (offset, maxhits)); @@ -989,6 +1008,19 @@ return; // ignore this event completely... } + // We need the subject URI so we can use Xapian's collapse key feature + // for *_SUBJECT grouping. However, to save space, we'll just save a hash. + // A better option would be using URI's id, but for that we'd need a SQL + // query that'd be subject to races. + // FIXME(?): This doesn't work for events with multiple subjects. 
+ g_checksum_update (checksum, (guchar *) uri.c_str (), -1); + guint8 uri_hash[17]; + gsize hash_size = 16; + g_checksum_get_digest (checksum, uri_hash, hash_size); + assert (hash_size == 16); + doc.add_value (VALUE_URI_HASH, std::string((char *) uri_hash, 16)); + g_checksum_reset (checksum); + val = zeitgeist_subject_get_text (subject); if (val val[0] != '\0') { === modified file 'extensions/fts++/indexer.h' --- extensions/fts++/indexer.h 2012-02-14 16:56:04 + +++ extensions/fts++/indexer.h 2012-03-05 20:40:24 + @@ -21,6 +21,7 @@ #define _ZGFTS_INDEXER_H_ #include glib-object.h +#include glib/gchecksum.h #include gio/gio.h #include xapian.h @@ -42,6 +43,7 @@ , query_parser (NULL) , enquire (NULL) , tokenizer (NULL) +, checksum (NULL) , clear_failed_id (0) { const gchar *home_dir = g_get_home_dir (); @@ -54,6 +56,7 @@ if (enquire) delete enquire; if (query_parser) delete query_parser; if (db) delete db; +if (checksum) { g_checksum_free (checksum); checksum = NULL; } for (AppInfoMap::iterator it = app_info_cache.begin (); it != app_info_cache.end (); ++it) @@ -120,6 +123,7 @@ Xapian::TermGenerator*tokenizer; AppInfoMapapp_info_cache; ApplicationSetfailed_lookups; + GChecksum *checksum; guint clear_failed_id; std::string home_dir_path; ___ Mailing list: https://launchpad.net/~zeitgeist Post to : zeitgeist@lists.launchpad.net Unsubscribe :
[Zeitgeist] [Branch ~zeitgeist/zeitgeist/bluebird] Rev 417: Add talis to tools
revno: 417 committer: Michal Hruby michal@gmail.com branch nick: zeitgeist timestamp: Wed 2012-03-07 21:25:20 +0100 message: Add talis to tools added: tools/talis -- lp:zeitgeist https://code.launchpad.net/~zeitgeist/zeitgeist/bluebird Your team Zeitgeist Framework Team is subscribed to branch lp:zeitgeist. To unsubscribe from this branch go to https://code.launchpad.net/~zeitgeist/zeitgeist/bluebird/+edit-subscription === added file 'tools/talis' --- tools/talis 1970-01-01 00:00:00 + +++ tools/talis 2012-03-07 20:25:20 + @@ -0,0 +1,79 @@ +#! /usr/bin/python + +import sys, os +from zeitgeist.datamodel import Event, Subject, ResultType, TimeRange +from zeitgeist.client import ZeitgeistDBusInterface + +USAGE = \ +Talis is a command line tool to search your history. +USAGE: + talis [options] enter search terms here + +OPTIONS: + --mode=, -m= MODE + --offset=, -o= Offset into result set + --hits=, -h= Max number of hits to return + +MODES: + LeastPopularActor + LeastPopularSubjects + LeastRecentActor + LeastRecentEvents + LeastRecentSubjects + MostPopularActor + MostPopularSubjects + MostRecentActor + MostRecentEvents + MostRecentSubjects + Relevancy (default) + + +if len(sys.argv) = 1: + print USAGE + raise SystemExit(1) + +offset = 0 +maxhits = 10 +mode = 100 +mode_string = Relevancy +query = +relevancy_mode = False +for i in range(1, len(sys.argv)): + if sys.argv[i].startswith(--mode=) or sys.argv[i].startswith(-m=): + mode_string = sys.argv[1].replace(--mode=, ).replace(-m=, ).strip() + if mode_string == Relevancy: + mode = 100 + else: + mode = getattr(ResultType, mode_string) + elif sys.argv[i].startswith(--offset=) or sys.argv[i].startswith(-o=): + offset = int(sys.argv[i].replace(--offset=, ).replace(-o=, ).strip()) + elif sys.argv[i].startswith(--hits=) or sys.argv[i].startswith(-h=): + maxhits = int(sys.argv[i].replace(--hits=, ).replace(-h=, ).strip()) + elif sys.argv[i] in [--relevancies, -r]: + relevancy_mode = True + else: + query += + sys.argv[i] + +query 
= query.strip() + +fts = ZeitgeistDBusInterface().get_extension(Index, index/activity) +if relevancy_mode: + results, relevancies, count = fts.SearchWithRelevancies(query, TimeRange.always(), [], 2, offset, maxhits, mode) +else: + results, count = fts.Search(query, TimeRange.always(), [], offset, maxhits, mode) + +if len(results) == 0: +print No hits for '%s' % query +raise SystemExit(0) + +print %s-%s of %s hits for '%s'. Sorted by '%s': % (offset+1, len(results)+offset, count, query, Relevancy+ + mode_string if relevancy_mode else mode_string) + +i = offset+1 +events = map(Event, results) +for event in events: + for subject in event.subjects: + if relevancy_mode: + print %s. %s\t(%.2f) % (i, subject.uri, relevancies[i-1]) + else: + print %s. %s % (i, subject.uri) + i += 1 ___ Mailing list: https://launchpad.net/~zeitgeist Post to : zeitgeist@lists.launchpad.net Unsubscribe : https://launchpad.net/~zeitgeist More help : https://help.launchpad.net/ListHelp
[Zeitgeist] [Merge] lp:~mhr3/zeitgeist/fts-secondary-sorting into lp:zeitgeist
Michal Hruby has proposed merging lp:~mhr3/zeitgeist/fts-secondary-sorting into lp:zeitgeist. Requested reviews: Zeitgeist Framework Team (zeitgeist) For more details, see: https://code.launchpad.net/~mhr3/zeitgeist/fts-secondary-sorting/+merge/96479 Implements secondary sorting based on ResultType to SearchWithRelevancies method. -- https://code.launchpad.net/~mhr3/zeitgeist/fts-secondary-sorting/+merge/96479 Your team Zeitgeist Framework Team is requested to review the proposed merge of lp:~mhr3/zeitgeist/fts-secondary-sorting into lp:zeitgeist. === modified file 'extensions/fts++/indexer.cpp' --- extensions/fts++/indexer.cpp 2012-03-07 16:08:26 + +++ extensions/fts++/indexer.cpp 2012-03-07 22:37:19 + @@ -23,6 +23,7 @@ #include xapian.h #include queue #include vector +#include cmath #include gio/gio.h #include gio/gdesktopappinfo.h @@ -804,7 +805,6 @@ if (event_templates-len 0) { -ZeitgeistTimeRange *time_range = zeitgeist_time_range_new_anytime (); results = zeitgeist_db_reader_find_events (zg_reader, time_range, event_templates, @@ -813,8 +813,6 @@ result_type, NULL, error); - -g_object_unref (time_range); } else { @@ -841,6 +839,34 @@ return results; } +static gint +sort_events_by_relevance (gconstpointer a, gconstpointer b, gpointer user_data) +{ + gdouble rel1 = 0.0; + gdouble rel2 = 0.0; + std::mapunsigned, gdouble::const_iterator it; + ZeitgeistEvent **e1 = (ZeitgeistEvent**) a; + ZeitgeistEvent **e2 = (ZeitgeistEvent**) b; + std::mapunsigned, gdouble const relevancy_map = +*(static_caststd::mapunsigned, gdouble* (user_data)); + + it = relevancy_map.find (zeitgeist_event_get_id (*e1)); + if (it != relevancy_map.end ()) rel1 = it-second; + + it = relevancy_map.find (zeitgeist_event_get_id (*e2)); + if (it != relevancy_map.end ()) rel2 = it-second; + + gdouble delta = rel1 - rel2; + if (fabs (delta) 0.1) + { +// relevancy of both items is the same, let's make use of stable sort +return e1 e2 ? 
1 : -1; + } + + // we want the higher ranked events first + return (delta < 0) ? 1 : -1; +} + GPtrArray* Indexer::SearchWithRelevancies (const gchar *search, ZeitgeistTimeRange *time_range, GPtrArray *templates, @@ -860,24 +886,51 @@ guint maxhits = count; -if (result_type == RELEVANCY_RESULT_TYPE) -{ - enquire->set_sort_by_relevance (); -} -else -{ - enquire->set_sort_by_value (VALUE_TIMESTAMP, true); -} - if (storage_state != ZEITGEIST_STORAGE_STATE_ANY) { g_set_error_literal (error, ZEITGEIST_ENGINE_ERROR, ZEITGEIST_ENGINE_ERROR_INVALID_ARGUMENT, - "Only ANY stogate state is supported"); + "Only ANY storage state is supported"); return NULL; } +if (result_type == RELEVANCY_RESULT_TYPE) +{ + enquire->set_sort_by_relevance (); +} +else if (result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS || +result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_EVENTS) +{ + enquire->set_sort_by_relevance_then_value (VALUE_TIMESTAMP, true); + enquire->set_collapse_key (VALUE_EVENT_ID); +} +else if (result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_SUBJECTS || +result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_SUBJECTS || +result_type == ZEITGEIST_RESULT_TYPE_MOST_POPULAR_SUBJECTS || +result_type == ZEITGEIST_RESULT_TYPE_LEAST_POPULAR_SUBJECTS) +{ + enquire->set_sort_by_relevance_then_value (VALUE_TIMESTAMP, true); + enquire->set_collapse_key (VALUE_URI_HASH); +} +else if (result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_ORIGIN || +result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_ORIGIN || +result_type == ZEITGEIST_RESULT_TYPE_MOST_POPULAR_ORIGIN || +result_type == ZEITGEIST_RESULT_TYPE_LEAST_POPULAR_ORIGIN) +{ + // FIXME: not really correct but close :) + enquire->set_sort_by_relevance_then_value (VALUE_TIMESTAMP, true); + enquire->set_collapse_key (VALUE_URI_HASH); + maxhits *= 3; +} +else +{ + // throw an error for these? 
+ enquire->set_sort_by_relevance_then_value (VALUE_TIMESTAMP, true); + enquire->set_collapse_key (VALUE_EVENT_ID); + maxhits *= 3; +} + Xapian::Query q(query_parser->parse_query (query_string, QUERY_PARSER_FLAGS)); enquire->set_query (q); Xapian::MSet hits (enquire->get_mset (offset, maxhits)); @@ -906,6 +959,8 @@ NULL,