[Zeitgeist] [Bug 948794] [NEW] Current URI field not used

2012-03-07 Thread Michal Hruby
Public bug reported:

Keeping this as a master bug to remind us that we should be using the
current uri field in many places now that there's actually something
pushing MOVE_EVENTs to Zeitgeist. First place to fix would be FTS.

** Affects: zeitgeist
 Importance: Undecided
 Status: New

-- 
You received this bug notification because you are a member of Zeitgeist
Framework Team, which is subscribed to Zeitgeist Framework.
https://bugs.launchpad.net/bugs/948794

Title:
  Current URI field not used

Status in Zeitgeist Framework:
  New

Bug description:
  Keeping this as a master bug to remind us that we should be using the
  current uri field in many places now that there's actually something
  pushing MOVE_EVENTs to Zeitgeist. First place to fix would be FTS.

To manage notifications about this bug go to:
https://bugs.launchpad.net/zeitgeist/+bug/948794/+subscriptions

___
Mailing list: https://launchpad.net/~zeitgeist
Post to : zeitgeist@lists.launchpad.net
Unsubscribe : https://launchpad.net/~zeitgeist
More help   : https://help.launchpad.net/ListHelp


[Zeitgeist] [Branch ~zeitgeist/zeitgeist/bluebird] Rev 414: Use a const int for relevancy result type

2012-03-07 Thread noreply

revno: 414
committer: Michal Hruby michal@gmail.com
branch nick: zeitgeist
timestamp: Wed 2012-03-07 16:56:30 +0100
message:
  Use a const int for relevancy result type
modified:
  extensions/fts++/indexer.cpp


--
lp:zeitgeist
https://code.launchpad.net/~zeitgeist/zeitgeist/bluebird

Your team Zeitgeist Framework Team is subscribed to branch lp:zeitgeist.
To unsubscribe from this branch go to 
https://code.launchpad.net/~zeitgeist/zeitgeist/bluebird/+edit-subscription
=== modified file 'extensions/fts++/indexer.cpp'
--- extensions/fts++/indexer.cpp	2012-03-06 22:03:17 +
+++ extensions/fts++/indexer.cpp	2012-03-07 15:56:30 +
@@ -49,6 +49,7 @@
   Xapian::QueryParser::FLAG_WILDCARD
 
 const std::string FTS_MAIN_DIR = fts.index;
+const int RELEVANCY_RESULT_TYPE = 100;
 
 void Indexer::Initialize (GError **error)
 {
@@ -725,7 +726,7 @@
 // from the Xapian index because the final result set will be coalesced
 // on some property of the event
 guint maxhits;
-if (result_type == 100 ||
+if (result_type == RELEVANCY_RESULT_TYPE ||
 result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS ||
 result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_EVENTS)
 {
@@ -736,7 +737,7 @@
   maxhits = count * 3;
 }
 
-if (result_type == 100)
+if (result_type == RELEVANCY_RESULT_TYPE)
 {
   enquire-set_sort_by_relevance ();
 }
@@ -750,7 +751,7 @@
 Xapian::MSet hits (enquire-get_mset (offset, maxhits));
 Xapian::doccount hitcount = hits.get_matches_estimated ();
 
-if (result_type == 100)
+if (result_type == RELEVANCY_RESULT_TYPE)
 {
   std::vectorunsigned event_ids;
   for (Xapian::MSetIterator iter = hits.begin (); iter != hits.end (); ++iter)
@@ -840,7 +841,7 @@
 
 guint maxhits = count;
 
-if (result_type == 100)
+if (result_type == RELEVANCY_RESULT_TYPE)
 {
   enquire-set_sort_by_relevance ();
 }
@@ -863,7 +864,7 @@
 Xapian::MSet hits (enquire-get_mset (offset, maxhits));
 Xapian::doccount hitcount = hits.get_matches_estimated ();
 
-if (result_type == 100)
+if (result_type == RELEVANCY_RESULT_TYPE)
 {
   std::vectorunsigned event_ids;
   std::vectorgdouble relevancy_arr;

___
Mailing list: https://launchpad.net/~zeitgeist
Post to : zeitgeist@lists.launchpad.net
Unsubscribe : https://launchpad.net/~zeitgeist
More help   : https://help.launchpad.net/ListHelp


[Zeitgeist] [Branch ~zeitgeist/zeitgeist/bluebird] Rev 415: FTS++: Save hashes of URIs and use Xapian's collapse option to group by them

2012-03-07 Thread noreply

revno: 415
author: Siegfried-Angel Gevatter Pujals siegfr...@gevatter.com
committer: Michal Hruby michal@gmail.com
branch nick: bluebird
timestamp: Mon 2012-03-05 21:40:24 +0100
message:
  FTS++: Save hashes of URIs and use Xapian's collapse option to group by them
 when querying with *_SUBJECT result types.
modified:
  extensions/fts++/indexer.cpp
  extensions/fts++/indexer.h


--
lp:zeitgeist
https://code.launchpad.net/~zeitgeist/zeitgeist/bluebird

Your team Zeitgeist Framework Team is subscribed to branch lp:zeitgeist.
To unsubscribe from this branch go to 
https://code.launchpad.net/~zeitgeist/zeitgeist/bluebird/+edit-subscription
=== modified file 'extensions/fts++/indexer.cpp'
--- extensions/fts++/indexer.cpp	2012-03-07 15:56:30 +
+++ extensions/fts++/indexer.cpp	2012-03-05 20:40:24 +
@@ -23,6 +23,7 @@
 #include <xapian.h>
 #include <queue>
 #include <vector>
+#include <cassert>
 
 #include <gio/gio.h>
 #include <gio/gdesktopappinfo.h>
@@ -42,6 +43,7 @@
 
 const Xapian::valueno VALUE_EVENT_ID = 0;
 const Xapian::valueno VALUE_TIMESTAMP = 1;
+const Xapian::valueno VALUE_URI_HASH = 2;
 
 #define QUERY_PARSER_FLAGS \
   Xapian::QueryParser::FLAG_PHRASE | Xapian::QueryParser::FLAG_BOOLEAN | \
@@ -101,6 +103,11 @@
 this-query_parser-set_database (*this-db);
 
 this-enquire = new Xapian::Enquire (*this-db);
+
+assert (g_checksum_type_get_length (G_CHECKSUM_MD5) == 16);
+this->checksum = g_checksum_new (G_CHECKSUM_MD5);
+if (!this->checksum)
+g_critical ("GChecksum initialization failed.");
 
   }
   catch (const Xapian::Error xp_error)
@@ -728,7 +735,11 @@
 guint maxhits;
 if (result_type == RELEVANCY_RESULT_TYPE ||
 result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS ||
-result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_EVENTS)
+result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_EVENTS ||
+result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_SUBJECTS ||
+result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_SUBJECTS ||
+result_type == ZEITGEIST_RESULT_TYPE_MOST_POPULAR_SUBJECTS ||
+result_type == ZEITGEIST_RESULT_TYPE_LEAST_POPULAR_SUBJECTS)
 {
   maxhits = count;
 }
@@ -746,6 +757,14 @@
   enquire-set_sort_by_value (VALUE_TIMESTAMP, true);
 }
 
+if (result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_SUBJECTS ||
+result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_SUBJECTS ||
+result_type == ZEITGEIST_RESULT_TYPE_MOST_POPULAR_SUBJECTS ||
+result_type == ZEITGEIST_RESULT_TYPE_LEAST_POPULAR_SUBJECTS)
+{
+enquire-set_collapse_key (VALUE_URI_HASH);
+}
+
 Xapian::Query q(query_parser-parse_query (query_string, QUERY_PARSER_FLAGS));
 enquire-set_query (q);
 Xapian::MSet hits (enquire-get_mset (offset, maxhits));
@@ -989,6 +1008,19 @@
 return; // ignore this event completely...
   }
 
+  // We need the subject URI so we can use Xapian's collapse key feature
+  // for *_SUBJECT grouping. However, to save space, we'll just save a hash.
+  // A better option would be using URI's id, but for that we'd need a SQL
+  // query that'd be subject to races.
+  // FIXME(?): This doesn't work for events with multiple subjects.
+  g_checksum_update (checksum, (guchar *) uri.c_str (), -1);
+  guint8 uri_hash[17];
+  gsize hash_size = 16;
+  g_checksum_get_digest (checksum, uri_hash, hash_size);
+  assert (hash_size == 16);
+  doc.add_value (VALUE_URI_HASH, std::string((char *) uri_hash, 16));
+  g_checksum_reset (checksum);
+
   val = zeitgeist_subject_get_text (subject);
   if (val  val[0] != '\0')
   {

=== modified file 'extensions/fts++/indexer.h'
--- extensions/fts++/indexer.h	2012-02-14 16:56:04 +
+++ extensions/fts++/indexer.h	2012-03-05 20:40:24 +
@@ -21,6 +21,7 @@
 #define _ZGFTS_INDEXER_H_
 
 #include <glib-object.h>
+#include <glib/gchecksum.h>
 #include <gio/gio.h>
 #include <xapian.h>
 
@@ -42,6 +43,7 @@
 , query_parser (NULL)
 , enquire (NULL)
 , tokenizer (NULL)
+, checksum (NULL)
 , clear_failed_id (0)
   {
 const gchar *home_dir = g_get_home_dir ();
@@ -54,6 +56,7 @@
 if (enquire) delete enquire;
 if (query_parser) delete query_parser;
 if (db) delete db;
+if (checksum) { g_checksum_free (checksum); checksum = NULL; }
 
 for (AppInfoMap::iterator it = app_info_cache.begin ();
  it != app_info_cache.end (); ++it)
@@ -120,6 +123,7 @@
   Xapian::TermGenerator*tokenizer;
   AppInfoMapapp_info_cache;
   ApplicationSetfailed_lookups;
+  GChecksum *checksum;
 
   guint clear_failed_id;
   std::string   home_dir_path;

___
Mailing list: https://launchpad.net/~zeitgeist
Post to : zeitgeist@lists.launchpad.net
Unsubscribe : https://launchpad.net/~zeitgeist
More help   : https://help.launchpad.net/ListHelp

[Zeitgeist] [Branch ~zeitgeist/zeitgeist/bluebird] Rev 417: Add talis to tools

2012-03-07 Thread noreply

revno: 417
committer: Michal Hruby michal@gmail.com
branch nick: zeitgeist
timestamp: Wed 2012-03-07 21:25:20 +0100
message:
  Add talis to tools
added:
  tools/talis


--
lp:zeitgeist
https://code.launchpad.net/~zeitgeist/zeitgeist/bluebird

Your team Zeitgeist Framework Team is subscribed to branch lp:zeitgeist.
To unsubscribe from this branch go to 
https://code.launchpad.net/~zeitgeist/zeitgeist/bluebird/+edit-subscription
=== added file 'tools/talis'
--- tools/talis	1970-01-01 00:00:00 +
+++ tools/talis	2012-03-07 20:25:20 +
@@ -0,0 +1,79 @@
+#! /usr/bin/python
+
+import sys, os
+from zeitgeist.datamodel import Event, Subject, ResultType, TimeRange
+from zeitgeist.client import ZeitgeistDBusInterface
+
+USAGE = \
+Talis is a command line tool to search your history.
+USAGE:
+	talis [options] enter search terms here
+
+OPTIONS:
+	--mode=, -m=   MODE
+	--offset=, -o= Offset into result set
+	--hits=, -h=   Max number of hits to return
+
+MODES:
+	LeastPopularActor
+	LeastPopularSubjects
+	LeastRecentActor
+	LeastRecentEvents
+	LeastRecentSubjects
+	MostPopularActor
+	MostPopularSubjects
+	MostRecentActor
+	MostRecentEvents
+	MostRecentSubjects
+	Relevancy (default)
+
+
+if len(sys.argv) <= 1:
+	print USAGE
+	raise SystemExit(1)
+
+offset = 0
+maxhits = 10
+mode = 100
+mode_string = Relevancy
+query = 
+relevancy_mode = False
+for i in range(1, len(sys.argv)):
+	if sys.argv[i].startswith(--mode=) or sys.argv[i].startswith(-m=):
+		mode_string = sys.argv[1].replace(--mode=, ).replace(-m=, ).strip()
+		if mode_string == Relevancy:
+			mode = 100
+		else:
+			mode = getattr(ResultType, mode_string)
+	elif sys.argv[i].startswith(--offset=) or sys.argv[i].startswith(-o=):
+		offset = int(sys.argv[i].replace(--offset=, ).replace(-o=, ).strip())
+	elif sys.argv[i].startswith(--hits=) or sys.argv[i].startswith(-h=):
+		maxhits = int(sys.argv[i].replace(--hits=, ).replace(-h=, ).strip())
+	elif sys.argv[i] in [--relevancies, -r]:
+		relevancy_mode = True
+	else:
+		query +=   + sys.argv[i]
+		
+query = query.strip()
+
+fts = ZeitgeistDBusInterface().get_extension(Index, index/activity)
+if relevancy_mode:
+	results, relevancies, count = fts.SearchWithRelevancies(query, TimeRange.always(), [], 2, offset, maxhits, mode)
+else:
+	results, count = fts.Search(query, TimeRange.always(), [], offset, maxhits, mode)
+
+if len(results) == 0:
+print No hits for '%s' % query
+raise SystemExit(0)
+
+print %s-%s of %s hits for '%s'. Sorted by '%s': % (offset+1, len(results)+offset, count, query, Relevancy+ + mode_string if relevancy_mode else mode_string)
+
+i = offset+1
+events = map(Event, results)
+for event in events:
+	for subject in event.subjects:
+		if relevancy_mode:
+			print  %s. %s\t(%.2f) % (i, subject.uri, relevancies[i-1])
+		else:
+			print  %s. %s % (i, subject.uri)
+		i += 1

___
Mailing list: https://launchpad.net/~zeitgeist
Post to : zeitgeist@lists.launchpad.net
Unsubscribe : https://launchpad.net/~zeitgeist
More help   : https://help.launchpad.net/ListHelp


[Zeitgeist] [Merge] lp:~mhr3/zeitgeist/fts-secondary-sorting into lp:zeitgeist

2012-03-07 Thread Michal Hruby
Michal Hruby has proposed merging lp:~mhr3/zeitgeist/fts-secondary-sorting into 
lp:zeitgeist.

Requested reviews:
  Zeitgeist Framework Team (zeitgeist)

For more details, see:
https://code.launchpad.net/~mhr3/zeitgeist/fts-secondary-sorting/+merge/96479

Implements secondary sorting based on ResultType in the 
SearchWithRelevancies method.
-- 
https://code.launchpad.net/~mhr3/zeitgeist/fts-secondary-sorting/+merge/96479
Your team Zeitgeist Framework Team is requested to review the proposed merge of 
lp:~mhr3/zeitgeist/fts-secondary-sorting into lp:zeitgeist.
=== modified file 'extensions/fts++/indexer.cpp'
--- extensions/fts++/indexer.cpp	2012-03-07 16:08:26 +
+++ extensions/fts++/indexer.cpp	2012-03-07 22:37:19 +
@@ -23,6 +23,7 @@
 #include <xapian.h>
 #include <queue>
 #include <vector>
+#include <cmath>
 
 #include <gio/gio.h>
 #include <gio/gdesktopappinfo.h>
@@ -804,7 +805,6 @@
 
   if (event_templates-len  0)
   {
-ZeitgeistTimeRange *time_range = zeitgeist_time_range_new_anytime ();
 results = zeitgeist_db_reader_find_events (zg_reader,
time_range,
event_templates,
@@ -813,8 +813,6 @@
result_type,
NULL,
error);
-
-g_object_unref (time_range);
   }
   else
   {
@@ -841,6 +839,34 @@
   return results;
 }
 
+static gint
+sort_events_by_relevance (gconstpointer a, gconstpointer b, gpointer user_data)
+{
+  gdouble rel1 = 0.0;
+  gdouble rel2 = 0.0;
+  std::map<unsigned, gdouble>::const_iterator it;
+  ZeitgeistEvent **e1 = (ZeitgeistEvent**) a;
+  ZeitgeistEvent **e2 = (ZeitgeistEvent**) b;
+  std::map<unsigned, gdouble> const& relevancy_map =
+*(static_cast<std::map<unsigned, gdouble>*> (user_data));
+
+  it = relevancy_map.find (zeitgeist_event_get_id (*e1));
+  if (it != relevancy_map.end ()) rel1 = it->second;
+
+  it = relevancy_map.find (zeitgeist_event_get_id (*e2));
+  if (it != relevancy_map.end ()) rel2 = it->second;
+
+  gdouble delta = rel1 - rel2;
+  if (fabs (delta) < 0.1)
+  {
+// relevancy of both items is the same, let's make use of stable sort
+return e1 > e2 ? 1 : -1;
+  }
+
+  // we want the higher ranked events first
+  return (delta < 0) ? 1 : -1;
+}
+
 GPtrArray* Indexer::SearchWithRelevancies (const gchar *search,
ZeitgeistTimeRange *time_range,
GPtrArray *templates,
@@ -860,24 +886,51 @@
 
 guint maxhits = count;
 
-if (result_type == RELEVANCY_RESULT_TYPE)
-{
-  enquire-set_sort_by_relevance ();
-}
-else
-{
-  enquire-set_sort_by_value (VALUE_TIMESTAMP, true);
-}
-
 if (storage_state != ZEITGEIST_STORAGE_STATE_ANY)
 {
   g_set_error_literal (error,
ZEITGEIST_ENGINE_ERROR,
ZEITGEIST_ENGINE_ERROR_INVALID_ARGUMENT,
-   Only ANY stogate state is supported);
+   Only ANY storage state is supported);
   return NULL;
 }
 
+if (result_type == RELEVANCY_RESULT_TYPE)
+{
+  enquire->set_sort_by_relevance ();
+}
+else if (result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS ||
+result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_EVENTS)
+{
+  enquire->set_sort_by_relevance_then_value (VALUE_TIMESTAMP, true);
+  enquire->set_collapse_key (VALUE_EVENT_ID);
+}
+else if (result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_SUBJECTS ||
+result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_SUBJECTS ||
+result_type == ZEITGEIST_RESULT_TYPE_MOST_POPULAR_SUBJECTS ||
+result_type == ZEITGEIST_RESULT_TYPE_LEAST_POPULAR_SUBJECTS)
+{
+  enquire->set_sort_by_relevance_then_value (VALUE_TIMESTAMP, true);
+  enquire->set_collapse_key (VALUE_URI_HASH);
+}
+else if (result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_ORIGIN ||
+result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_ORIGIN ||
+result_type == ZEITGEIST_RESULT_TYPE_MOST_POPULAR_ORIGIN ||
+result_type == ZEITGEIST_RESULT_TYPE_LEAST_POPULAR_ORIGIN)
+{
+  // FIXME: not really correct but close :)
+  enquire->set_sort_by_relevance_then_value (VALUE_TIMESTAMP, true);
+  enquire->set_collapse_key (VALUE_URI_HASH);
+  maxhits *= 3;
+}
+else
+{
+  // throw an error for these?
+  enquire->set_sort_by_relevance_then_value (VALUE_TIMESTAMP, true);
+  enquire->set_collapse_key (VALUE_EVENT_ID);
+  maxhits *= 3;
+}
+
 Xapian::Query q(query_parser-parse_query (query_string, QUERY_PARSER_FLAGS));
 enquire-set_query (q);
 Xapian::MSet hits (enquire-get_mset (offset, maxhits));
@@ -906,6 +959,8 @@
 NULL,