Siegfried Gevatter has proposed merging
lp:~zeitgeist/zeitgeist/fts-origin-hashing into lp:zeitgeist.
Requested reviews:
Zeitgeist Framework Team (zeitgeist)
For more details, see:
https://code.launchpad.net/~zeitgeist/zeitgeist/fts-origin-hashing/+merge/98281
--
https://code.launchpad.net/~zeitgeist/zeitgeist/fts-origin-hashing/+merge/98281
Your team Zeitgeist Framework Team is requested to review the proposed merge of
lp:~zeitgeist/zeitgeist/fts-origin-hashing into lp:zeitgeist.
=== modified file 'extensions/fts++/indexer.cpp'
--- extensions/fts++/indexer.cpp 2012-03-16 20:03:05 +0000
+++ extensions/fts++/indexer.cpp 2012-03-19 20:32:18 +0000
@@ -43,6 +43,7 @@
const Xapian::valueno VALUE_EVENT_ID = 0;
const Xapian::valueno VALUE_TIMESTAMP = 1;
const Xapian::valueno VALUE_URI_HASH = 2;
+const Xapian::valueno VALUE_ORIGIN_HASH = 3;
#define QUERY_PARSER_FLAGS \
Xapian::QueryParser::FLAG_PHRASE | Xapian::QueryParser::FLAG_BOOLEAN | \
@@ -778,7 +779,9 @@
}
else
{
- enquire->set_sort_by_value (VALUE_TIMESTAMP, true);
+ bool reversed_sort = not
+ zeitgeist_result_type_is_sort_order_asc (result_type);
+ enquire->set_sort_by_value (VALUE_TIMESTAMP, reversed_sort);
}
if (result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_SUBJECTS ||
@@ -786,7 +789,19 @@
result_type == ZEITGEIST_RESULT_TYPE_MOST_POPULAR_SUBJECTS ||
result_type == ZEITGEIST_RESULT_TYPE_LEAST_POPULAR_SUBJECTS)
{
-enquire->set_collapse_key (VALUE_URI_HASH);
+ enquire->set_collapse_key (VALUE_URI_HASH);
+}
+else if (result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_ORIGIN ||
+result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_ORIGIN ||
+result_type == ZEITGEIST_RESULT_TYPE_MOST_POPULAR_ORIGIN ||
+result_type == ZEITGEIST_RESULT_TYPE_LEAST_POPULAR_ORIGIN)
+{
+ enquire->set_collapse_key (VALUE_ORIGIN_HASH);
+}
+else if (result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS ||
+result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_EVENTS)
+{
+ enquire->set_collapse_key (VALUE_EVENT_ID);
}
Xapian::Query q(query_parser->parse_query (query_string, QUERY_PARSER_FLAGS));
@@ -1096,12 +1111,8 @@
return NULL;
}
-bool reversed_sort =
- result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS ||
- result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_SUBJECTS ||
- result_type == ZEITGEIST_RESULT_TYPE_MOST_POPULAR_SUBJECTS ||
- result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_ORIGIN ||
- result_type == ZEITGEIST_RESULT_TYPE_MOST_POPULAR_ORIGIN;
+bool reversed_sort = not
+zeitgeist_result_type_is_sort_order_asc (result_type);
if (result_type == RELEVANCY_RESULT_TYPE)
{
@@ -1126,10 +1137,8 @@
result_type == ZEITGEIST_RESULT_TYPE_MOST_POPULAR_ORIGIN ||
result_type == ZEITGEIST_RESULT_TYPE_LEAST_POPULAR_ORIGIN)
{
- // FIXME: not really correct but close :)
enquire->set_sort_by_relevance_then_value (VALUE_TIMESTAMP, reversed_sort);
- enquire->set_collapse_key (VALUE_URI_HASH);
- maxhits *= 3;
+ enquire->set_collapse_key (VALUE_ORIGIN_HASH);
}
else
{
@@ -1311,19 +1320,36 @@
return; // ignore this event completely...
}
+ guint8 uri_hash[HASH_LENGTH + 1];
+
// We need the subject URI so we can use Xapian's collapse key feature
// for *_SUBJECT grouping. However, to save space, we'll just save a hash.
// A better option would be using URI's id, but for that we'd need a SQL
// query that'd be subject to races.
// FIXME(?): This doesn't work for events with multiple subjects.
g_checksum_update (checksum, (guchar *) uri.c_str (), -1);
- guint8 uri_hash[HASH_LENGTH + 1];
gsize hash_size = HASH_LENGTH;
g_checksum_get_digest (checksum, uri_hash, hash_size);
g_checksum_reset (checksum);
g_assert (hash_size == HASH_LENGTH);
doc.add_value (VALUE_URI_HASH, std::string((char *) uri_hash, hash_size));
+ size_t colon_pos = uri.find (':');
+ // FIXME: use current_origin once we have that
+ val = zeitgeist_subject_get_origin (subject);
+ // Make sure the schemas of the URI and the origin is the same,
+ // to avoid saving some junk.
+ if (val && colon_pos != std::string::npos &&
+ strncmp (uri.c_str (), val, colon_pos+1) == 0)
+ {
+ g_checksum_update (checksum, (guchar *) val, -1);
+ g_checksum_get_digest (checksum, uri_hash, hash_size);
+ g_checksum_reset (checksum);
+ g_assert (hash_size == HASH_LENGTH);
+ doc.add_value (VALUE_ORIGIN_HASH,
+ std::string((char *) uri_hash, hash_size));
+ }
+
val = zeitgeist_subject_get_text (subject);
if (val && val[0] != '\0')
{
=== modified file 'src/datamodel.vala'
--- src/datamodel.vala 2012-02-18 21:33:57 +0000
+++ src