Merge authors: Michal Hruby (mhr3) Related merge proposals: https://code.launchpad.net/~mhr3/zeitgeist/fts-extras/+merge/92430 proposed by: Michal Hruby (mhr3) review: Approve - Siegfried Gevatter (rainct) ------------------------------------------------------------ revno: 391 [merge] committer: Michal Hruby <michal....@gmail.com> branch nick: zeitgeist timestamp: Fri 2012-02-10 13:30:21 +0100 message: Merged lp:~mhr3/zeitgeist/fts-extras modified: configure.ac extensions/fts++/Makefile.am extensions/fts++/fts.cpp extensions/fts++/fts.h extensions/fts++/fts.vapi extensions/fts++/indexer.cpp extensions/fts++/indexer.h extensions/fts++/stringutils.cpp extensions/fts++/stringutils.h extensions/fts++/test/Makefile.am extensions/fts++/test/test-indexer.cpp extensions/fts++/test/test-stringutils.cpp extensions/fts++/zeitgeist-fts.vala extensions/fts.vala src/remote.vala
-- lp:zeitgeist https://code.launchpad.net/~zeitgeist/zeitgeist/bluebird Your team Zeitgeist Framework Team is subscribed to branch lp:zeitgeist. To unsubscribe from this branch go to https://code.launchpad.net/~zeitgeist/zeitgeist/bluebird/+edit-subscription
=== modified file 'configure.ac' --- configure.ac 2012-02-08 18:54:58 +0000 +++ configure.ac 2012-02-09 15:32:36 +0000 @@ -40,6 +40,30 @@ AC_SUBST(ZEITGEIST_LIBS) ################################################# +# Dee-ICU check +################################################# +DEE_ICU_REQUIRED=1.0.2 + +AC_ARG_WITH([dee-icu], + AS_HELP_STRING([--with-dee-icu[=@<:@no/auto/yes@:>@]], + [Build the FTS extension with dee-icu]), + [with_dee_icu=$withval], + [with_dee_icu="auto"]) + +if test "x$with_dee_icu" = "xauto" ; then + PKG_CHECK_EXISTS([dee-icu-1.0 >= $DEE_ICU_REQUIRED], + with_dee_icu="yes", + with_dee_icu="no") +fi + +if test "x$with_dee_icu" = "xyes" ; then + PKG_CHECK_MODULES(DEE_ICU, dee-icu-1.0 >= $DEE_ICU_REQUIRED) + AC_DEFINE(HAVE_DEE_ICU, 1, [Have dee-icu]) +fi + +AM_CONDITIONAL(HAVE_DEE_ICU, test "x$with_dee_icu" = "xyes") + +################################################# # DBus service ################################################# @@ -88,3 +112,16 @@ fi AC_OUTPUT + +cat <<EOF + +${PACKAGE}-${VERSION} + + Build Environment + Install Prefix: ${prefix} + + Optional dependencies + dee-icu: ${with_dee_icu} + +EOF + === modified file 'extensions/fts++/Makefile.am' --- extensions/fts++/Makefile.am 2012-02-08 18:54:58 +0000 +++ extensions/fts++/Makefile.am 2012-02-09 15:32:36 +0000 @@ -76,6 +76,11 @@ -lxapian \ $(NULL) +if HAVE_DEE_ICU +AM_CPPFLAGS += $(DEE_ICU_CFLAGS) +zeitgeist_fts_LDADD += $(DEE_ICU_LIBS) +endif + BUILT_SOURCES = \ zeitgeist-internal.stamp \ zeitgeist-fts_vala.stamp \ === modified file 'extensions/fts++/fts.cpp' --- extensions/fts++/fts.cpp 2012-02-09 09:32:33 +0000 +++ extensions/fts++/fts.cpp 2012-02-09 18:34:36 +0000 @@ -84,6 +84,36 @@ return results; } +GPtrArray* +zeitgeist_indexer_search_with_relevancies (ZeitgeistIndexer *indexer, + const gchar *search_string, + ZeitgeistTimeRange *time_range, + GPtrArray *templates, + guint offset, + guint count, + ZeitgeistResultType result_type, + gdouble **relevancies, + gint *relevancies_size, + guint *matches, + GError **error) +{ + GPtrArray *results; + ZeitgeistFTS::Controller *_indexer; + + g_return_val_if_fail (indexer != NULL, NULL); + g_return_val_if_fail (search_string != NULL, NULL); + g_return_val_if_fail (ZEITGEIST_IS_TIME_RANGE (time_range), NULL); + g_return_val_if_fail (error == NULL || *error == NULL, NULL); + + _indexer = (ZeitgeistFTS::Controller*) indexer; + + results = _indexer->indexer->SearchWithRelevancies ( + search_string, time_range, templates, offset, count, result_type, + relevancies, relevancies_size, matches, error); + + return results; +} + void zeitgeist_indexer_index_events (ZeitgeistIndexer *indexer, GPtrArray *events) { === modified file 'extensions/fts++/fts.h' --- extensions/fts++/fts.h 2012-02-09 09:32:33 +0000 +++ extensions/fts++/fts.h 2012-02-09 18:34:36 +0000 @@ -43,6 +43,19 @@ guint *matches, GError **error); +GPtrArray* zeitgeist_indexer_search_with_relevancies + (ZeitgeistIndexer *indexer, + const gchar *search_string, + ZeitgeistTimeRange *time_range, + GPtrArray *templates, + guint offset, + guint count, + ZeitgeistResultType result_type, + gdouble **relevancies, + gint *relevancies_size, + guint *matches, + GError **error); + void zeitgeist_indexer_index_events (ZeitgeistIndexer *indexer, GPtrArray *events); === modified file 'extensions/fts++/fts.vapi' --- extensions/fts++/fts.vapi 2012-02-07 17:02:30 +0000 +++ extensions/fts++/fts.vapi 2012-02-09 18:34:36 +0000 @@ -14,6 +14,16 @@ ResultType result_type, out uint matches) throws GLib.Error; + public GLib.GenericArray<Event> search_with_relevancies ( + string search_string, + TimeRange time_range, + GLib.GenericArray<Event> templates, + uint offset, + uint count, + ResultType result_type, + out double[] relevancies, + out uint matches) throws GLib.Error; + public void index_events (GLib.GenericArray<Event> events); public void delete_events (uint[] event_ids); === modified file 'extensions/fts++/indexer.cpp' --- extensions/fts++/indexer.cpp 2012-02-09 09:37:48 +0000 +++ extensions/fts++/indexer.cpp 2012-02-10 11:54:32 +0000 @@ -356,10 +356,40 @@ } } +std::string Indexer::PreprocessString (std::string const& input) +{ + if (input.empty ()) return input; + + std::string result (StringUtils::RemoveUnderscores (input)); + // a simple heuristic for the uncamelcaser + size_t num_digits = StringUtils::CountDigits (result); + if (result.length () > 3 && num_digits < result.length () / 2) + { + // FIXME: process digits?, atm they stay attached to the text + result = StringUtils::UnCamelcase (result); + } + + std::string folded (StringUtils::AsciiFold (result)); + if (!folded.empty ()) + { + result += ' '; + result += folded; + } + +#ifdef DEBUG_PREPROCESSING + if (input != result) + g_debug ("processed: %s\n-> %s", input.c_str (), result.c_str ()); +#endif + + return result; +} + void Indexer::IndexText (std::string const& text) { - // FIXME: ascii folding! tokenizer->index_text (text, 5); + // this is by definition already a human readable display string, + // so it shouldn't need removal of underscores and uncamelcase + tokenizer->index_text (StringUtils::AsciiFold (text), 5); } void Indexer::IndexUri (std::string const& uri, std::string const& origin) @@ -403,9 +433,10 @@ gchar *pn = g_file_get_parse_name (f); gchar *basename = g_path_get_basename (pn); - // FIXME: remove unscores, CamelCase and process digits - tokenizer->index_text (basename, 5); - tokenizer->index_text (basename, 5, "N"); + // remove unscores, CamelCase and process digits + std::string processed (PreprocessString (basename)); + tokenizer->index_text (processed, 5); + tokenizer->index_text (processed, 5, "N"); g_free (basename); // limit the directory indexing to just a few levels @@ -420,17 +451,17 @@ g_free (dir); g_free (pn); - while (path_component.length () > 2 && + while (path_component.length () > 2 && weight_index < G_N_ELEMENTS (path_weights)) { // if this is already home directory we don't want it - if (path_component.length () == home_dir_path.length () && - path_component == home_dir_path) return; + if (path_component == home_dir_path) return; gchar *name = g_path_get_basename (path_component.c_str ()); - // FIXME: un-underscore, uncamelcase, ascii fold - tokenizer->index_text (name, path_weights[weight_index++]); + // un-underscore, uncamelcase, ascii fold + processed = PreprocessString (name); + tokenizer->index_text (processed, path_weights[weight_index++]); dir = g_path_get_dirname (path_component.c_str ()); path_component = dir; @@ -471,9 +502,10 @@ if (g_utf8_validate (unescaped_basename, -1, NULL)) { - // FIXME: remove unscores, CamelCase and process digits - tokenizer->index_text (unescaped_basename, 5); - tokenizer->index_text (unescaped_basename, 5, "N"); + // remove unscores, CamelCase and process digits + std::string processed (PreprocessString (unescaped_basename)); + tokenizer->index_text (processed, 5); + tokenizer->index_text (processed, 5, "N"); } // and also index hostname (taken from origin field if possible) @@ -505,6 +537,7 @@ { // we *really* don't want to index anything with this scheme } + // how about special casing (s)ftp and ssh? else { std::string authority, path, query; @@ -593,12 +626,11 @@ unsigned name_weight = is_subject ? 5 : 2; unsigned comment_weight = 2; - // FIXME: ascii folding somewhere - val = g_app_info_get_display_name (ai); if (val && val[0] != '\0') { - std::string display_name (val); + std::string display_name (PreprocessString (val)); + tokenizer->index_text (display_name, name_weight); tokenizer->index_text (display_name, name_weight, "A"); } @@ -606,9 +638,14 @@ val = g_desktop_app_info_get_generic_name (dai); if (val && val[0] != '\0') { + // this shouldn't need uncamelcasing std::string generic_name (val); + std::string generic_name_folded (StringUtils::AsciiFold (generic_name)); + tokenizer->index_text (generic_name, name_weight); tokenizer->index_text (generic_name, name_weight, "A"); + tokenizer->index_text (generic_name_folded, name_weight); + tokenizer->index_text (generic_name_folded, name_weight, "A"); } if (!is_subject) return true; @@ -642,7 +679,35 @@ return true; } -GPtrArray* Indexer::Search (const gchar *search_string, +std::string Indexer::CompileQueryString (const gchar *search_string, + ZeitgeistTimeRange *time_range, + GPtrArray *templates) +{ + std::string query_string (search_string); + + if (templates && templates->len > 0) + { + std::string filters (CompileEventFilterQuery (templates)); + query_string = "(" + query_string + ") AND (" + filters + ")"; + } + + if (time_range) + { + gint64 start_time = zeitgeist_time_range_get_start (time_range); + gint64 end_time = zeitgeist_time_range_get_end (time_range); + + if (start_time > 0 || end_time < G_MAXINT64) + { + std::string time_filter (CompileTimeRangeFilterQuery (start_time, end_time)); + query_string = "(" + query_string + ") AND (" + time_filter + ")"; + } + } + + g_debug ("query: %s", query_string.c_str ()); + return query_string; +} + +GPtrArray* Indexer::Search (const gchar *search, ZeitgeistTimeRange *time_range, GPtrArray *templates, guint offset, @@ -654,28 +719,22 @@ GPtrArray *results = NULL; try { - std::string query_string(search_string); - - if (templates && templates->len > 0) - { - std::string filters (CompileEventFilterQuery (templates)); - query_string = "(" + query_string + ") AND (" + filters + ")"; - } - - if (time_range) - { - gint64 start_time = zeitgeist_time_range_get_start (time_range); - gint64 end_time = zeitgeist_time_range_get_end (time_range); - - if (start_time > 0 || end_time < G_MAXINT64) - { - std::string time_filter (CompileTimeRangeFilterQuery (start_time, end_time)); - query_string = "(" + query_string + ") AND (" + time_filter + ")"; - } - } - - // FIXME: which result types coalesce? - guint maxhits = count * 3; + std::string query_string (CompileQueryString (search, time_range, templates)); + + // When sorting by some result types, we need to fetch some extra events + // from the Xapian index because the final result set will be coalesced + // on some property of the event + guint maxhits; + if (result_type == 100 || + result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS || + result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_EVENTS) + { + maxhits = count; + } + else + { + maxhits = count * 3; + } if (result_type == 100) { @@ -686,7 +745,6 @@ enquire->set_sort_by_value (VALUE_TIMESTAMP, true); } - g_debug ("query: %s", query_string.c_str ()); Xapian::Query q(query_parser->parse_query (query_string, QUERY_PARSER_FLAGS)); enquire->set_query (q); Xapian::MSet hits (enquire->get_mset (offset, maxhits)); @@ -753,7 +811,119 @@ } catch (Xapian::Error const& e) { - g_warning ("Failed to index event: %s", e.get_msg ().c_str ()); + g_warning ("Failed to search index: %s", e.get_msg ().c_str ()); + g_set_error_literal (error, + ZEITGEIST_ENGINE_ERROR, + ZEITGEIST_ENGINE_ERROR_DATABASE_ERROR, + e.get_msg ().c_str ()); + } + + return results; +} + +GPtrArray* Indexer::SearchWithRelevancies (const gchar *search, + ZeitgeistTimeRange *time_range, + GPtrArray *templates, + guint offset, + guint count, + ZeitgeistResultType result_type, + gdouble **relevancies, + gint *relevancies_size, + guint *matches, + GError **error) +{ + GPtrArray *results = NULL; + try + { + std::string query_string (CompileQueryString (search, time_range, templates)); + + guint maxhits = count; + + if (result_type == 100) + { + enquire->set_sort_by_relevance (); + } + else + { + enquire->set_sort_by_value (VALUE_TIMESTAMP, true); + } + + Xapian::Query q(query_parser->parse_query (query_string, QUERY_PARSER_FLAGS)); + enquire->set_query (q); + Xapian::MSet hits (enquire->get_mset (offset, maxhits)); + Xapian::doccount hitcount = hits.get_matches_estimated (); + + if (result_type == 100) + { + std::vector<unsigned> event_ids; + std::vector<gdouble> relevancy_arr; + Xapian::MSetIterator iter, end; + for (iter = hits.begin (), end = hits.end (); iter != end; ++iter) + { + Xapian::Document doc(iter.get_document ()); + double unserialized = + Xapian::sortable_unserialise (doc.get_value (VALUE_EVENT_ID)); + unsigned event_id = static_cast<unsigned>(unserialized); + event_ids.push_back (event_id); + + double rank = iter.get_percent () / 100.; + relevancy_arr.push_back (rank); + } + + results = zeitgeist_db_reader_get_events (zg_reader, + &event_ids[0], + event_ids.size (), + NULL, + error); + + if (results->len != relevancy_arr.size ()) + { + g_warning ("Results don't match relevancies!"); + g_set_error_literal (error, + ZEITGEIST_ENGINE_ERROR, + ZEITGEIST_ENGINE_ERROR_DATABASE_ERROR, + "Internal database error"); + return NULL; + } + + if (relevancies) + { + *relevancies = (gdouble*) g_memdup (&relevancy_arr[0], + sizeof (gdouble) * results->len); + } + if (relevancies_size) + { + *relevancies_size = relevancy_arr.size (); + } + } + else + { + g_set_error_literal (error, + ZEITGEIST_ENGINE_ERROR, + ZEITGEIST_ENGINE_ERROR_INVALID_ARGUMENT, + "Only RELEVANCY result type is supported"); + /* + * perhaps something like this could be used here? + std::map<unsigned, gdouble> relevancy_map; + foreach (...) + { + double rank = iter.get_percent () / 100.; + if (rank > relevancy_map[event_id]) + { + relevancy_map[event_id] = rank; + } + } + */ + } + + if (matches) + { + *matches = hitcount; + } + } + catch (Xapian::Error const& e) + { + g_warning ("Failed to search index: %s", e.get_msg ().c_str ()); g_set_error_literal (error, ZEITGEIST_ENGINE_ERROR, ZEITGEIST_ENGINE_ERROR_DATABASE_ERROR, === modified file 'extensions/fts++/indexer.h' --- extensions/fts++/indexer.h 2012-02-09 09:37:48 +0000 +++ extensions/fts++/indexer.h 2012-02-10 11:30:52 +0000 @@ -77,7 +77,7 @@ void DeleteEvent (guint32 event_id); void SetDbMetadata (std::string const& key, std::string const& value); - GPtrArray* Search (const gchar *search_string, + GPtrArray* Search (const gchar *search, ZeitgeistTimeRange *time_range, GPtrArray *templates, guint offset, @@ -85,11 +85,26 @@ ZeitgeistResultType result_type, guint *matches, GError **error); + GPtrArray* SearchWithRelevancies (const gchar *search, + ZeitgeistTimeRange *time_range, + GPtrArray *templates, + guint offset, + guint count, + ZeitgeistResultType result_type, + gdouble **relevancies, + gint *relevancies_size, + guint *matches, + GError **error); private: std::string ExpandType (std::string const& prefix, const gchar* unparsed_uri); std::string CompileEventFilterQuery (GPtrArray *templates); std::string CompileTimeRangeFilterQuery (gint64 start, gint64 end); + std::string CompileQueryString (const gchar *search, + ZeitgeistTimeRange *time_range, + GPtrArray *templates); + + std::string PreprocessString (std::string const& input); void AddDocFilters (ZeitgeistEvent *event, Xapian::Document &doc); void IndexText (std::string const& text); === modified file 'extensions/fts++/stringutils.cpp' --- extensions/fts++/stringutils.cpp 2012-02-09 09:32:33 +0000 +++ extensions/fts++/stringutils.cpp 2012-02-10 11:54:32 +0000 @@ -17,9 +17,14 @@ * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamst...@gmail.com> * */ + +#include "stringutils.h" #include <string> +#include <algorithm> -#include "stringutils.h" +#ifdef HAVE_DEE_ICU +#include <dee-icu.h> +#endif using namespace std; @@ -123,6 +128,87 @@ } } +string RemoveUnderscores (string const &input) +{ + string result (input); + std::replace (result.begin (), result.end (), '_', ' '); + + return result; +} + +static bool is_digit (char c) { return c >= '0' && c <= '9'; } + +size_t CountDigits (string const &input) +{ + return std::count_if (input.begin (), input.end (), is_digit); +} + +static GRegex *camelcase_matcher = NULL; + +static gboolean +matcher_cb (const GMatchInfo *match_info, GString *result, gpointer user_data) +{ + gint start_pos; + g_match_info_fetch_pos (match_info, 0, &start_pos, NULL); + if (start_pos != 0) g_string_append_c (result, ' '); + gchar *word = g_match_info_fetch (match_info, 0); + g_string_append (result, word); + g_free (word); + + return FALSE; +} + +string UnCamelcase (string const &input) +{ + if (camelcase_matcher == NULL) + { + camelcase_matcher = g_regex_new ("(?<=^|[[:lower:]])[[:upper:]]+[^[:upper:]]+", G_REGEX_OPTIMIZE, (GRegexMatchFlags) 0, NULL); + if (camelcase_matcher == NULL) g_critical ("Unable to create matcher!"); + } + + gchar *result = g_regex_replace_eval (camelcase_matcher, input.c_str (), + input.length (), 0, + (GRegexMatchFlags) 0, + matcher_cb, NULL, NULL); + + string ret (result); + g_free (result); + return ret; +} + +#ifdef HAVE_DEE_ICU +static DeeICUTermFilter *icu_filter = NULL; + +/** + * Use ascii folding filter on the input text and return folded version + * of the original string. + * + * Note that if the folded version is exactly the same as the original + * empty string will be returned. + */ +string AsciiFold (string const& input) +{ + if (icu_filter == NULL) + { + icu_filter = dee_icu_term_filter_new_ascii_folder (); + if (icu_filter == NULL) return ""; + } + + // FIXME: check first if the input contains any non-ascii chars? + + gchar *folded = dee_icu_term_filter_apply (icu_filter, input.c_str ()); + string result (folded); + g_free (folded); + + return result == input ? "" : result; +} +#else +string AsciiFold (string const& input) +{ + return ""; +} +#endif + } /* namespace StringUtils */ } /* namespace ZeitgeistFTS */ === modified file 'extensions/fts++/stringutils.h' --- extensions/fts++/stringutils.h 2012-02-09 09:32:33 +0000 +++ extensions/fts++/stringutils.h 2012-02-10 10:19:52 +0000 @@ -37,6 +37,14 @@ std::string &path, std::string &basename); +std::string RemoveUnderscores (std::string const &input); + +size_t CountDigits (std::string const &input); + +std::string UnCamelcase (std::string const &input); + +std::string AsciiFold (std::string const& input); + } /* namespace StringUtils */ } /* namespace ZeitgeistFTS */ === modified file 'extensions/fts++/test/Makefile.am' --- extensions/fts++/test/Makefile.am 2012-02-08 18:54:58 +0000 +++ extensions/fts++/test/Makefile.am 2012-02-09 15:32:36 +0000 @@ -25,3 +25,8 @@ -lxapian \ $(NULL) +if HAVE_DEE_ICU +AM_CPPFLAGS += $(DEE_ICU_CFLAGS) +test_fts_LDADD += $(DEE_ICU_LIBS) +endif + === modified file 'extensions/fts++/test/test-indexer.cpp' --- extensions/fts++/test/test-indexer.cpp 2012-02-09 09:32:33 +0000 +++ extensions/fts++/test/test-indexer.cpp 2012-02-10 12:07:27 +0000 @@ -145,6 +145,26 @@ return event; } +static ZeitgeistEvent* create_test_event5 (void) +{ + ZeitgeistEvent *event = zeitgeist_event_new (); + ZeitgeistSubject *subject = zeitgeist_subject_new (); + + zeitgeist_subject_set_interpretation (subject, ZEITGEIST_NFO_SOURCE_CODE); + zeitgeist_subject_set_manifestation (subject, ZEITGEIST_NFO_FILE_DATA_OBJECT); + zeitgeist_subject_set_uri (subject, "file:///home/username/projects/GLibSignalImplementation.cpp"); + zeitgeist_subject_set_text (subject, "Because c++ is awesome"); + zeitgeist_subject_set_mimetype (subject, "text/x-c++src"); + + zeitgeist_event_set_interpretation (event, ZEITGEIST_ZG_CREATE_EVENT); + zeitgeist_event_set_manifestation (event, ZEITGEIST_ZG_USER_ACTIVITY); + zeitgeist_event_set_actor (event, "application://gedit.desktop"); + zeitgeist_event_add_subject (event, subject); + + g_object_unref (subject); + return event; +} + // Steals the event, ref it if you want to keep it static guint index_event (Fixture *fix, ZeitgeistEvent *event) @@ -426,6 +446,71 @@ } static void +test_simple_underscores (Fixture *fix, gconstpointer data) +{ + guint matches; + guint event_id; + ZeitgeistEvent* event; + ZeitgeistSubject *subject; + + // add test events to DBs + index_event (fix, create_test_event1 ()); + index_event (fix, create_test_event2 ()); + index_event (fix, create_test_event3 ()); + event_id = index_event (fix, create_test_event4 ()); + + GPtrArray *results = + zeitgeist_indexer_search (fix->indexer, + "fabulo*", + zeitgeist_time_range_new_anytime (), + g_ptr_array_new (), + 0, + 10, + ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS, + &matches, + NULL); + + g_assert_cmpuint (matches, >, 0); + g_assert_cmpuint (results->len, ==, 1); + + event = (ZeitgeistEvent*) results->pdata[0]; + g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id); +} + +static void +test_simple_camelcase (Fixture *fix, gconstpointer data) +{ + guint matches; + guint event_id; + ZeitgeistEvent* event; + ZeitgeistSubject *subject; + + // add test events to DBs + index_event (fix, create_test_event1 ()); + index_event (fix, create_test_event2 ()); + index_event (fix, create_test_event3 ()); + index_event (fix, create_test_event4 ()); + event_id = index_event (fix, create_test_event5 ()); + + GPtrArray *results = + zeitgeist_indexer_search (fix->indexer, + "signal", + zeitgeist_time_range_new_anytime (), + g_ptr_array_new (), + 0, + 10, + ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS, + &matches, + NULL); + + g_assert_cmpuint (matches, >, 0); + g_assert_cmpuint (results->len, ==, 1); + + event = (ZeitgeistEvent*) results->pdata[0]; + g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id); +} + +static void test_simple_cjk (Fixture *fix, gconstpointer data) { guint matches; @@ -517,6 +602,10 @@ setup, test_simple_noexpand, teardown); g_test_add ("/Zeitgeist/FTS/Indexer/SimpleNoexpandValid", Fixture, 0, setup, test_simple_noexpand_valid, teardown); + g_test_add ("/Zeitgeist/FTS/Indexer/SimpleUnderscores", Fixture, 0, + setup, test_simple_underscores, teardown); + g_test_add ("/Zeitgeist/FTS/Indexer/SimpleCamelcase", Fixture, 0, + setup, test_simple_camelcase, teardown); g_test_add ("/Zeitgeist/FTS/Indexer/URLUnescape", Fixture, 0, setup, test_simple_url_unescape, teardown); g_test_add ("/Zeitgeist/FTS/Indexer/IDNSupport", Fixture, 0, === modified file 'extensions/fts++/test/test-stringutils.cpp' --- extensions/fts++/test/test-stringutils.cpp 2012-02-09 09:32:33 +0000 +++ extensions/fts++/test/test-stringutils.cpp 2012-02-10 11:54:32 +0000 @@ -163,6 +163,91 @@ g_assert_cmpstr ("type=A", ==, query.c_str ()); } +static void +test_ascii_fold (Fixture *fix, gconstpointer data) +{ + std::string folded; + + folded = StringUtils::AsciiFold (""); + g_assert_cmpstr ("", ==, folded.c_str ()); + + // if the original matches the folded version, AsciiFold returns "" + folded = StringUtils::AsciiFold ("a"); + g_assert_cmpstr ("", ==, folded.c_str ()); + + folded = StringUtils::AsciiFold ("abcdef"); + g_assert_cmpstr ("", ==, folded.c_str ()); + + folded = StringUtils::AsciiFold ("å"); + g_assert_cmpstr ("a", ==, folded.c_str ()); + + folded = StringUtils::AsciiFold ("åå"); + g_assert_cmpstr ("aa", ==, folded.c_str ()); + + folded = StringUtils::AsciiFold ("aåaåa"); + g_assert_cmpstr ("aaaaa", ==, folded.c_str ()); +} + +static void +test_underscores (Fixture *fix, gconstpointer data) +{ + g_assert_cmpstr ("", ==, StringUtils::RemoveUnderscores ("").c_str ()); + + g_assert_cmpstr (" ", ==, StringUtils::RemoveUnderscores ("_").c_str ()); + + g_assert_cmpstr (" ", ==, StringUtils::RemoveUnderscores ("___").c_str ()); + + g_assert_cmpstr ("abcd", ==, StringUtils::RemoveUnderscores ("abcd").c_str ()); + + g_assert_cmpstr (" abcd ", ==, StringUtils::RemoveUnderscores ("_abcd_").c_str ()); + + g_assert_cmpstr ("a b c d", ==, StringUtils::RemoveUnderscores ("a_b_c_d").c_str ()); +} + +static void +test_uncamelcase (Fixture *fix, gconstpointer data) +{ + g_assert_cmpstr ("", ==, StringUtils::UnCamelcase ("").c_str ()); + + g_assert_cmpstr ("abcd", ==, StringUtils::UnCamelcase ("abcd").c_str ()); + + g_assert_cmpstr ("Abcd", ==, StringUtils::UnCamelcase ("Abcd").c_str ()); + + g_assert_cmpstr ("ABCD", ==, StringUtils::UnCamelcase ("ABCD").c_str ()); + + g_assert_cmpstr ("ABcd", ==, StringUtils::UnCamelcase ("ABcd").c_str ()); + + g_assert_cmpstr ("Abcd Ef", ==, StringUtils::UnCamelcase ("AbcdEf").c_str ()); + + g_assert_cmpstr ("Text Editor", ==, StringUtils::UnCamelcase ("Text Editor").c_str ()); + + g_assert_cmpstr ("py Karaoke", ==, StringUtils::UnCamelcase ("pyKaraoke").c_str ()); + + g_assert_cmpstr ("Zeitgeist Project", ==, StringUtils::UnCamelcase ("ZeitgeistProject").c_str ()); + + g_assert_cmpstr ("Very Nice Camel Case Text", ==, StringUtils::UnCamelcase ("VeryNiceCamelCaseText").c_str ()); + + g_assert_cmpstr ("Ňeedš Ťo Wórk Óń Útf Čhářacters As WelL", ==, + StringUtils::UnCamelcase ("ŇeedšŤoWórkÓńÚtfČhářactersAsWelL").c_str ()); +} + +static void +test_count_digits (Fixture *fix, gconstpointer data) +{ + g_assert_cmpuint (0, ==, StringUtils::CountDigits ("")); + + g_assert_cmpuint (0, ==, StringUtils::CountDigits ("abcdefghijklmnopqrstuvwxyz")); + + g_assert_cmpuint (10, ==, StringUtils::CountDigits ("0123456789")); + + g_assert_cmpuint (1, ==, StringUtils::CountDigits ("abc3")); + + g_assert_cmpuint (3, ==, StringUtils::CountDigits ("::123__poa//weee")); + + g_assert_cmpuint (5, ==, StringUtils::CountDigits ("PCN30129.JPG")); + +} + G_BEGIN_DECLS void test_stringutils_create_suite (void) @@ -173,6 +258,16 @@ setup, test_mangle, teardown); g_test_add ("/Zeitgeist/FTS/StringUtils/SplitUri", Fixture, 0, setup, test_split, teardown); + g_test_add ("/Zeitgeist/FTS/StringUtils/RemoveUnderscores", Fixture, 0, + setup, test_underscores, teardown); + g_test_add ("/Zeitgeist/FTS/StringUtils/UnCamelcase", Fixture, 0, + setup, test_uncamelcase, teardown); + g_test_add ("/Zeitgeist/FTS/StringUtils/CountDigits", Fixture, 0, + setup, test_count_digits, teardown); +#ifdef HAVE_DEE_ICU + g_test_add ("/Zeitgeist/FTS/StringUtils/AsciiFold", Fixture, 0, + setup, test_ascii_fold, teardown); +#endif } G_END_DECLS === modified file 'extensions/fts++/zeitgeist-fts.vala' --- extensions/fts++/zeitgeist-fts.vala 2012-02-09 09:32:33 +0000 +++ extensions/fts++/zeitgeist-fts.vala 2012-02-09 18:34:36 +0000 @@ -132,6 +132,23 @@ events = Events.to_variant (results); } + public async void search_with_relevancies ( + string query_string, Variant time_range, + Variant filter_templates, + uint offset, uint count, uint result_type, + out Variant events, out double[] relevancies, + out uint matches) + throws Error + { + var tr = new TimeRange.from_variant (time_range); + var templates = Events.from_variant (filter_templates); + var results = instance.indexer.search_with_relevancies ( + query_string, tr, templates, offset, count, + (ResultType) result_type, out relevancies, out matches); + + events = Events.to_variant (results); + } + private static void name_acquired_callback (DBusConnection conn) { name_acquired = true; === modified file 'extensions/fts.vala' --- extensions/fts.vala 2012-02-07 12:47:44 +0000 +++ extensions/fts.vala 2012-02-10 09:35:31 +0000 @@ -31,6 +31,14 @@ uint offset, uint count, uint result_type, [DBus (signature = "a(asaasay)")] out Variant events, out uint matches) throws Error; + public abstract async void search_with_relevancies ( + string query_string, + [DBus (signature = "(xx)")] Variant time_range, + [DBus (signature = "a(asaasay)")] Variant filter_templates, + uint offset, uint count, uint result_type, + [DBus (signature = "a(asaasay)")] out Variant events, + out double[] relevancies, + out uint matches) throws Error; } /* Because of a Vala bug we have to define the proxy interface outside of @@ -55,6 +63,7 @@ private const string INDEXER_NAME = "org.gnome.zeitgeist.SimpleIndexer"; private RemoteSimpleIndexer siin; + private bool siin_connection_failed = false; private uint registration_id; private MonitorManager? notifier; @@ -67,6 +76,8 @@ { if (Utils.using_in_memory_database ()) return; + // FIXME: check dbus and see if fts is installed? + // installing a monitor from the daemon will ensure that we don't // miss any notifications that would be emitted in between // zeitgeist start and fts daemon start @@ -109,23 +120,40 @@ try { siin = conn.get_proxy.end<RemoteSimpleIndexer> (res); + siin_connection_failed = false; } catch (IOError err) { + siin_connection_failed = true; warning ("%s", err.message); } } - public async void search (string query_string, Variant time_range, - Variant filter_templates, uint offset, uint count, uint result_type, - out Variant events, out uint matches) throws Error + public async void wait_for_proxy () throws Error { + int i = 0; + while (this.siin == null && i < 6 && !siin_connection_failed) + { + Timeout.add_full (Priority.DEFAULT_IDLE, 250, + wait_for_proxy.callback); + i++; + yield; + } + if (siin == null || !(siin is DBusProxy)) { // FIXME: queue until we have the proxy throw new EngineError.DATABASE_ERROR ( "Not connected to SimpleIndexer"); } + } + + public async void search (string query_string, Variant time_range, + Variant filter_templates, uint offset, uint count, uint result_type, + out Variant events, out uint matches) throws Error + { + if (siin == null) yield wait_for_proxy (); + var timer = new Timer (); yield siin.search (query_string, time_range, filter_templates, offset, count, result_type, @@ -134,6 +162,24 @@ (uint) events.n_children (), matches, timer.elapsed ()); } + public async void search_with_relevancies ( + string query_string, Variant time_range, + Variant filter_templates, uint offset, uint count, uint result_type, + out Variant events, out double[] relevancies, out uint matches) + throws Error + { + if (siin == null) yield wait_for_proxy (); + + var timer = new Timer (); + yield siin.search_with_relevancies ( + query_string, time_range, filter_templates, + offset, count, result_type, + out events, out relevancies, out matches); + + debug ("Got %u[/%u] results from indexer (in %f seconds)", + (uint) events.n_children (), matches, timer.elapsed ()); + } + } [ModuleInit] === modified file 'src/remote.vala' --- src/remote.vala 2012-02-05 14:52:13 +0000 +++ src/remote.vala 2012-02-09 18:34:36 +0000 @@ -121,6 +121,13 @@ uint offset, uint count, uint result_type, [DBus (signature = "a(asaasay)")] out Variant events, out uint matches) throws Error; + public abstract async void search_with_relevancies ( + string query_string, + [DBus (signature = "(xx)")] Variant time_range, + [DBus (signature = "a(asaasay)")] Variant filter_templates, + uint offset, uint count, uint result_type, + [DBus (signature = "a(asaasay)")] out Variant events, + out double[] relevancies, out uint matches) throws Error; } /* FIXME: Remove this! Only here because of a bug in Vala (see ext-fts) */
_______________________________________________ Mailing list: https://launchpad.net/~zeitgeist Post to : zeitgeist@lists.launchpad.net Unsubscribe : https://launchpad.net/~zeitgeist More help : https://help.launchpad.net/ListHelp