Jamie,
As you requested, the patch for feature 3) is ready; please review it.
Thanks,
Halton.
On Fri, 2008-02-01 at 09:52 -0500, Jamie McCracken wrote:
> > > 3) Constantly changing files - we should ignore these especially
> > > bittorrent ones. Perhaps keep a small size-limited stack of
> recently
> > > indexed files and if one of those files has been changed more than
> 10
> > > times in a few minutes we should ignore them until trackerd next
> > > restarts
> >
> > It is like prison rule.
> >
> > Suppose this stack looks like:
> >
> > file_name first_change_time change_accounts
> > /a 11111 3
> > /b 22222 8
> > /c 33333 1
> >
> > When a file is changed,
> > if ((current_time - first_change_time) > MAX_DURATION) {
> > first_change_time = current_time
> > } else {
> > change_accounts ++;
> > if (change_accounts < MAX_CHANGE_TIMES) {
> > //reflect this change
> > } else {
> > // ignore this change
> > }
> > }
>
> that's pretty much it, but make sure it applies to files only (and not
> emails or conversations)
>
> when we ignore the file - add it to an ignore list in memory. When
> trackerd exits, save that list to file. When trackerd is next
> restarted
> load the list and index the entries and then reset it.
>
> >
> > Stop here if my thinking is totally wrong.
> >
> > If my proto is not wrong, the question is where to have this stack?
> > (1) In memory
> > There will be a long list of all changed files, even those that were
> > changed only once.
> >
> > (2) In database
> > Add a property for each file in Service table. Could be slower.
>
>
> it would not be slower as such, because we are updating the mtime in that
> table anyhow whenever it changes. However, we don't want to do a db change
> at this point, so (1) would be better for now
>
> >
> > Any idea?
>
> Use a fixed size LIFO stack of 50 items max in memory
>
> you can use a static array if you like or, if you prefer, the glib
> double-ended queue
>
> (http://library.gnome.org/devel/glib/unstable/glib-Double-ended-Queues.html)
>
>
> but make sure you pop tail to keep its size limited
Index: trunk/src/trackerd/tracker-utils.h
===================================================================
--- trunk/src/trackerd/tracker-utils.h (revision 1117)
+++ trunk/src/trackerd/tracker-utils.h (working copy)
@@ -197,8 +197,16 @@
EVENT_CACHE_FLUSHED
} LoopEvent;
+
typedef struct { /* one record of the queue stored in tracker->file_change_queue */
+ gchar *uri; /* copy made with g_strdup() when the record is created */
+ time_t first_change_time; /* time() value taken when the record is created */
+ gint num_of_change; /* starts at 1 and is incremented on each further change */
+} FileChange;
+
+typedef struct {
+
gboolean readonly;
TrackerStatus status;
@@ -339,6 +347,9 @@
/* nfs options */
gboolean use_nfs_safe_locking; /* use safer but much slower external lock file when users home dir is on an nfs systems */
+ /* Queue for recording file changes */
+ GQueue *file_change_queue;
+
/* application run time values */
gboolean is_indexing;
gboolean in_flush;
@@ -647,6 +658,7 @@
char * tracker_get_status (void);
+void free_file_change (FileChange **user_data); /* defined in trackerd.c; releases the uri string of *user_data */
gboolean tracker_do_cleanup (const gchar *sig_msg);
gboolean tracker_pause_on_battery (void);
Index: trunk/src/trackerd/trackerd.c
===================================================================
--- trunk/src/trackerd/trackerd.c (revision 1117)
+++ trunk/src/trackerd/trackerd.c (working copy)
@@ -333,7 +333,22 @@
}
+/* Releases a FileChange record: its uri string and the record itself.
+ * @user_data is the address of the caller's FileChange pointer; it may be
+ * NULL or point to NULL, in which case nothing happens.  On return the
+ * caller's pointer is set to NULL so it cannot be used after the free. */
+void
+free_file_change (FileChange **user_data)
+{
+	FileChange *change;
+
+	if (!user_data || !*user_data) {
+		return;
+	}
+
+	change = *user_data;
+
+	g_free (change->uri);
+	change->uri = NULL;
+
+	/* records are allocated with g_new0(), so the struct must be released
+	 * as well, otherwise every dropped record leaks */
+	g_free (change);
+	*user_data = NULL;
+}
+/* GFunc adapter used with g_queue_foreach(): hands one queued FileChange
+ * to free_file_change().  The foreach user_data argument is not used. */
+static void
+free_file_change_queue (gpointer data, gpointer user_data)
+{
+	FileChange *entry = data;
+
+	free_file_change (&entry);
+}
+
gboolean
tracker_do_cleanup (const gchar *sig_msg)
{
@@ -436,6 +451,14 @@
tracker_remove_dirs (tracker->sys_tmp_root_dir);
}
+ /* remove file change queue */
+ if (tracker->file_change_queue) {
+ g_queue_foreach (tracker->file_change_queue,
+ free_file_change_queue, NULL);
+ g_queue_free (tracker->file_change_queue);
+ tracker->file_change_queue = NULL;
+ }
+
g_main_loop_quit (tracker->loop);
exit (EXIT_SUCCESS);
Index: trunk/src/trackerd/tracker-db.c
===================================================================
--- trunk/src/trackerd/tracker-db.c (revision 1117)
+++ trunk/src/trackerd/tracker-db.c (working copy)
@@ -33,6 +33,9 @@
extern Tracker *tracker;
#define XMP_MIME_TYPE "application/rdf+xml"
+/* limits for the "file changes too frequently" check */
+#define STACK_SIZE 50		/* max number of records kept in tracker->file_change_queue */
+#define MAX_DURATION 180	/* seconds after its first change that a record is dropped */
+#define MAX_CHANGE_TIMES 10	/* change count from which a uri is reported as too frequent */
typedef struct {
DBConnection *db_con;
@@ -548,7 +551,113 @@
tracker_notify_meta_data_available ();
}
+/* GFunc used with g_queue_foreach(): removes @data from
+ * tracker->file_change_queue and frees it once more than MAX_DURATION
+ * seconds have passed since its first recorded change.
+ * @user_data points at the current time, stored as a time_t. */
+static void
+refresh_file_change_queue (gpointer data, gpointer user_data)
+{
+	FileChange *change = data;
+	/* the caller passes the address of a time_t; reading it through an
+	 * int pointer picks up only part of the value where time_t is wider
+	 * than int */
+	const time_t *current = user_data;
+
+	if ((*current - change->first_change_time) > MAX_DURATION) {
+		g_queue_remove_all (tracker->file_change_queue, data);
+		free_file_change (&change);
+	}
+}
+
+/* Comparison callback for g_queue_find_custom(): @a is a queued FileChange,
+ * @b is the uri string being looked up.  Returns the strcmp() result of the
+ * record's uri against @b, i.e. 0 when they match. */
+static gint
+uri_comp (gconstpointer a, gconstpointer b)
+{
+	const FileChange *entry = a;
+	const char *wanted = b;
+
+	return strcmp (entry->uri, wanted);
+}
+
+/* Ordering callback for g_queue_insert_sorted() / g_queue_sort(): orders
+ * records by ascending num_of_change and, for equal counts, by ascending
+ * first_change_time.  Returns -1, 0 or 1; @user_data is not used. */
+static gint
+file_change_sort_comp (gconstpointer a, gconstpointer b, gpointer user_data)
+{
+	const FileChange *changea = a;
+	const FileChange *changeb = b;
+
+	/* compare explicitly instead of subtracting: a time_t difference does
+	 * not necessarily fit in the gint return value */
+	if (changea->num_of_change != changeb->num_of_change) {
+		return (changea->num_of_change > changeb->num_of_change) ? 1 : -1;
+	}
+
+	if (changea->first_change_time != changeb->first_change_time) {
+		return (changea->first_change_time > changeb->first_change_time) ? 1 : -1;
+	}
+
+	return 0;
+}
+
+/* Logs every record of tracker->file_change_queue through tracker_log(),
+ * one line per record: running index, uri, first change time and change
+ * count, separated by tabs. */
+static void
+print_file_change_queue (void)
+{
+	GList *l;
+	gint count = 1;
+
+	tracker_log ("File Change queue is:");
+
+	for (l = g_queue_peek_head_link (tracker->file_change_queue); l != NULL; l = g_list_next (l)) {
+		const FileChange *change = l->data;
+
+		/* time_t is not guaranteed to have the size of int, so cast it
+		 * to match the conversion specifier */
+		tracker_log ("%d\t%s\t%ld\t%d",
+			     count++, change->uri,
+			     (long) change->first_change_time,
+			     change->num_of_change);
+	}
+}
+
+/* Records one change of @uri and reports whether it is changing too often.
+ *
+ * Returns TRUE when the uri has been seen MAX_CHANGE_TIMES or more times
+ * while its record is in the queue, FALSE otherwise (including the first
+ * time a uri is seen).
+ *
+ * The queue is created on first use, records older than MAX_DURATION are
+ * dropped on every call, and the queue is kept sorted with
+ * file_change_sort_comp(), so its head is the record with the fewest
+ * changes. */
+static gboolean
+check_uri_changed_frequently (const char *uri)
+{
+	GList *find;
+	FileChange *change;
+	time_t current;
+	gboolean too_frequent;
+
+	if (!tracker->file_change_queue) {
+		/* init queue */
+		tracker->file_change_queue = g_queue_new ();
+	}
+
+	current = time (NULL);
+
+	/* remove items which are very old */
+	g_queue_foreach (tracker->file_change_queue,
+			 refresh_file_change_queue, &current);
+
+	find = g_queue_find_custom (tracker->file_change_queue, uri, uri_comp);
+
+	if (!find) {
+		/* not found: make room if the queue is full, then add a new record */
+		if (g_queue_get_length (tracker->file_change_queue) >= STACK_SIZE) {
+			FileChange *tmp = g_queue_pop_head (tracker->file_change_queue);
+
+			free_file_change (&tmp);
+		}
+
+		change = g_new0 (FileChange, 1);
+		change->uri = g_strdup (uri);
+		change->first_change_time = current;
+		change->num_of_change = 1;
+
+		g_queue_insert_sorted (tracker->file_change_queue, change,
+				       file_change_sort_comp, NULL);
+		too_frequent = FALSE;
+	} else {
+		change = find->data;
+		change->num_of_change++;
+
+		/* the count changed, so restore the queue order */
+		g_queue_sort (tracker->file_change_queue,
+			      file_change_sort_comp, NULL);
+		too_frequent = (change->num_of_change >= MAX_CHANGE_TIMES);
+	}
+
+	print_file_change_queue ();
+
+	return too_frequent;
+}
+
void
tracker_db_insert_pending_file (DBConnection *db_con, guint32 file_id, const char *uri, const char *moved_to_uri, const char *mime, int counter, TrackerChangeAction action, gboolean is_directory, gboolean is_new, int service_type_id)
{
@@ -556,6 +665,12 @@
g_return_if_fail (tracker_check_uri (uri));
+ /* check if uri changed too frequently */
+ if (((action == TRACKER_ACTION_CHECK) ||
+ (action == TRACKER_ACTION_FILE_CHECK)) &&
+ check_uri_changed_frequently (uri))
+ return;
+
/* check if uri already has a pending action and update accordingly */
info = tracker_db_get_pending_file (db_con, uri);
@@ -682,8 +797,8 @@
}
if (info->mime == NULL) {
- info->mime = g_strdup("unknown");
- }
+ info->mime = g_strdup("unknown");
+ }
if (info->is_new) {
if (info->mime)
_______________________________________________
tracker-list mailing list
[email protected]
http://mail.gnome.org/mailman/listinfo/tracker-list