Jamie, 

As you requested, the patch for feature 3) is ready; please review it.

Thanks,
Halton.
On Fri, 2008-02-01 at 09:52 -0500, Jamie McCracken wrote:
> > > 3) Constantly changing files - we should ignore these, especially
> > > bittorrent ones. Perhaps keep a small size-limited stack of
> > > recently indexed files, and if one of those files has been changed
> > > more than 10 times in a few minutes we should ignore it until
> > > trackerd next restarts.
> > 
> > It is like a prison rule.
> > 
> > Suppose the stack looks like this:
> > 
> > file_name      first_change_time     change_accounts
> > /a             11111                 3
> > /b             22222                 8
> > /c             33333                 1
> > 
> > When a file is changed, 
> >   if ((current_time - first_change_time) > MAX_DURATION) {
> >     first_change_time = current_time
> >   } else {
> >     change_accounts ++;
> >     if (change_accounts < MAX_CHANGE_TIMES) {
> >       //reflect this change
> >     } else {
> >       // ignore this change
> >     }
> >   }
> 
> that's pretty much it, but make sure it applies to files only (and not
> emails or conversations)
> 
> when we ignore the file - add it to an ignore list in memory. When
> trackerd exits, save that list to a file. When trackerd is next
> restarted, load the list, index the entries and then reset it.
> 
> > 
> > Stop here if my thinking is totally wrong.
> > 
> > If my proto is not wrong, the question is where to have this stack?
> > (1) In memory
> >    There will be a long list of all changed files, even those that
> > have changed only once.
> >    
> > (2) In database
> >    Add a property for each file in Service table. Could be slower.
> 
> 
> it would not be slower as such, because we are updating the mtime in
> that table anyhow whenever it changes. However, we don't want to do a
> db change at this point, so (1) would be better for now
> 
> > 
> > Any idea?
> 
> Use a fixed size LIFO stack of 50 items max in memory 
> 
> you can use a static array if you like, or if you prefer, the glib
> double-ended queue
> 
> (http://library.gnome.org/devel/glib/unstable/glib-Double-ended-Queues.html)
> 
> 
> but make sure you pop tail to keep its size limited
Index: trunk/src/trackerd/tracker-utils.h
===================================================================
--- trunk/src/trackerd/tracker-utils.h	(revision 1117)
+++ trunk/src/trackerd/tracker-utils.h	(working copy)
@@ -197,8 +197,16 @@
 	EVENT_CACHE_FLUSHED
 } LoopEvent;
 
+
 typedef struct {
+	gchar	*uri;	/* uri of the tracked file; a private copy made with g_strdup() and released with g_free() */
+	time_t	first_change_time;	/* time() value taken when this record was created */
+	gint    num_of_change;	/* changes counted for this uri; starts at 1 and is incremented on each later change */
+} FileChange;
 
+
+typedef struct {
+
 	gboolean	readonly;
 
 	TrackerStatus	status;
@@ -339,6 +347,9 @@
 	/* nfs options */
 	gboolean	use_nfs_safe_locking; /* use safer but much slower external lock file when users home dir is on an nfs systems */
 
+	/* Queue for recording file changes */
+	GQueue		*file_change_queue;
+
 	/* application run time values */
 	gboolean	is_indexing;
 	gboolean	in_flush;
@@ -647,6 +658,7 @@
 
 char *		tracker_get_status 		(void);
 
+void		free_file_change		(FileChange **user_data);
 gboolean	tracker_do_cleanup 		(const gchar *sig_msg);
 
 gboolean	tracker_pause_on_battery 	(void);
Index: trunk/src/trackerd/trackerd.c
===================================================================
--- trunk/src/trackerd/trackerd.c	(revision 1117)
+++ trunk/src/trackerd/trackerd.c	(working copy)
@@ -333,7 +333,22 @@
 }
 
 
+/*
+ * Releases a FileChange record and clears the caller's pointer to it.
+ *
+ * @user_data: address of the pointer to the record. NULL, or the address
+ *             of a NULL pointer, is accepted and ignored.
+ *
+ * Both the uri string and the record itself are freed; on return
+ * *user_data is NULL so the stale pointer cannot be reused.
+ */
+void
+free_file_change (FileChange **user_data)
+{
+	FileChange *change;
+
+	if (!user_data || !*user_data) {
+		return;
+	}
+
+	change = *user_data;
+
+	g_free (change->uri);
+	/* the record is heap allocated as well, so freeing only the uri leaks it */
+	g_free (change);
+
+	*user_data = NULL;
+}
 
+/*
+ * GFunc adapter for g_queue_foreach(): hands one queue element to
+ * free_file_change().
+ *
+ * @data:      the FileChange element being visited.
+ * @user_data: unused.
+ */
+static void
+free_file_change_queue (gpointer data, gpointer user_data)
+{
+	FileChange *item = data;
+
+	free_file_change (&item);
+}
+
 gboolean
 tracker_do_cleanup (const gchar *sig_msg)
 {
@@ -436,6 +451,14 @@
 		tracker_remove_dirs (tracker->sys_tmp_root_dir);
 	}
 
+	/* remove file change queue */
+	if (tracker->file_change_queue) {
+		g_queue_foreach (tracker->file_change_queue,
+				 free_file_change_queue, NULL);
+		g_queue_free (tracker->file_change_queue);
+		tracker->file_change_queue = NULL;
+	}
+
 	g_main_loop_quit (tracker->loop);
 
 	exit (EXIT_SUCCESS);
Index: trunk/src/trackerd/tracker-db.c
===================================================================
--- trunk/src/trackerd/tracker-db.c	(revision 1117)
+++ trunk/src/trackerd/tracker-db.c	(working copy)
@@ -33,6 +33,9 @@
 extern Tracker *tracker;
 
 #define XMP_MIME_TYPE "application/rdf+xml"
+#define STACK_SIZE 3	/* queue length at which the head record is evicted before a new one is added; NOTE(review): the design discussion asked for 50 items max - confirm 3 is not a leftover test value */
+#define MAX_DURATION 180	/* seconds (time() units) after first_change_time beyond which a record is dropped from the queue */
+#define MAX_CHANGE_TIMES 10	/* change count at which check_uri_changed_frequently() starts returning TRUE */
 
 typedef struct {
 	DBConnection	*db_con;
@@ -548,7 +551,113 @@
 	tracker_notify_meta_data_available ();
 }
 
+/*
+ * GFunc callback for g_queue_foreach(): removes @data from
+ * tracker->file_change_queue and frees it once more than MAX_DURATION
+ * has passed since its first_change_time.
+ *
+ * @data:      the FileChange record being visited.
+ * @user_data: address of a time_t holding the current time. It must be
+ *             read as a time_t: reading it through an int pointer yields
+ *             a wrong value wherever time_t is wider than int.
+ */
+static void
+refresh_file_change_queue (gpointer data, gpointer user_data)
+{
+	FileChange *change = data;
+	const time_t *current = user_data;
 
+	if ((*current - change->first_change_time) > MAX_DURATION) {
+		/* unlink first, then free, so the queue never holds a dangling record */
+		g_queue_remove_all (tracker->file_change_queue, data);
+		free_file_change (&change);
+	}
+}
+
+/*
+ * GCompareFunc for g_queue_find_custom(): matches a queued FileChange
+ * against a plain uri string.
+ *
+ * @a: the FileChange element taken from the queue.
+ * @b: the uri string being searched for.
+ *
+ * Returns the strcmp() result of the record's uri and @b (0 on a match).
+ */
+static gint
+uri_comp (gconstpointer a, gconstpointer b)
+{
+	const FileChange *entry = a;
+	const char *wanted = b;
+
+	return strcmp (entry->uri, wanted);
+}
+
+/*
+ * GCompareDataFunc ordering FileChange records by ascending change count,
+ * with ties broken by ascending first_change_time.
+ *
+ * @a, @b:     the two FileChange records to compare.
+ * @user_data: unused.
+ *
+ * Returns -1, 0 or 1. Explicit comparisons are used instead of returning
+ * a difference: a time_t difference does not necessarily fit in a gint,
+ * and a truncated or overflowed value can report the wrong order.
+ */
+static gint
+file_change_sort_comp (gconstpointer a, gconstpointer b, gpointer user_data)
+{
+	const FileChange *changea = a;
+	const FileChange *changeb = b;
+
+	if (changea->num_of_change != changeb->num_of_change) {
+		return (changea->num_of_change > changeb->num_of_change) ? 1 : -1;
+	}
+
+	if (changea->first_change_time != changeb->first_change_time) {
+		return (changea->first_change_time > changeb->first_change_time) ? 1 : -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Writes the contents of tracker->file_change_queue to the log via
+ * tracker_log(): a header line, then one line per record holding its
+ * 1-based position, uri, first change time and change count.
+ */
+static void
+print_file_change_queue (void)
+{
+	GList *l;
+	gint count = 1;
+
+	tracker_log ("File Change queue is:");
+
+	/* the head link is already the first element, so walk forward from it */
+	for (l = g_queue_peek_head_link (tracker->file_change_queue); l != NULL; l = g_list_next (l)) {
+		const FileChange *change = l->data;
+
+		/* time_t need not be int-sized; cast so the argument matches %ld */
+		tracker_log ("%d\t%s\t%ld\t%d",
+			     count++, change->uri,
+			     (long) change->first_change_time,
+			     change->num_of_change);
+	}
+}
+
+/*
+ * Records one change of @uri in tracker->file_change_queue and reports
+ * whether that uri has now changed too often.
+ *
+ * The queue is created on first use. Records older than MAX_DURATION are
+ * dropped before the lookup. An unknown uri gets a new record with a
+ * count of 1 (evicting the head record first when the queue already holds
+ * STACK_SIZE entries); a known uri has its count incremented and the
+ * queue re-sorted with file_change_sort_comp. The queue is logged on
+ * every call.
+ *
+ * @uri: uri of the file that changed.
+ *
+ * Returns TRUE when the uri was already queued and its count has reached
+ * MAX_CHANGE_TIMES, FALSE otherwise.
+ */
+static gboolean
+check_uri_changed_frequently (const char *uri)
+{
+	GQueue     *queue;
+	GList      *match;
+	FileChange *entry;
+	time_t      now;
+	gboolean    too_frequent;
+
+	/* lazily create the queue */
+	if (!tracker->file_change_queue) {
+		tracker->file_change_queue = g_queue_new ();
+	}
+	queue = tracker->file_change_queue;
+
+	now = time (NULL);
+
+	/* expire records whose window has run out */
+	g_queue_foreach (queue, refresh_file_change_queue, &now);
+
+	match = g_queue_find_custom (queue, uri, uri_comp);
+
+	if (match) {
+		/* known uri: bump its count and restore the ordering */
+		entry = match->data;
+		entry->num_of_change++;
+		g_queue_sort (queue, file_change_sort_comp, NULL);
+		too_frequent = (entry->num_of_change >= MAX_CHANGE_TIMES);
+	} else {
+		/* first sighting: start a new record for this uri */
+		entry = g_new0 (FileChange, 1);
+		entry->uri = g_strdup (uri);
+		entry->first_change_time = now;
+		entry->num_of_change = 1;
+
+		/* keep the queue bounded by dropping the head record when full */
+		if (g_queue_get_length (queue) == STACK_SIZE) {
+			FileChange *evicted = g_queue_pop_head (queue);
+
+			free_file_change (&evicted);
+		}
+
+		g_queue_insert_sorted (queue, entry, file_change_sort_comp, NULL);
+		too_frequent = FALSE;
+	}
+
+	print_file_change_queue ();
+
+	return too_frequent;
+}
+
 void
 tracker_db_insert_pending_file (DBConnection *db_con, guint32 file_id, const char *uri, const char *moved_to_uri, const char *mime, int counter, TrackerChangeAction action, gboolean is_directory, gboolean is_new, int service_type_id)
 {
@@ -556,6 +665,12 @@
 
 	g_return_if_fail (tracker_check_uri (uri));
 
+	/* check if uri changed too frequently */
+	if (((action == TRACKER_ACTION_CHECK) ||
+		(action == TRACKER_ACTION_FILE_CHECK)) &&
+		check_uri_changed_frequently (uri))
+		return;
+
 	/* check if uri already has a pending action and update accordingly */
 	info = tracker_db_get_pending_file (db_con, uri);
 
@@ -682,8 +797,8 @@
 	}
 
 	if (info->mime == NULL) {
-                info->mime = g_strdup("unknown");
-        }
+		info->mime = g_strdup("unknown");
+	}
 
 	if (info->is_new) {
 		if (info->mime)
_______________________________________________
tracker-list mailing list
[email protected]
http://mail.gnome.org/mailman/listinfo/tracker-list

Reply via email to