[PATCH 1/1] Stores the folder (directory name) of the message in the database as a term with folder prefix.

2010-04-14 Thread Micah Anderson
  This patch was originally sent by Andreas Klöckner in:
  id:200912141421.52561.lists at informa.tiker.net. It was then
  subsequently updated by Michal Sojka in:
  id:1264692317-9175-2-git-send-email-sojkam1 at fel.cvut.cz and then
  further improved again by Michal Sojka in:
  id:1265122868-12133-1-git-send-email-sojkam1 at fel.cvut.cz

  This is a rebase of the latest patch off of the current git HEAD.

. The differences from the original patch are:
  - Rebased off of current git HEAD
  - Folder name is taken from strings generated during traversal. It no
longer uses glib, nor does it allocate additional memory to determine the
base name. The same approach as in
id:87fx8bygi7.fsf at linux.vnet.ibm.com was used.
  - Removed unrelated change which was submitted separately as
id:1264691584-8290-2-git-send-email-sojkam1 at fel.cvut.cz
  - Changed the comment describing database schema.

TODO (see Carl's email: id:87zl5k0w6s.fsf at yoom.home.cworth.org):

  - Support hierarchical folders: this patch is only storing the final
directory component. This should be hooked in differently with
filename storage so the whole filename is indexed as text to
provide arbitrary search phrases such as "folder:'foo/bar/baz'"
---
 lib/database.cc |   13 +
 lib/notmuch.h   |1 +
 notmuch-new.c   |   39 +--
 3 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index c91e97c..6364623 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -84,9 +84,9 @@ typedef struct {
  * MESSAGE_ID: The unique ID of the mail mess (see "id" above)
  *
  * In addition, terms from the content of the message are added with
- * "from", "to", "attachment", and "subject" prefixes for use by the
- * user in searching. But the database doesn't really care itself
- * about any of these.
+ * "from", "to", "attachment", "subject" and "folder" prefixes for use
+ * by the user in searching. But the database doesn't really care
+ * itself about any of these.
  *
  * The data portion of a mail document is empty.
  *
@@ -155,7 +155,8 @@ prefix_t PROBABILISTIC_PREFIX[]= {
 { "from",  "XFROM" },
 { "to","XTO" },
 { "attachment","XATTACHMENT" },
-{ "subject",   "XSUBJECT"}
+{ "subject",   "XSUBJECT"},
+{ "folder","XFOLDER"}
 };

 int
@@ -1362,6 +1363,7 @@ _notmuch_database_link_message (notmuch_database_t 
*notmuch,
 notmuch_status_t
 notmuch_database_add_message (notmuch_database_t *notmuch,
  const char *filename,
+ const char *folder_name,
  notmuch_message_t **message_ret)
 {
 notmuch_message_file_t *message_file;
@@ -1477,6 +1479,9 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
date = notmuch_message_file_get_header (message_file, "date");
_notmuch_message_set_date (message, date);

+   if (folder_name != NULL)
+   _notmuch_message_gen_terms (message, "folder", folder_name);
+
_notmuch_message_index_file (message, filename);
} else {
ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
diff --git a/lib/notmuch.h b/lib/notmuch.h
index 88da078..ce81565 100644
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -264,6 +264,7 @@ notmuch_database_get_directory (notmuch_database_t 
*database,
 notmuch_status_t
 notmuch_database_add_message (notmuch_database_t *database,
  const char *filename,
+ const char *folder_name,
  notmuch_message_t **message);

 /* Remove a message from the given notmuch database.
diff --git a/notmuch-new.c b/notmuch-new.c
index 44b50aa..6ad3c09 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -21,6 +21,7 @@
 #include "notmuch-client.h"

 #include <unistd.h>
+#include <glib.h>

 typedef struct _filename_node {
 char *filename;
@@ -169,6 +170,35 @@ _entries_resemble_maildir (struct dirent **entries, int 
count)
 return 0;
 }

+static char*
+_get_folder_base_name(const char *path)
+{
+  gchar *full_folder_name = NULL;
+  gchar *folder_base_name = NULL;
+  
+  /* Find name of "folder" containing the email. */
+  full_folder_name = g_strdup(path);
+  while (1) {
+folder_base_name = g_path_get_basename(full_folder_name);
+
+if (strcmp(folder_base_name, "cur") == 0
+   || strcmp(folder_base_name, "new") == 0) {
+  gchar *parent_name = g_path_get_dirname(full_folder_name);
+  g_free(full_folder_name);
+  full_folder_name = parent_name;
+} else
+  break;
+  }
+  
+  g_free(full_folder_name);
+  
+  if (strcmp(folder_base_name, ".") == 0) {
+g_free(folder_base_name);
+folder_base_name = NULL;
+  }
+  return folder_base_name;
+}
+
 /* Examine 'path' recursively as follows:
  *
  *   o Ask the filesystem for the mtime of 'path' (fs_mtime)
@@ 

[PATCH 1/1] Stores the folder (directory name) of the message in the database as a term with folder prefix.

2010-04-14 Thread Micah Anderson
  This patch was originally sent by Andreas Klöckner in:
  id:200912141421.52561.li...@informa.tiker.net. It was then
  subsequently updated by Michal Sojka in:
  id:1264692317-9175-2-git-send-email-sojk...@fel.cvut.cz and then
  further improved again by Michal Sojka in:
  id:1265122868-12133-1-git-send-email-sojk...@fel.cvut.cz

  This is a rebase of the latest patch off of the current git HEAD.

. The differences from the original patch are:
  - Rebased off of current git HEAD
  - Folder name is taken from strings generated during traversal. It no
longer uses glib, nor does it allocate additional memory to determine the
base name. The same approach as in
id:87fx8bygi7.fsf@linux.vnet.ibm.com was used.
  - Removed unrelated change which was submitted separately as
id:1264691584-8290-2-git-send-email-sojk...@fel.cvut.cz
  - Changed the comment describing database schema.

TODO (see Carl's email: id:87zl5k0w6s.fsf@yoom.home.cworth.org):

  - Support hierarchical folders: this patch is only storing the final
directory component. This should be hooked in differently with
filename storage so the whole filename is indexed as text to
provide arbitrary search phrases such as "folder:'foo/bar/baz'"
---
 lib/database.cc |   13 +
 lib/notmuch.h   |1 +
 notmuch-new.c   |   39 +--
 3 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index c91e97c..6364623 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -84,9 +84,9 @@ typedef struct {
  * MESSAGE_ID: The unique ID of the mail mess (see "id" above)
  *
  * In addition, terms from the content of the message are added with
- * "from", "to", "attachment", and "subject" prefixes for use by the
- * user in searching. But the database doesn't really care itself
- * about any of these.
+ * "from", "to", "attachment", "subject" and "folder" prefixes for use
+ * by the user in searching. But the database doesn't really care
+ * itself about any of these.
  *
  * The data portion of a mail document is empty.
  *
@@ -155,7 +155,8 @@ prefix_t PROBABILISTIC_PREFIX[]= {
 { "from",  "XFROM" },
 { "to","XTO" },
 { "attachment","XATTACHMENT" },
-{ "subject",   "XSUBJECT"}
+{ "subject",   "XSUBJECT"},
+{ "folder","XFOLDER"}
 };
 
 int
@@ -1362,6 +1363,7 @@ _notmuch_database_link_message (notmuch_database_t 
*notmuch,
 notmuch_status_t
 notmuch_database_add_message (notmuch_database_t *notmuch,
  const char *filename,
+ const char *folder_name,
  notmuch_message_t **message_ret)
 {
 notmuch_message_file_t *message_file;
@@ -1477,6 +1479,9 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
date = notmuch_message_file_get_header (message_file, "date");
_notmuch_message_set_date (message, date);
 
+   if (folder_name != NULL)
+   _notmuch_message_gen_terms (message, "folder", folder_name);
+
_notmuch_message_index_file (message, filename);
} else {
ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
diff --git a/lib/notmuch.h b/lib/notmuch.h
index 88da078..ce81565 100644
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -264,6 +264,7 @@ notmuch_database_get_directory (notmuch_database_t 
*database,
 notmuch_status_t
 notmuch_database_add_message (notmuch_database_t *database,
  const char *filename,
+ const char *folder_name,
  notmuch_message_t **message);
 
 /* Remove a message from the given notmuch database.
diff --git a/notmuch-new.c b/notmuch-new.c
index 44b50aa..6ad3c09 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -21,6 +21,7 @@
 #include "notmuch-client.h"
 
 #include <unistd.h>
+#include <glib.h>
 
 typedef struct _filename_node {
 char *filename;
@@ -169,6 +170,35 @@ _entries_resemble_maildir (struct dirent **entries, int 
count)
 return 0;
 }
 
+static char*
+_get_folder_base_name(const char *path)
+{
+  gchar *full_folder_name = NULL;
+  gchar *folder_base_name = NULL;
+  
+  /* Find name of "folder" containing the email. */
+  full_folder_name = g_strdup(path);
+  while (1) {
+folder_base_name = g_path_get_basename(full_folder_name);
+
+if (strcmp(folder_base_name, "cur") == 0
+   || strcmp(folder_base_name, "new") == 0) {
+  gchar *parent_name = g_path_get_dirname(full_folder_name);
+  g_free(full_folder_name);
+  full_folder_name = parent_name;
+} else
+  break;
+  }
+  
+  g_free(full_folder_name);
+  
+  if (strcmp(folder_base_name, ".") == 0) {
+g_free(folder_base_name);
+folder_base_name = NULL;
+  }
+  return folder_base_name;
+}
+
 /* Examine 'path' recursively as follows:
  *
  *   o Ask the filesystem for the mtime of 'path' (fs_mtime)
@@ -222,6 +252,7 @@ add_files_recursive (notmuch_database_t