[notmuch] Subject: [PATCH] update the check whether a dir entry should be ignored.

2009-12-12 Thread Dirk-Jan C. Binnema
Hi David,

> "DM" == David Maus  writes:

>> There is one maybe controversial change, namely that it ignores all
>> dot-dirs; this works fine for .notmuch and .nnmaildir (gnus), but maybe
>> there is some valid use case for having mail in dot-dirs. Maybe one of
>> the IMAP-servers does this? Not sure. Anyway, I can change that part.

DM> Yes, ignore dot-dirs completely is not a good idea as the "." is a
DM> common separator for mailbox hierarchies on imap servers. Dovecot uses
DM> it by default, Courier too.

Thanks. I was suspecting something like that. Attached, updated patch that
also updates the counting part.

---
 notmuch-new.c |   75 ++---
 1 files changed, 50 insertions(+), 25 deletions(-)

diff --git a/notmuch-new.c b/notmuch-new.c
index 9d20616..411e084 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -109,6 +109,44 @@ is_maildir (struct dirent **entries, int count)
 return 0;
 }

+
+static int
+ignore_dir_entry (const char* path, struct dirent *entry)
+{
+char noindex[4096]; /* any path will fit */
+
+/* ignore everything starting with a dot; this covers hidden
+* files, as well as special dir (. and ..), but also things like
+* gnus .nnmaildir or .notmuch */
+
+/* special handling for dot-dirs */
+if (entry->d_name[0] == '.') {
+
+   /* ignore '.' and '..' */
+   if (entry->d_name[1] == '\0' ||
+   (entry->d_name[1] == '.' && entry->d_name[2] == '\0')) 
+   return 1;
+   
+   if (entry->d_name[1] == 'n')  { /* optimization */
+   /* ignore notmuch, gnus special dirs (or such-named files) */
+   if (strcmp (entry->d_name, ".notmuch") == 0 ||
+   strcmp (entry->d_name, ".nnmaildir") == 0)
+   return 1;
+   }
+}
+
+/* we also check if dir contains a file called '.noindex'; if so,
+ * we ignore this directory; alloca would be suitable here, if not
+ * for the portability. */
+snprintf (noindex, sizeof(noindex), "%s/%s/.noindex", path, entry->d_name);
+if (access (noindex, F_OK) == 0)
+   return 1;
+
+return 0; /* don't ignore */
+}
+
+
+
 /* Examine 'path' recursively as follows:
  *
  *   o Ask the filesystem for the mtime of 'path' (path_mtime)
@@ -181,21 +219,17 @@ add_files_recursive (notmuch_database_t *notmuch,
if (path_mtime <= path_dbtime && entry->d_type == DT_REG)
continue;

-   /* Ignore special directories to avoid infinite recursion.
-* Also ignore the .notmuch directory.
-*/
-   /* XXX: Eventually we'll want more sophistication to let the
-* user specify files to be ignored. */
-   if (strcmp (entry->d_name, ".") == 0 ||
-   strcmp (entry->d_name, "..") == 0 ||
-   (entry->d_type == DT_DIR &&
-(strcmp (entry->d_name, "tmp") == 0) &&
-is_maildir (namelist, num_entries)) ||
-   strcmp (entry->d_name, ".notmuch") ==0)
-   {
-   continue;
-   }

+   /* ignore tmp Maildirs, for obvious reasons */
+   if (entry->d_type == DT_DIR &&
+   (strcmp (entry->d_name, "tmp") == 0) &&
+   is_maildir (namelist, num_entries))
+   continue;
+   
+   /* ignore special directories and files */
+   if (ignore_dir_entry (path, entry))
+   continue;
+   
next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);

if (stat (next, st)) {
@@ -394,18 +428,9 @@ count_files (const char *path, int *count)

 entry= namelist[i++];

-   /* Ignore special directories to avoid infinite recursion.
-* Also ignore the .notmuch directory.
-*/
-   /* XXX: Eventually we'll want more sophistication to let the
-* user specify files to be ignored. */
-   if (strcmp (entry->d_name, ".") == 0 ||
-   strcmp (entry->d_name, "..") == 0 ||
-   strcmp (entry->d_name, ".notmuch") == 0)
-   {
+   if (ignore_dir_entry (path, entry))
continue;
-   }
-
+   
if (asprintf (&next, "%s/%s", path, entry->d_name) == -1) {
next = NULL;
fprintf (stderr, "Error descending from %s to %s: Out of memory\n",
-- 
1.6.3.3



[notmuch] Subject: [PATCH] update the check whether a dir entry should be ignored.

2009-12-12 Thread David Maus
At Sat, 12 Dec 2009 13:36:24 +0200,
Dirk-Jan C. Binnema wrote:
> There is one maybe controversial change, namely that it ignores all dot-dirs;
> this works fine for .notmuch and .nnmaildir (gnus), but maybe there is some
> valid use case for having mail in dot-dirs. Maybe one of the IMAP-servers does
> this? Not sure. Anyway, I can change that part.

Yes, ignoring dot-dirs completely is not a good idea, as the "." is a
common separator for mailbox hierarchies on IMAP servers. Dovecot uses
it by default, Courier too.

Regards,

  -- David

-- 
OpenPGP... 0x316F4BE4670716FD
Jabber dmjena at jabber.org
Email. maus.david at gmail.com
ICQ... 241051416


[notmuch] Subject: [PATCH] update the check whether a dir entry should be ignored.

2009-12-12 Thread Dirk-Jan C. Binnema
Hi all,

This is a draft patch which hopefully improves the check for whether a dir
entry should be ignored when indexing. It adds one feature: if you put a file
'.noindex' in a dir, the whole dir will be ignored for indexing. I find this
very useful for excluding e.g. folders with spam messages from the indexing.

There is one maybe controversial change, namely that it ignores all dot-dirs;
this works fine for .notmuch and .nnmaildir (gnus), but maybe there is some
valid use case for having mail in dot-dirs. Maybe one of the IMAP-servers does
this? Not sure. Anyway, I can change that part.

If the overall approach is considered OK, I can make a new patch

Best wishes,
Dirk.


---
 notmuch-new.c |   48 ++--
 1 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/notmuch-new.c b/notmuch-new.c
index 9d20616..28f69bc 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -109,6 +109,30 @@ is_maildir (struct dirent **entries, int count)
 return 0;
 }

+
+static int
+ignore_dir_entry (const char* path, struct dirent *entry)
+{
+char noindex[4096]; /* any path will fit */
+
+/* ignore everything starting with a dot; this covers hidden
+* files, as well as special dir (. and ..), but also things like
+* gnus .nnmaildir or .notmuch */
+if (entry->d_name[0] == '.')
+   return 1;
+
+/* we also check if dir contains a file called '.noindex'; if so,
+ * we ignore this directory; alloca would be suitable here, if not
+ * for the portability. */
+snprintf (noindex, sizeof(noindex), "%s/%s/.noindex", path, entry->d_name);
+if (access (noindex, F_OK) == 0)
+   return 1;
+
+return 0; /* don't ignore */
+}
+
+
+
 /* Examine 'path' recursively as follows:
  *
  *   o Ask the filesystem for the mtime of 'path' (path_mtime)
@@ -181,21 +205,17 @@ add_files_recursive (notmuch_database_t *notmuch,
if (path_mtime <= path_dbtime && entry->d_type == DT_REG)
continue;

-   /* Ignore special directories to avoid infinite recursion.
-* Also ignore the .notmuch directory.
-*/
-   /* XXX: Eventually we'll want more sophistication to let the
-* user specify files to be ignored. */
-   if (strcmp (entry->d_name, ".") == 0 ||
-   strcmp (entry->d_name, "..") == 0 ||
-   (entry->d_type == DT_DIR &&
-(strcmp (entry->d_name, "tmp") == 0) &&
-is_maildir (namelist, num_entries)) ||
-   strcmp (entry->d_name, ".notmuch") ==0)
-   {
-   continue;
-   }

+   /* ignore tmp Maildirs, for obvious reasons */
+   if (entry->d_type == DT_DIR &&
+   (strcmp (entry->d_name, "tmp") == 0) &&
+   is_maildir (namelist, num_entries))
+   continue;
+   
+   /* ignore special directories and files */
+   if (ignore_dir_entry (path, entry))
+   continue;
+   
next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);

if (stat (next, st)) {
-- 
1.6.3.3



Re: [notmuch] Subject: [PATCH] update the check whether a dir entry should be ignored.

2009-12-12 Thread Dirk-Jan C . Binnema
Hi David,

> "DM" == David Maus  writes:

>> There is one maybe controversial change, namely that it ignores all
>> dot-dirs; this works fine for .notmuch and .nnmaildir (gnus), but maybe
>> there is some valid use case for having mail in dot-dirs. Maybe one of
>> the IMAP-servers does this? Not sure. Anyway, I can change that part.

DM> Yes, ignore dot-dirs completely is not a good idea as the "." is a
DM> common separator for mailbox hierarchies on imap servers. Dovecot uses
DM> it by default, Courier too.

Thanks. I was suspecting something like that. Attached, updated patch that
also updates the counting part.

---
 notmuch-new.c |   75 ++---
 1 files changed, 50 insertions(+), 25 deletions(-)

diff --git a/notmuch-new.c b/notmuch-new.c
index 9d20616..411e084 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -109,6 +109,44 @@ is_maildir (struct dirent **entries, int count)
 return 0;
 }
 
+
+static int
+ignore_dir_entry (const char* path, struct dirent *entry)
+{
+char noindex[4096]; /* any path will fit */
+
+/* ignore everything starting with a dot; this covers hidden
+* files, as well as special dir (. and ..), but also things like
+* gnus .nnmaildir or .notmuch */
+
+/* special handling for dot-dirs */
+if (entry->d_name[0] == '.') {
+
+   /* ignore '.' and '..' */
+   if (entry->d_name[1] == '\0' ||
+   (entry->d_name[1] == '.' && entry->d_name[2] == '\0')) 
+   return 1;
+   
+   if (entry->d_name[1] == 'n')  { /* optimization */
+   /* ignore notmuch, gnus special dirs (or such-named files) */
+   if (strcmp (entry->d_name, ".notmuch") == 0 ||
+   strcmp (entry->d_name, ".nnmaildir") == 0)
+   return 1;
+   }
+}
+
+/* we also check if dir contains a file called '.noindex'; if so,
+ * we ignore this directory; alloca would be suitable here, if not
+ * for the portability. */
+snprintf (noindex, sizeof(noindex), "%s/%s/.noindex", path, entry->d_name);
+if (access (noindex, F_OK) == 0)
+   return 1;
+
+return 0; /* don't ignore */
+}
+
+
+
 /* Examine 'path' recursively as follows:
  *
  *   o Ask the filesystem for the mtime of 'path' (path_mtime)
@@ -181,21 +219,17 @@ add_files_recursive (notmuch_database_t *notmuch,
if (path_mtime <= path_dbtime && entry->d_type == DT_REG)
continue;
 
-   /* Ignore special directories to avoid infinite recursion.
-* Also ignore the .notmuch directory.
-*/
-   /* XXX: Eventually we'll want more sophistication to let the
-* user specify files to be ignored. */
-   if (strcmp (entry->d_name, ".") == 0 ||
-   strcmp (entry->d_name, "..") == 0 ||
-   (entry->d_type == DT_DIR &&
-(strcmp (entry->d_name, "tmp") == 0) &&
-is_maildir (namelist, num_entries)) ||
-   strcmp (entry->d_name, ".notmuch") ==0)
-   {
-   continue;
-   }
 
+   /* ignore tmp Maildirs, for obvious reasons */
+   if (entry->d_type == DT_DIR &&
+   (strcmp (entry->d_name, "tmp") == 0) &&
+   is_maildir (namelist, num_entries))
+   continue;
+   
+   /* ignore special directories and files */
+   if (ignore_dir_entry (path, entry))
+   continue;
+   
next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
 
if (stat (next, st)) {
@@ -394,18 +428,9 @@ count_files (const char *path, int *count)
 
 entry= namelist[i++];
 
-   /* Ignore special directories to avoid infinite recursion.
-* Also ignore the .notmuch directory.
-*/
-   /* XXX: Eventually we'll want more sophistication to let the
-* user specify files to be ignored. */
-   if (strcmp (entry->d_name, ".") == 0 ||
-   strcmp (entry->d_name, "..") == 0 ||
-   strcmp (entry->d_name, ".notmuch") == 0)
-   {
+   if (ignore_dir_entry (path, entry))
continue;
-   }
-
+   
if (asprintf (&next, "%s/%s", path, entry->d_name) == -1) {
next = NULL;
fprintf (stderr, "Error descending from %s to %s: Out of memory\n",
-- 
1.6.3.3

___
notmuch mailing list
notmuch@notmuchmail.org
http://notmuchmail.org/mailman/listinfo/notmuch


Re: [notmuch] Subject: [PATCH] update the check whether a dir entry should be ignored.

2009-12-12 Thread David Maus
At Sat, 12 Dec 2009 13:36:24 +0200,
Dirk-Jan C. Binnema wrote:
> There is one maybe controversial change, namely that it ignores all dot-dirs;
> this works fine for .notmuch and .nnmaildir (gnus), but maybe there is some
> valid use case for having mail in dot-dirs. Maybe one of the IMAP-servers does
> this? Not sure. Anyway, I can change that part.

Yes, ignoring dot-dirs completely is not a good idea, as the "." is a
common separator for mailbox hierarchies on IMAP servers. Dovecot uses
it by default, Courier too.

Regards,

  -- David

-- 
OpenPGP... 0x316F4BE4670716FD
Jabber dmj...@jabber.org
Email. maus.da...@gmail.com
ICQ... 241051416
___
notmuch mailing list
notmuch@notmuchmail.org
http://notmuchmail.org/mailman/listinfo/notmuch


[notmuch] Subject: [PATCH] update the check whether a dir entry should be ignored.

2009-12-12 Thread Dirk-Jan C . Binnema
Hi all,

This is a draft patch which hopefully improves the check for whether a dir
entry should be ignored when indexing. It adds one feature: if you put a file
'.noindex' in a dir, the whole dir will be ignored for indexing. I find this
very useful for excluding e.g. folders with spam messages from the indexing.

There is one maybe controversial change, namely that it ignores all dot-dirs;
this works fine for .notmuch and .nnmaildir (gnus), but maybe there is some
valid use case for having mail in dot-dirs. Maybe one of the IMAP-servers does
this? Not sure. Anyway, I can change that part.

If the overall approach is considered OK, I can make a new patch

Best wishes,
Dirk.


---
 notmuch-new.c |   48 ++--
 1 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/notmuch-new.c b/notmuch-new.c
index 9d20616..28f69bc 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -109,6 +109,30 @@ is_maildir (struct dirent **entries, int count)
 return 0;
 }
 
+
+static int
+ignore_dir_entry (const char* path, struct dirent *entry)
+{
+char noindex[4096]; /* any path will fit */
+
+/* ignore everything starting with a dot; this covers hidden
+* files, as well as special dir (. and ..), but also things like
+* gnus .nnmaildir or .notmuch */
+if (entry->d_name[0] == '.')
+   return 1;
+
+/* we also check if dir contains a file called '.noindex'; if so,
+ * we ignore this directory; alloca would be suitable here, if not
+ * for the portability. */
+snprintf (noindex, sizeof(noindex), "%s/%s/.noindex", path, entry->d_name);
+if (access (noindex, F_OK) == 0)
+   return 1;
+
+return 0; /* don't ignore */
+}
+
+
+
 /* Examine 'path' recursively as follows:
  *
  *   o Ask the filesystem for the mtime of 'path' (path_mtime)
@@ -181,21 +205,17 @@ add_files_recursive (notmuch_database_t *notmuch,
if (path_mtime <= path_dbtime && entry->d_type == DT_REG)
continue;
 
-   /* Ignore special directories to avoid infinite recursion.
-* Also ignore the .notmuch directory.
-*/
-   /* XXX: Eventually we'll want more sophistication to let the
-* user specify files to be ignored. */
-   if (strcmp (entry->d_name, ".") == 0 ||
-   strcmp (entry->d_name, "..") == 0 ||
-   (entry->d_type == DT_DIR &&
-(strcmp (entry->d_name, "tmp") == 0) &&
-is_maildir (namelist, num_entries)) ||
-   strcmp (entry->d_name, ".notmuch") ==0)
-   {
-   continue;
-   }
 
+   /* ignore tmp Maildirs, for obvious reasons */
+   if (entry->d_type == DT_DIR &&
+   (strcmp (entry->d_name, "tmp") == 0) &&
+   is_maildir (namelist, num_entries))
+   continue;
+   
+   /* ignore special directories and files */
+   if (ignore_dir_entry (path, entry))
+   continue;
+   
next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
 
if (stat (next, st)) {
-- 
1.6.3.3

___
notmuch mailing list
notmuch@notmuchmail.org
http://notmuchmail.org/mailman/listinfo/notmuch