When we see a message whose message ID is already stored in
the database, check whether the new file is larger than the
one we indexed. If it is, re-index the message and set the
stored file size and filename to those of the new file.
---

  Here's the (quite simple) patch to implement indexing the
  largest copy of each mail that we have.

  Does the re-indexing replace the old terms? If it does not,
  then in the case where two mails collide on the same ID but
  have different text, a search could return a mail that does
  not contain the text searched for. I don't think that's a
  big issue though, even if that is the case.

  Thanks,

  James

 lib/database.cc       |    4 +++-
 lib/index.cc          |   27 +++++++++++++++++++++++++++
 lib/message.cc        |   31 ++++++++++++++++++++++++++-----
 lib/notmuch-private.h |   13 +++++++++++++
 lib/notmuch.h         |    5 +++--
 5 files changed, 72 insertions(+), 8 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index d834d94..64f29b9 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -1000,7 +1000,9 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
            if (ret)
                goto DONE;
        } else {
-           ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
+           ret = _notmuch_message_possibly_reindex (message, filename, size);
+           if (!ret)
+               ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
            goto DONE;
        }

diff --git a/lib/index.cc b/lib/index.cc
index 125fa6c..14c3268 100644
--- a/lib/index.cc
+++ b/lib/index.cc
@@ -312,3 +312,30 @@ _notmuch_message_index_file (notmuch_message_t *message,

     return ret;
 }
+
+notmuch_status_t
+_notmuch_message_possibly_reindex (notmuch_message_t *message,
+                            const char *filename,
+                            const off_t size)
+{
+    off_t realsize = size;
+    off_t stored_size;
+    notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
+
+    ret = _notmuch_message_size_on_disk (message, filename, &realsize);
+    if (ret)
+        goto DONE;
+    stored_size = _notmuch_message_get_filesize (message);
+    if (realsize > stored_size) {
+       ret = _notmuch_message_index_file (message, filename);
+       if (ret)
+           goto DONE;
+       ret = _notmuch_message_set_filesize (message, filename, realsize);
+       _notmuch_message_set_filename (message, filename);
+       _notmuch_message_sync (message);
+    }
+
+  DONE:
+    return ret;
+
+}
diff --git a/lib/message.cc b/lib/message.cc
index 2bfc5ed..cc32741 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -427,23 +427,38 @@ _notmuch_message_set_filename (notmuch_message_t *message,
 }

 notmuch_status_t
-_notmuch_message_set_filesize (notmuch_message_t *message,
+_notmuch_message_size_on_disk (notmuch_message_t *message,
                               const char *filename,
-                              const off_t size)
+                              off_t *size)
 {
     struct stat st;
-    off_t realsize = size;
     notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;

-    if (realsize < 0) {
+    if (*size < 0) {
        if (stat (filename, &st)) {
            ret = NOTMUCH_STATUS_FILE_ERROR;
            goto DONE;
        } else {
-           realsize = st.st_size;
+           *size = st.st_size;
        }
     }

+  DONE:
+    return ret;
+}
+
+notmuch_status_t
+_notmuch_message_set_filesize (notmuch_message_t *message,
+                              const char *filename,
+                              const off_t size)
+{
+    off_t realsize = size;
+    notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
+
+    ret = _notmuch_message_size_on_disk (message, filename, &realsize);
+    if (ret)
+        goto DONE;
+
     message->doc.add_value (NOTMUCH_VALUE_FILESIZE,
                         Xapian::sortable_serialise (realsize));

@@ -451,6 +466,12 @@ _notmuch_message_set_filesize (notmuch_message_t *message,
     return ret;
 }

+off_t
+_notmuch_message_get_filesize (notmuch_message_t *message)
+{
+    return Xapian::sortable_unserialise (message->doc.get_value (NOTMUCH_VALUE_FILESIZE));
+}
+
 const char *
 notmuch_message_get_filename (notmuch_message_t *message)
 {
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 1ba3055..cf65fd9 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -199,6 +199,14 @@ _notmuch_message_set_filesize (notmuch_message_t *message,
                               const char *filename,
                               const off_t size);

+off_t
+_notmuch_message_get_filesize (notmuch_message_t *message);
+
+notmuch_status_t
+_notmuch_message_size_on_disk (notmuch_message_t *message,
+                              const char *filename,
+                              off_t *size);
+
 void
 _notmuch_message_ensure_thread_id (notmuch_message_t *message);

@@ -218,6 +226,11 @@ notmuch_status_t
 _notmuch_message_index_file (notmuch_message_t *message,
                             const char *filename);

+notmuch_status_t
+_notmuch_message_possibly_reindex (notmuch_message_t *message,
+                            const char *filename,
+                            const off_t size);
+
 /* message-file.c */

 /* XXX: I haven't decided yet whether these will actually get exported
diff --git a/lib/notmuch.h b/lib/notmuch.h
index 5d0d224..892e420 100644
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -256,8 +256,9 @@ notmuch_database_get_timestamp (notmuch_database_t *database,
  * NOTMUCH_STATUS_SUCCESS: Message successfully added to database.
  *
  * NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID: Message has the same message
- *     ID as another message already in the database. Nothing added
- *     to the database.
+ *     ID as another message already in the database. This may have
+ *     caused some further indexing to be done, but it is not an entirely
+ *     new message.
  *
  * NOTMUCH_STATUS_FILE_ERROR: an error occurred trying to open the
  *     file, (such as permission denied, or file not found,
-- 
1.6.3.3

Reply via email to