git send-email refuses to send the test patch because it contains lines
longer than 998 characters, so I’m sending the patches as attachments to
this email.  (Is there a better way to include the expected output of a
notmuch command that outputs long lines in a test script?)

>From 23836241dd304b98f2a05803fbb5a5a94f563050 Mon Sep 17 00:00:00 2001
From: Aaron Ecay <[email protected]>
Date: Sun, 3 Mar 2013 18:14:07 -0500
Subject: [PATCH 1/2] test: add tests for the handling of References and
 In-Reply-To headers

These tests are marked known_broken; the following commit fixes them.
---
 test/thread-replies | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100755 test/thread-replies

diff --git a/test/thread-replies b/test/thread-replies
new file mode 100755
index 0000000..fd11a09
--- /dev/null
+++ b/test/thread-replies
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+#
+# Copyright (c) 2013 Aaron Ecay
+#
+
+test_description='test of proper handling of in-reply-to and references headers
+
+This test makes sure that the thread structure in the notmuch database is
+constructed properly, even in the presence of non-RFC-compliant headers'
+
+. ./test-lib.sh
+
+test_begin_subtest "Use References when In-Reply-To is broken"
+test_subtest_known_broken
+add_message '[id]="[email protected]"' \
+    '[subject]=one'
+add_message '[in-reply-to]="mumble"' \
+    '[references]="<[email protected]>"' \
+    '[subject]="Re: one"'
+output=$(notmuch show --format=json 'subject:one')
+test_expect_equal "$output" '[[[{"id": "[email protected]", "match": true, "excluded": false, "filename": "/home/aecay/development/notmuch/notmuch-git/src/notmuch/test/tmp.thread-replies/mail/msg-001", "timestamp": 978709437, "date_relative": "2001-01-05", "tags": ["inbox", "unread"], "headers": {"Subject": "one", "From": "Notmuch Test Suite <[email protected]>", "To": "Notmuch Test Suite <[email protected]>", "Date": "Fri, 05 Jan 2001 15:43:57 +0000"}, "body": [{"id": 1, "content-type": "text/plain", "content": "This is just a test message (#1)\n"}]}, [[{"id": "msg-002@notmuch-test-suite", "match": true, "excluded": false, "filename": "/home/aecay/development/notmuch/notmuch-git/src/notmuch/test/tmp.thread-replies/mail/msg-002", "timestamp": 978709437, "date_relative": "2001-01-05", "tags": ["inbox", "unread"], "headers": {"Subject": "Re: one", "From": "Notmuch Test Suite <[email protected]>", "To": "Notmuch Test Suite <[email protected]>", "Date": "Fri, 05 Jan 2001 15:43:57 +0000"}, "body": [{"id": 1, "content-type": "text/plain", "content": "This is just a test message (#2)\n"}]}, []]]]]]'
+
+test_begin_subtest "Prefer References to In-Reply-To"
+test_subtest_known_broken
+add_message '[id]="[email protected]"' \
+    '[subject]=two'
+add_message '[in-reply-to]="<[email protected]>"' \
+    '[references]="<[email protected]>"' \
+    '[subject]="Re: two"'
+output=$(notmuch show --format=json 'subject:two')
+test_expect_equal "$output" '[[[{"id": "[email protected]", "match": true, "excluded": false, "filename": "/home/aecay/development/notmuch/notmuch-git/src/notmuch/test/tmp.thread-replies/mail/msg-003", "timestamp": 978709437, "date_relative": "2001-01-05", "tags": ["inbox", "unread"], "headers": {"Subject": "two", "From": "Notmuch Test Suite <[email protected]>", "To": "Notmuch Test Suite <[email protected]>", "Date": "Fri, 05 Jan 2001 15:43:57 +0000"}, "body": [{"id": 1, "content-type": "text/plain", "content": "This is just a test message (#3)\n"}]}, [[{"id": "msg-004@notmuch-test-suite", "match": true, "excluded": false, "filename": "/home/aecay/development/notmuch/notmuch-git/src/notmuch/test/tmp.thread-replies/mail/msg-004", "timestamp": 978709437, "date_relative": "2001-01-05", "tags": ["inbox", "unread"], "headers": {"Subject": "Re: two", "From": "Notmuch Test Suite <[email protected]>", "To": "Notmuch Test Suite <[email protected]>", "Date": "Fri, 05 Jan 2001 15:43:57 +0000"}, "body": [{"id": 1, "content-type": "text/plain", "content": "This is just a test message (#4)\n"}]}, []]]]]]'
+
+test_begin_subtest "Use In-Reply-To when no References"
+test_subtest_known_broken
+add_message '[id]="[email protected]"' \
+    '[subject]="three"'
+add_message '[in-reply-to]="<[email protected]>"' \
+    '[subject]="Re: three"'
+output=$(notmuch show --format=json 'subject:three')
+test_expect_equal "$output" '[[[{"id": "[email protected]", "match": true, "excluded": false, "filename": "/home/aecay/development/notmuch/notmuch-git/src/notmuch/test/tmp.thread-replies/mail/msg-005", "timestamp": 978709437, "date_relative": "2001-01-05", "tags": ["inbox", "unread"], "headers": {"Subject": "three", "From": "Notmuch Test Suite <[email protected]>", "To": "Notmuch Test Suite <[email protected]>", "Date": "Fri, 05 Jan 2001 15:43:57 +0000"}, "body": [{"id": 1, "content-type": "text/plain", "content": "This is just a test message (#5)\n"}]}, [[{"id": "msg-006@notmuch-test-suite", "match": true, "excluded": false, "filename": "/home/aecay/development/notmuch/notmuch-git/src/notmuch/test/tmp.thread-replies/mail/msg-006", "timestamp": 978709437, "date_relative": "2001-01-05", "tags": ["inbox", "unread"], "headers": {"Subject": "Re: three", "From": "Notmuch Test Suite <[email protected]>", "To": "Notmuch Test Suite <[email protected]>", "Date": "Fri, 05 Jan 2001 15:43:57 +0000"}, "body": [{"id": 1, "content-type": "text/plain", "content": "This is just a test message (#6)\n"}]}, []]]]]]'
+
+test_begin_subtest "Use last Reference"
+test_subtest_known_broken
+add_message '[id]="[email protected]"' \
+    '[subject]="four"'
+add_message '[id]="[email protected]"' \
+    '[subject]="not-four"'
+add_message '[in-reply-to]="<[email protected]>"' \
+    '[references]="<[email protected]> <[email protected]>"' \
+    '[subject]="neither"'
+output=$(notmuch show --format=json 'subject:four')
+test_expect_equal "$output" '[[[{"id": "[email protected]", "match": true, "excluded": false, "filename": "/home/aecay/development/notmuch/notmuch-git/src/notmuch/test/tmp.thread-replies/mail/msg-007", "timestamp": 978709437, "date_relative": "2001-01-05", "tags": ["inbox", "unread"], "headers": {"Subject": "four", "From": "Notmuch Test Suite <[email protected]>", "To": "Notmuch Test Suite <[email protected]>", "Date": "Fri, 05 Jan 2001 15:43:57 +0000"}, "body": [{"id": 1, "content-type": "text/plain", "content": "This is just a test message (#7)\n"}]}, [[{"id": "msg-009@notmuch-test-suite", "match": false, "excluded": false, "filename": "/home/aecay/development/notmuch/notmuch-git/src/notmuch/test/tmp.thread-replies/mail/msg-009", "timestamp": 978709437, "date_relative": "2001-01-05", "tags": ["inbox", "unread"], "headers": {"Subject": "neither", "From": "Notmuch Test Suite <[email protected]>", "To": "Notmuch Test Suite <[email protected]>", "Date": "Fri, 05 Jan 2001 15:43:57 +0000"}, "body": [{"id": 1, "content-type": "text/plain", "content": "This is just a test message (#9)\n"}]}, []]]]], [[{"id": "[email protected]", "match": true, "excluded": false, "filename": "/home/aecay/development/notmuch/notmuch-git/src/notmuch/test/tmp.thread-replies/mail/msg-008", "timestamp": 978709437, "date_relative": "2001-01-05", "tags": ["inbox", "unread"], "headers": {"Subject": "not-four", "From": "Notmuch Test Suite <[email protected]>", "To": "Notmuch Test Suite <[email protected]>", "Date": "Fri, 05 Jan 2001 15:43:57 +0000"}, "body": [{"id": 1, "content-type": "text/plain", "content": "This is just a test message (#8)\n"}]}, []]]]'
+
+
+test_done
-- 
1.8.1.5

>From 57739b9722a86ba50ef97ad7d5d21b3e5bc1a977 Mon Sep 17 00:00:00 2001
From: Aaron Ecay <[email protected]>
Date: Mon, 25 Feb 2013 18:46:41 -0500
Subject: [PATCH 2/2] lib/database.cc: change how the parent of a message is
 calculated

Presently, the code which finds the parent of a message as it is being
added to the database assumes that the first Message-ID-like substring
of the In-Reply-To header is the parent Message ID.  Some mail clients,
however, put stuff other than the Message-ID of the parent in the
In-Reply-To header, such as the email address of the sender of the
parent.  This can fool notmuch.

The updated algorithm prefers the last Message ID in the References
header.  The References header lists messages oldest-first, so the last
Message ID is the parent (RFC 2822, p. 24).  The References header is
also less likely to use a non-standard syntax (see
http://cr.yp.to/immhf/thread.html and
http://www.jwz.org/doc/threading.html).  If the References header is
not present, fall back to the old behavior.

V2 of this patch, incorporating feedback from Jani and (indirectly)
Austin.
---
 lib/database.cc     | 48 +++++++++++++++++++++++++++++++++---------------
 test/thread-replies |  4 ----
 2 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index 91d4329..52ed618 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -501,8 +501,10 @@ _parse_message_id (void *ctx, const char *message_id, const char **next)
  * 'message_id' in the result (to avoid mass confusion when a single
  * message references itself cyclically---and yes, mail messages are
  * not infrequent in the wild that do this---don't ask me why).
-*/
-static void
+ *
+ * Return the last reference parsed, if it is not equal to message_id.
+ */
+static char *
 parse_references (void *ctx,
 		  const char *message_id,
 		  GHashTable *hash,
@@ -511,7 +513,7 @@ parse_references (void *ctx,
     char *ref;
 
     if (refs == NULL || *refs == '\0')
-	return;
+	return NULL;
 
     while (*refs) {
 	ref = _parse_message_id (ctx, refs, &refs);
@@ -519,6 +521,17 @@ parse_references (void *ctx,
 	if (ref && strcmp (ref, message_id))
 	    g_hash_table_insert (hash, ref, NULL);
     }
+
+    /* The return value of this function is used to add a parent
+     * reference to the database.  We should avoid making a message
+     * its own parent, thus the following check.
+     */
+
+    if (ref && strcmp(ref, message_id)) {
+	return ref;
+    } else {
+	return NULL;
+    }
 }
 
 notmuch_status_t
@@ -1510,28 +1523,33 @@ _notmuch_database_link_message_to_parents (notmuch_database_t *notmuch,
 {
     GHashTable *parents = NULL;
     const char *refs, *in_reply_to, *in_reply_to_message_id;
+    const char *last_ref_message_id, *this_message_id;
     GList *l, *keys = NULL;
     notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
 
     parents = g_hash_table_new_full (g_str_hash, g_str_equal,
 				     _my_talloc_free_for_g_hash, NULL);
+    this_message_id = notmuch_message_get_message_id (message);
 
     refs = notmuch_message_file_get_header (message_file, "references");
-    parse_references (message, notmuch_message_get_message_id (message),
-		      parents, refs);
+    last_ref_message_id = parse_references (message,
+					    this_message_id,
+					    parents, refs);
 
     in_reply_to = notmuch_message_file_get_header (message_file, "in-reply-to");
-    parse_references (message, notmuch_message_get_message_id (message),
-		      parents, in_reply_to);
-
-    /* Carefully avoid adding any self-referential in-reply-to term. */
-    in_reply_to_message_id = _parse_message_id (message, in_reply_to, NULL);
-    if (in_reply_to_message_id &&
-	strcmp (in_reply_to_message_id,
-		notmuch_message_get_message_id (message)))
-    {
+    in_reply_to_message_id = parse_references (message,
+					       this_message_id,
+					       parents, in_reply_to);
+
+    /* For the parent of this message, use the last message ID of the
+     * References header, if available.  If not, fall back to the
+     * first message ID in the In-Reply-To header. */
+    if (last_ref_message_id) {
+        _notmuch_message_add_term (message, "replyto",
+                                   last_ref_message_id);
+    } else if (in_reply_to_message_id) {
 	_notmuch_message_add_term (message, "replyto",
-			     _parse_message_id (message, in_reply_to, NULL));
+			     in_reply_to_message_id);
     }
 
     keys = g_hash_table_get_keys (parents);
diff --git a/test/thread-replies b/test/thread-replies
index fd11a09..6dc6143 100755
--- a/test/thread-replies
+++ b/test/thread-replies
@@ -11,7 +11,6 @@ constructed properly, even in the presence of non-RFC-compliant headers'
 . ./test-lib.sh
 
 test_begin_subtest "Use References when In-Reply-To is broken"
-test_subtest_known_broken
 add_message '[id]="[email protected]"' \
     '[subject]=one'
 add_message '[in-reply-to]="mumble"' \
@@ -21,7 +20,6 @@ output=$(notmuch show --format=json 'subject:one')
 test_expect_equal "$output" '[[[{"id": "[email protected]", "match": true, "excluded": false, "filename": "/home/aecay/development/notmuch/notmuch-git/src/notmuch/test/tmp.thread-replies/mail/msg-001", "timestamp": 978709437, "date_relative": "2001-01-05", "tags": ["inbox", "unread"], "headers": {"Subject": "one", "From": "Notmuch Test Suite <[email protected]>", "To": "Notmuch Test Suite <[email protected]>", "Date": "Fri, 05 Jan 2001 15:43:57 +0000"}, "body": [{"id": 1, "content-type": "text/plain", "content": "This is just a test message (#1)\n"}]}, [[{"id": "msg-002@notmuch-test-suite", "match": true, "excluded": false, "filename": "/home/aecay/development/notmuch/notmuch-git/src/notmuch/test/tmp.thread-replies/mail/msg-002", "timestamp": 978709437, "date_relative": "2001-01-05", "tags": ["inbox", "unread"], "headers": {"Subject": "Re: one", "From": "Notmuch Test Suite <[email protected]>", "To": "Notmuch Test Suite <[email protected]>", "Date": "Fri, 05 Jan 2001 15:43:57 +0000"}, "body": [{"id": 1, "content-type": "text/plain", "content": "This is just a test message (#2)\n"}]}, []]]]]]'
 
 test_begin_subtest "Prefer References to In-Reply-To"
-test_subtest_known_broken
 add_message '[id]="[email protected]"' \
     '[subject]=two'
 add_message '[in-reply-to]="<[email protected]>"' \
@@ -31,7 +29,6 @@ output=$(notmuch show --format=json 'subject:two')
 test_expect_equal "$output" '[[[{"id": "[email protected]", "match": true, "excluded": false, "filename": "/home/aecay/development/notmuch/notmuch-git/src/notmuch/test/tmp.thread-replies/mail/msg-003", "timestamp": 978709437, "date_relative": "2001-01-05", "tags": ["inbox", "unread"], "headers": {"Subject": "two", "From": "Notmuch Test Suite <[email protected]>", "To": "Notmuch Test Suite <[email protected]>", "Date": "Fri, 05 Jan 2001 15:43:57 +0000"}, "body": [{"id": 1, "content-type": "text/plain", "content": "This is just a test message (#3)\n"}]}, [[{"id": "msg-004@notmuch-test-suite", "match": true, "excluded": false, "filename": "/home/aecay/development/notmuch/notmuch-git/src/notmuch/test/tmp.thread-replies/mail/msg-004", "timestamp": 978709437, "date_relative": "2001-01-05", "tags": ["inbox", "unread"], "headers": {"Subject": "Re: two", "From": "Notmuch Test Suite <[email protected]>", "To": "Notmuch Test Suite <[email protected]>", "Date": "Fri, 05 Jan 2001 15:43:57 +0000"}, "body": [{"id": 1, "content-type": "text/plain", "content": "This is just a test message (#4)\n"}]}, []]]]]]'
 
 test_begin_subtest "Use In-Reply-To when no References"
-test_subtest_known_broken
 add_message '[id]="[email protected]"' \
     '[subject]="three"'
 add_message '[in-reply-to]="<[email protected]>"' \
@@ -40,7 +37,6 @@ output=$(notmuch show --format=json 'subject:three')
 test_expect_equal "$output" '[[[{"id": "[email protected]", "match": true, "excluded": false, "filename": "/home/aecay/development/notmuch/notmuch-git/src/notmuch/test/tmp.thread-replies/mail/msg-005", "timestamp": 978709437, "date_relative": "2001-01-05", "tags": ["inbox", "unread"], "headers": {"Subject": "three", "From": "Notmuch Test Suite <[email protected]>", "To": "Notmuch Test Suite <[email protected]>", "Date": "Fri, 05 Jan 2001 15:43:57 +0000"}, "body": [{"id": 1, "content-type": "text/plain", "content": "This is just a test message (#5)\n"}]}, [[{"id": "msg-006@notmuch-test-suite", "match": true, "excluded": false, "filename": "/home/aecay/development/notmuch/notmuch-git/src/notmuch/test/tmp.thread-replies/mail/msg-006", "timestamp": 978709437, "date_relative": "2001-01-05", "tags": ["inbox", "unread"], "headers": {"Subject": "Re: three", "From": "Notmuch Test Suite <[email protected]>", "To": "Notmuch Test Suite <[email protected]>", "Date": "Fri, 05 Jan 2001 15:43:57 +0000"}, "body": [{"id": 1, "content-type": "text/plain", "content": "This is just a test message (#6)\n"}]}, []]]]]]'
 
 test_begin_subtest "Use last Reference"
-test_subtest_known_broken
 add_message '[id]="[email protected]"' \
     '[subject]="four"'
 add_message '[id]="[email protected]"' \
-- 
1.8.1.5


-- 
Aaron Ecay
_______________________________________________
notmuch mailing list
[email protected]
http://notmuchmail.org/mailman/listinfo/notmuch

Reply via email to