The goal is to have (subject foo-bar) match the same messages as
subject:foo-bar.
---
 lib/parse-sexp.cc         | 38 +++++++++++++++++++++++++++++++++-----
 test/T081-sexpr-search.sh |  8 ++++++++
 2 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 898cfdd0..fc6eb2d7 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -72,6 +72,34 @@ _notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *q
     return _sexp_to_xapian_query (notmuch, sx, output);
 }
 
+/* Split STR into runs of Xapian word characters and append each run,
+ * lower-cased and prefixed with PREF_STR, to TERMS. */
+static void
+_sexp_find_words (const char *str, const std::string &pref_str, std::vector<std::string> &terms)
+{
+    Xapian::Utf8Iterator p (str);
+    Xapian::Utf8Iterator end;
+
+    while (p != end) {
+       /* skip separators, i.e. anything that is not a word character */
+       while (p != end && ! Xapian::Unicode::is_wordchar (*p))
+           p++;
+
+       if (p == end)
+           break;
+
+       Xapian::Utf8Iterator start = p;
+
+       while (p != end && Xapian::Unicode::is_wordchar (*p))
+           p++;
+
+       /* Copy the raw UTF-8 bytes: dereferencing the iterators yields code
+        * points, which std::string (start, p) would narrow to single chars. */
+       std::string word (start.raw (), p.raw ());
+       terms.push_back (pref_str + Xapian::Unicode::tolower (word));
+    }
+}
+
 static notmuch_status_t
 _sexp_combine_field (const char *prefix,
                     Xapian::Query::op operation,
@@ -82,12 +110,12 @@ _sexp_combine_field (const char *prefix,
 
     for (const sexp_t *cur = sx; cur; cur = cur->next) {
        std::string pref_str = prefix;
-       std::string word = cur->val;
 
-       if (operation == Xapian::Query::OP_PHRASE)
-           word = Xapian::Unicode::tolower (word);
-
-       terms.push_back (pref_str + word);
+       if (operation == Xapian::Query::OP_PHRASE) {
+           _sexp_find_words (cur->val, pref_str, terms);
+       } else {
+           terms.push_back (pref_str + cur->val);
+       }
     }
     output = Xapian::Query (operation, terms.begin (), terms.end ());
     return NOTMUCH_STATUS_SUCCESS;
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 872f2603..8e042f88 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -34,6 +34,14 @@ add_message [subject]=utf8-sübjéct '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
 output=$(notmuch search --query-syntax=sexp '(subject utf8 sübjéct)' | notmuch_search_sanitize)
 test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
 
+test_begin_subtest "Search by 'subject' (utf-8, phrase-token):" # one unquoted, hyphenated atom
+output=$(notmuch search --query-syntax=sexp '(subject utf8-sübjéct)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
+
+test_begin_subtest "Search by 'subject' (utf-8, quoted string):" # one quoted string containing a space
+output=$(notmuch search --query-syntax=sexp '(subject "utf8 sübjéct")' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
+
 test_begin_subtest "Unbalanced parens"
 # A code 1 indicates the error was handled (a crash will return e.g. 139).
 test_expect_code 1 "notmuch search --query-syntax=sexp '('"
-- 
2.30.2
_______________________________________________
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-le...@notmuchmail.org

Reply via email to