Package: libxapian30
Version: 1.4.5-1
Severity: important
Tags: security patch upstream

I spotted an HTML escaping bug in Xapian::MSet::snippet() while working
on the code.  This issue has been assigned CVE-2018-0499 by the security
team.

This bug is fixed by yesterday's upstream release 1.4.6 which I'm
intending to upload to unstable very shortly.  The attached patch should
be suitable for fixing this in older 1.4.x releases (1.2.x isn't
affected).

Cheers,
    Olly
diff --git a/xapian-core/queryparser/termgenerator_internal.cc b/xapian-core/queryparser/termgenerator_internal.cc
index 7fa807db6064..fece98554ebb 100644
--- a/xapian-core/queryparser/termgenerator_internal.cc
+++ b/xapian-core/queryparser/termgenerator_internal.cc
@@ -432,6 +432,27 @@ SnipPipe::done()
     }
 }
 
+inline void
+append_escaping_xml(const char* p, const char* end, string& output)
+{
+    while (p != end) {
+	char ch = *p++;
+	switch (ch) {
+	    case '&':
+		output += "&amp;";
+		break;
+	    case '<':
+		output += "&lt;";
+		break;
+	    case '>':
+		output += "&gt;";
+		break;
+	    default:
+		output += ch;
+	}
+    }
+}
+
 inline bool
 SnipPipe::drain(const string & input,
 		const string & hi_start,
@@ -465,7 +486,7 @@ SnipPipe::drain(const string & input,
 
 	if (punc) {
 	    // Include end of sentence punctuation.
-	    output.append(input.data() + best_end, i.raw());
+	    append_escaping_xml(input.data() + best_end, i.raw(), output);
 	} else {
 	    // Append "..." or equivalent if this doesn't seem to be the start
 	    // of a sentence.
@@ -523,8 +544,7 @@ SnipPipe::drain(const string & input,
 	while (i != Utf8Iterator()) {
 	    unsigned ch = *i;
 	    if (Unicode::is_wordchar(ch)) {
-		const char * p = input.data() + best_begin;
-		output.append(p, i.raw() - p);
+		append_escaping_xml(input.data() + best_begin, i.raw(), output);
 		best_begin = i.raw() - input.data();
 		break;
 	    }
@@ -537,22 +557,9 @@ SnipPipe::drain(const string & input,
 	if (phrase_len) output += hi_start;
     }
 
-    while (best_begin != word.term_end) {
-	char ch = input[best_begin++];
-	switch (ch) {
-	    case '&':
-		output += "&amp;";
-		break;
-	    case '<':
-		output += "&lt;";
-		break;
-	    case '>':
-		output += "&gt;";
-		break;
-	    default:
-		output += ch;
-	}
-    }
+    const char* p = input.data();
+    append_escaping_xml(p + best_begin, p + word.term_end, output);
+    best_begin = word.term_end;
 
     if (phrase_len && --phrase_len == 0) output += hi_end;
 
diff --git a/xapian-core/tests/api_snippets.cc b/xapian-core/tests/api_snippets.cc
index 4c9296f88d84..70f6afac28bf 100644
--- a/xapian-core/tests/api_snippets.cc
+++ b/xapian-core/tests/api_snippets.cc
@@ -313,3 +313,23 @@ DEFINE_TESTCASE(snippet_empty, backend) {
 
     return true;
 }
+
+/// Check snippets escape HTML/XML suitably.
+DEFINE_TESTCASE(snippet_html_escape, backend) {
+    Xapian::Enquire enquire(get_database("apitest_simpledata"));
+    enquire.set_query(Xapian::Query("foo"));
+
+    Xapian::MSet mset = enquire.get_mset(0, 0);
+
+    Xapian::Stem stem;
+
+    const char *input = "#include <foo.h> to use libfoo";
+    TEST_STRINGS_EQUAL(mset.snippet(input, 12, stem),
+		       "...&lt;<b>foo</b>.h&gt; to...");
+
+    input = "&foo takes the address of foo";
+    TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
+		       "&amp;<b>foo</b> takes the address of <b>foo</b>");
+
+    return true;
+}

Attachment: signature.asc
Description: PGP signature

Reply via email to