Hello,
I opened bug https://bugzilla.gnome.org/show_bug.cgi?id=784894 to add
an escaping function that follows the current escaping rules, i.e. the
(by now 12-year-old) RFC 3986 definition.

This was prompted by this downstream bug:
https://bugzilla.redhat.com/show_bug.cgi?id=1458237
and the very good investigative work from John Dennis.

I would like to know if this is the right/acceptable approach to deal
with this issue as I would like to backport (or reimplement in the
downstream project until available in libxml2) this patch to the
relevant downstream packages in order to fix the above mentioned bug.

I am attaching the patch here as well.

HTH,
Simo.
From 2d210f10ebc98d13dc35b172636efecb91f1b92a Mon Sep 17 00:00:00 2001
From: Simo Sorce <s...@redhat.com>
Date: Tue, 11 Jul 2017 07:46:06 -0400
Subject: [PATCH] Add 3986 compatible URI Escape function

RFC 3986 changed the set of (un)reserved characters, so we provide
a function that uses that RFC to generate an escaped URI. The original
function is preserved for backwards compatibility.

Signed-off-by: Simo Sorce <s...@redhat.com>
---
 include/libxml/uri.h |   2 +
 python/setup.py      |   2 +-
 uri.c                | 239 ++++++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 229 insertions(+), 14 deletions(-)

diff --git a/include/libxml/uri.h b/include/libxml/uri.h
index db48262..c9321a5 100644
--- a/include/libxml/uri.h
+++ b/include/libxml/uri.h
@@ -81,6 +81,8 @@ XMLPUBFUN int XMLCALL
 		xmlNormalizeURIPath	(char *path);
 XMLPUBFUN xmlChar * XMLCALL
 		xmlURIEscape		(const xmlChar *str);
+XMLPUBFUN xmlChar * XMLCALL
+		xml3986URIEscape        (const xmlChar *str);
 XMLPUBFUN void XMLCALL
 		xmlFreeURI		(xmlURIPtr uri);
 XMLPUBFUN xmlChar* XMLCALL
diff --git a/python/setup.py b/python/setup.py
index c44269a..1ac4eac 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -8,7 +8,7 @@ from distutils.core import setup, Extension
 # Below ROOT, we expect to find include, include/libxml2, lib and bin.
 # On *nix, it is not needed (but should not harm),
 # on Windows, it is set by configure.js.
-ROOT = r'/usr'
+ROOT = r'/usr'
 
 # Thread-enabled libxml2
 with_threads = 1
diff --git a/uri.c b/uri.c
index 3b627e8..16fec64 100644
--- a/uri.c
+++ b/uri.c
@@ -1656,18 +1656,9 @@ xmlURIUnescapeString(const char *str, int len, char *target) {
     return(ret);
 }
 
-/**
- * xmlURIEscapeStr:
- * @str:  string to escape
- * @list: exception list string of chars not to escape
- *
- * This routine escapes a string to hex, ignoring reserved characters (a-z)
- * and the characters in the exception list.
- *
- * Returns a new escaped string or NULL in case of error.
- */
-xmlChar *
-xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
+static xmlChar *
+xmlURIEscapeStrExt(const xmlChar *str, const xmlChar *list,
+                   int func(xmlChar, const xmlChar *)) {
     xmlChar *ret, ch;
     xmlChar *temp;
     const xmlChar *in;
@@ -1701,7 +1692,7 @@ xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
 
 	ch = *in;
 
-	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
+	if (func(ch, list)) {
 	    unsigned char val;
 	    ret[out++] = '%';
 	    val = ch >> 4;
@@ -1724,6 +1715,29 @@ xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
     return(ret);
 }
 
+static int
+xmlURIEscapeChar(xmlChar ch, const xmlChar *list) {
+    /* Keep '@', unreserved characters and caller-listed exceptions. */
+    if ((ch == '@') || IS_UNRESERVED(ch) || (xmlStrchr(list, ch) != NULL))
+        return 0;
+    return 1;
+}
+
+/**
+ * xmlURIEscapeStr:
+ * @str:  string to escape
+ * @list: exception list string of chars not to escape
+ *
+ * This routine escapes a string to hex, leaving alone '@', the unreserved
+ * characters and the characters in the exception list.
+ *
+ * Returns a new escaped string or NULL in case of error.
+ */
+xmlChar *
+xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
+    return(xmlURIEscapeStrExt(str, list, xmlURIEscapeChar));
+}
+
 /**
  * xmlURIEscape:
  * @str:  the string of the URI to escape
@@ -1857,6 +1871,205 @@ xmlURIEscape(const xmlChar * str)
     return (ret);
 }
 
+
+/* Returns 1 when @ch must be escaped in a scheme, 0 when it may stay. */
+static int
+xml3986SchemeEscapeChar(xmlChar ch, const xmlChar *unused ATTRIBUTE_UNUSED) {
+    /* ISA_ALPHA / ISA_DIGIT matches and "+-." pass through unchanged. */
+    if (ISA_ALPHA(&ch) || ISA_DIGIT(&ch) || xmlStrchr(BAD_CAST "+-.", ch))
+        return 0;
+    return 1;
+}
+
+/* Returns 1 when @ch must be escaped in an authority, 0 otherwise. */
+static int
+xml3986AuthorityEscapeChar(xmlChar ch, const xmlChar *unused ATTRIBUTE_UNUSED) {
+    int keep = ISA_UNRESERVED(&ch) || ISA_SUB_DELIM(&ch) ||
+               (xmlStrchr(BAD_CAST ":@[]", ch) != NULL);
+
+    return keep ? 0 : 1;
+}
+
+/* Returns 1 when @ch must be escaped in the user part, 0 otherwise. */
+static int
+xml3986UserEscapeChar(xmlChar ch, const xmlChar *unused ATTRIBUTE_UNUSED) {
+    if (ISA_UNRESERVED(&ch) || ISA_SUB_DELIM(&ch) || (ch == ':'))
+        return 0;
+    return 1;
+}
+
+/* Returns 1 when @ch must be escaped in the host part, 0 otherwise. */
+static int
+xml3986HostEscapeChar(xmlChar ch, const xmlChar *unused ATTRIBUTE_UNUSED) {
+    if (ISA_UNRESERVED(&ch) || ISA_SUB_DELIM(&ch) ||
+        xmlStrchr(BAD_CAST "[]:.", ch))
+        return 0;
+    return 1;
+}
+
+/* 1 if @ch must be escaped in a path. Spells out pchar minus pct-encoded:
+ * ISA_PCHAR(&ch) would presumably peek at the bytes after the lone ch. */
+static int
+xml3986PathEscapeChar(xmlChar ch, const xmlChar *unused ATTRIBUTE_UNUSED) {
+    return !(ISA_UNRESERVED(&ch) || ISA_SUB_DELIM(&ch) ||
+             xmlStrchr(BAD_CAST ":@/", ch));
+}
+
+/* Returns 1 when @ch must be escaped in an opaque part, 0 otherwise. */
+static int
+xml3986OpaqueEscapeChar(xmlChar ch, const xmlChar *unused ATTRIBUTE_UNUSED) {
+    /* Only characters matching ISA_UNRESERVED are left alone. */
+    int keep = ISA_UNRESERVED(&ch);
+    return !keep;
+}
+
+/* 1 if @ch must be escaped in a query. Spells out pchar minus pct-encoded:
+ * ISA_PCHAR(&ch) would presumably peek at the bytes after the lone ch. */
+static int
+xml3986QueryEscapeChar(xmlChar ch, const xmlChar *unused ATTRIBUTE_UNUSED) {
+    return !(ISA_UNRESERVED(&ch) || ISA_SUB_DELIM(&ch) ||
+             xmlStrchr(BAD_CAST ":@/?", ch));
+}
+
+#define xml3986FragmentEscapeChar xml3986QueryEscapeChar
+
+/**
+ * xml3986URIEscape:
+ * @str:  the string of the URI to escape
+ *
+ * Escaping routine, does not do validity checks !
+ * The string is parsed as a URI reference and each component found is
+ * escaped with the character predicate of that component; this is
+ * heuristic based, it's impossible to be sure.
+ *
+ * Returns a newly built escaped copy of the string, or NULL if @str is
+ * NULL, cannot be parsed, has no component, or a segment allocation fails.
+ *
+ * Uses RFC 3986 rules to escape URI
+ */
+xmlChar *
+xml3986URIEscape(const xmlChar * str)
+{
+    xmlChar *ret = NULL, *segment = NULL;
+    xmlURIPtr uri;
+
+/* On a failed segment allocation: report, drop the partial result, bail. */
+#define NULLCHK(p) if (!(p)) { \
+         xmlURIErrMemory("escaping URI value\n"); \
+         xmlFreeURI(uri); \
+         if (ret != NULL) xmlFree(ret); \
+         return NULL; }
+
+    if (str == NULL)
+        return (NULL);
+
+    uri = xmlCreateURI();
+    if (uri == NULL)
+        return (NULL);
+
+    /*
+     * Allow escaping errors in the unescaped form
+     */
+    uri->cleanup = 1;
+    if (xmlParseURIReference(uri, (const char *) str) != 0) {
+        xmlFreeURI(uri);
+        return (NULL);
+    }
+
+    if (uri->scheme) {
+        segment = xmlURIEscapeStrExt(BAD_CAST uri->scheme,
+                                     NULL, xml3986SchemeEscapeChar);
+        NULLCHK(segment)
+        ret = xmlStrcat(ret, segment);
+        ret = xmlStrcat(ret, BAD_CAST ":");
+        xmlFree(segment);
+    }
+
+    /* An authority string, when set, is emitted instead of user/server/port. */
+    if (uri->authority) {
+        segment = xmlURIEscapeStrExt(BAD_CAST uri->authority,
+                                     NULL, xml3986AuthorityEscapeChar);
+        NULLCHK(segment)
+        ret = xmlStrcat(ret, BAD_CAST "//");
+        ret = xmlStrcat(ret, segment);
+        xmlFree(segment);
+    }
+    else {
+        if (uri->user) {
+            segment = xmlURIEscapeStrExt(BAD_CAST uri->user,
+                                         NULL, xml3986UserEscapeChar);
+            NULLCHK(segment)
+            ret = xmlStrcat(ret, BAD_CAST "//");
+            ret = xmlStrcat(ret, segment);
+            ret = xmlStrcat(ret, BAD_CAST "@");
+            xmlFree(segment);
+        }
+
+        if (uri->server) {
+            segment = xmlURIEscapeStrExt(BAD_CAST uri->server,
+                                         NULL, xml3986HostEscapeChar);
+            NULLCHK(segment)
+            if (uri->user == NULL)
+                ret = xmlStrcat(ret, BAD_CAST "//");
+            ret = xmlStrcat(ret, segment);
+            xmlFree(segment);
+        }
+
+        /* Non-positive ports are treated as absent; 12 bytes fit any int. */
+        if (uri->port > 0) {
+            xmlChar port[12];
+
+            snprintf((char *) port, sizeof(port), "%d", uri->port);
+            ret = xmlStrcat(ret, BAD_CAST ":");
+            ret = xmlStrcat(ret, port);
+        }
+    }
+
+    if (uri->path) {
+        segment = xmlURIEscapeStrExt(BAD_CAST uri->path,
+                                     NULL, xml3986PathEscapeChar);
+        NULLCHK(segment)
+        ret = xmlStrcat(ret, segment);
+        xmlFree(segment);
+    }
+
+    /* query_raw is copied verbatim; only the parsed query gets escaped. */
+    if (uri->query_raw) {
+        ret = xmlStrcat(ret, BAD_CAST "?");
+        ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
+    }
+    else if (uri->query) {
+        segment = xmlURIEscapeStrExt(BAD_CAST uri->query,
+                                     NULL, xml3986QueryEscapeChar);
+        NULLCHK(segment)
+        ret = xmlStrcat(ret, BAD_CAST "?");
+        ret = xmlStrcat(ret, segment);
+        xmlFree(segment);
+    }
+
+    if (uri->opaque) {
+        segment = xmlURIEscapeStrExt(BAD_CAST uri->opaque,
+                                     NULL, xml3986OpaqueEscapeChar);
+        NULLCHK(segment)
+        ret = xmlStrcat(ret, segment);
+        xmlFree(segment);
+    }
+
+    if (uri->fragment) {
+        segment = xmlURIEscapeStrExt(BAD_CAST uri->fragment,
+                                     NULL, xml3986FragmentEscapeChar);
+        NULLCHK(segment)
+        ret = xmlStrcat(ret, BAD_CAST "#");
+        ret = xmlStrcat(ret, segment);
+        xmlFree(segment);
+    }
+
+    xmlFreeURI(uri);
+#undef NULLCHK
+
+    return (ret);
+}
+
 /************************************************************************
  *									*
  *			Public functions				*
-- 
2.9.4

_______________________________________________
xml mailing list, project page  http://xmlsoft.org/
xml@gnome.org
https://mail.gnome.org/mailman/listinfo/xml

Reply via email to