Hello,
I opened bug https://bugzilla.gnome.org/show_bug.cgi?id=784894 to add
an escaping function that follows the current escaping rules, i.e. the
(by now 12-year-old) RFC 3986 definition.
This was prompted by this downstream bug:
https://bugzilla.redhat.com/show_bug.cgi?id=1458237
and the very good investigative work from John Dennis.
I would like to know if this is the right/acceptable approach to deal
with this issue as I would like to backport (or reimplement in the
downstream project until available in libxml2) this patch to the
relevant downstream packages in order to fix the above mentioned bug.
I am attaching the patch here as well.
HTH,
Simo.
From 2d210f10ebc98d13dc35b172636efecb91f1b92a Mon Sep 17 00:00:00 2001
From: Simo Sorce <s...@redhat.com>
Date: Tue, 11 Jul 2017 07:46:06 -0400
Subject: [PATCH] Add 3986 compatible URI Escape function
RFC 3986 changed the set of (un)reserved characters, so we provide
a function that uses that RFC to generate an escaped URI. The original
function is preserved for backwards compatibility.
Signed-off-by: Simo Sorce <s...@redhat.com>
---
include/libxml/uri.h | 2 +
python/setup.py | 2 +-
uri.c | 239 ++++++++++++++++++++++++++++++++++++++++++++++++---
3 files changed, 229 insertions(+), 14 deletions(-)
diff --git a/include/libxml/uri.h b/include/libxml/uri.h
index db48262..c9321a5 100644
--- a/include/libxml/uri.h
+++ b/include/libxml/uri.h
@@ -81,6 +81,8 @@ XMLPUBFUN int XMLCALL
xmlNormalizeURIPath (char *path);
XMLPUBFUN xmlChar * XMLCALL
xmlURIEscape (const xmlChar *str);
+XMLPUBFUN xmlChar * XMLCALL
+ xml3986URIEscape (const xmlChar *str);
XMLPUBFUN void XMLCALL
xmlFreeURI (xmlURIPtr uri);
XMLPUBFUN xmlChar* XMLCALL
diff --git a/python/setup.py b/python/setup.py
index c44269a..1ac4eac 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -8,7 +8,7 @@ from distutils.core import setup, Extension
# Below ROOT, we expect to find include, include/libxml2, lib and bin.
# On *nix, it is not needed (but should not harm),
# on Windows, it is set by configure.js.
-ROOT = r'/usr'
+ROOT = r'/usr/local'
# Thread-enabled libxml2
with_threads = 1
diff --git a/uri.c b/uri.c
index 3b627e8..16fec64 100644
--- a/uri.c
+++ b/uri.c
@@ -1656,18 +1656,9 @@ xmlURIUnescapeString(const char *str, int len, char *target) {
return(ret);
}
-/**
- * xmlURIEscapeStr:
- * @str: string to escape
- * @list: exception list string of chars not to escape
- *
- * This routine escapes a string to hex, ignoring reserved characters (a-z)
- * and the characters in the exception list.
- *
- * Returns a new escaped string or NULL in case of error.
- */
-xmlChar *
-xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
+static xmlChar *
+xmlURIEscapeStrExt(const xmlChar *str, const xmlChar *list,
+ int func(xmlChar, const xmlChar *)) {
xmlChar *ret, ch;
xmlChar *temp;
const xmlChar *in;
@@ -1701,7 +1692,7 @@ xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
ch = *in;
- if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
+ if (func(ch, list)) {
unsigned char val;
ret[out++] = '%';
val = ch >> 4;
@@ -1724,6 +1715,29 @@ xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
return(ret);
}
+static int /* legacy rule: 1 = escape ch, 0 = copy it through unchanged */
+xmlURIEscapeChar(xmlChar ch, const xmlChar *list) {
+    /* '@', IS_UNRESERVED characters and anything in @list stay literal */
+    if ((ch == '@') || IS_UNRESERVED(ch) || (xmlStrchr(list, ch) != NULL))
+        return 0;
+    return 1;
+}
+
+/**
+ * xmlURIEscapeStr:
+ * @str: string to escape
+ * @list: exception list string of chars not to escape
+ *
+ * This routine escapes a string to hex, leaving '@', unreserved characters
+ * and the characters in the exception list unescaped.
+ *
+ * Returns a new escaped string or NULL in case of error.
+ */
+xmlChar *
+xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
+ return xmlURIEscapeStrExt(str, list, xmlURIEscapeChar);
+}
+
/**
* xmlURIEscape:
* @str: the string of the URI to escape
@@ -1857,6 +1871,205 @@ xmlURIEscape(const xmlChar * str)
return (ret);
}
+
+static int /* 1 = escape ch inside the scheme component, 0 = keep it */
+xml3986SchemeEscapeChar(xmlChar ch, const xmlChar *unused ATTRIBUTE_UNUSED) {
+    /* Whatever ISA_ALPHA / ISA_DIGIT accept is copied through verbatim. */
+    if (ISA_ALPHA(&ch) || ISA_DIGIT(&ch))
+        return 0;
+    /* Otherwise escape everything except '+', '-' and '.'. */
+    return (xmlStrchr(BAD_CAST "+-.", ch) == NULL) ? 1 : 0;
+}
+
+static int /* 1 = escape ch inside the authority component, 0 = keep it */
+xml3986AuthorityEscapeChar(xmlChar ch, const xmlChar *unused ATTRIBUTE_UNUSED) {
+    /* Whatever ISA_UNRESERVED / ISA_SUB_DELIM accept is copied verbatim. */
+    if (ISA_UNRESERVED(&ch) || ISA_SUB_DELIM(&ch))
+        return 0;
+    /* Otherwise escape everything except ':', '@', '[' and ']'. */
+    return (xmlStrchr(BAD_CAST ":@[]", ch) == NULL) ? 1 : 0;
+}
+
+static int /* 1 = escape ch inside the user component, 0 = keep it */
+xml3986UserEscapeChar(xmlChar ch, const xmlChar *unused ATTRIBUTE_UNUSED) {
+    /* Keep ':' plus whatever ISA_UNRESERVED / ISA_SUB_DELIM accept. */
+    if (ISA_UNRESERVED(&ch) || ISA_SUB_DELIM(&ch) || (ch == ':'))
+        return 0;
+    return 1;
+}
+
+static int /* 1 = escape ch inside the host (server) component, 0 = keep it */
+xml3986HostEscapeChar(xmlChar ch, const xmlChar *unused ATTRIBUTE_UNUSED) {
+    /* Whatever ISA_UNRESERVED / ISA_SUB_DELIM accept is copied verbatim. */
+    if (ISA_UNRESERVED(&ch) || ISA_SUB_DELIM(&ch))
+        return 0;
+    /* Otherwise escape everything except '[', ']', ':' and '.'. */
+    return (xmlStrchr(BAD_CAST "[]:.", ch) == NULL) ? 1 : 0;
+}
+
+static int /* 1 = escape ch inside the path component, 0 = keep it */
+xml3986PathEscapeChar(xmlChar ch, const xmlChar *unused ATTRIBUTE_UNUSED) {
+    /* pchar without pct-encoded. NOTE(review): uri.c's ISA_PCHAR peeks at
+     * p[1]/p[2] after a '%', which overruns the one-byte 'ch' - confirm. */
+    return !(ISA_UNRESERVED(&ch) || ISA_SUB_DELIM(&ch) ||
+             (ch == ':') || (ch == '@') || (ch == '/'));
+}
+
+static int /* 1 = escape ch inside an opaque part, 0 = keep it */
+xml3986OpaqueEscapeChar(xmlChar ch, const xmlChar *unused ATTRIBUTE_UNUSED) {
+    /* Only what ISA_UNRESERVED accepts is copied through verbatim. */
+    if (ISA_UNRESERVED(&ch))
+        return 0;
+    return 1;
+}
+
+static int /* 1 = escape ch inside the query component, 0 = keep it */
+xml3986QueryEscapeChar(xmlChar ch, const xmlChar *unused ATTRIBUTE_UNUSED) {
+    /* pchar without pct-encoded, plus '/' and '?'. NOTE(review): uri.c's
+     * ISA_PCHAR peeks at p[1]/p[2] after '%', overrunning 'ch' - confirm. */
+    return !(ISA_UNRESERVED(&ch) || ISA_SUB_DELIM(&ch) || (ch == ':') ||
+             (ch == '@') || (ch == '/') || (ch == '?'));
+}
+
+#define xml3986FragmentEscapeChar xml3986QueryEscapeChar
+
+/**
+ * xml3986URIEscape:
+ * @str: the string of the URI to escape
+ *
+ * Escaping routine using the RFC 3986 character classes; it does not do
+ * validity checks! The URI is parsed into its components and each one is
+ * escaped with the rule for that component. This is heuristic based, so
+ * it is impossible to be sure the result is what the caller intended.
+ *
+ * Returns a new escaped copy of the string, or NULL if @str is NULL, cannot
+ * be parsed, yields no component at all, or a memory allocation fails.
+ */
+xmlChar *
+xml3986URIEscape(const xmlChar * str)
+{
+    xmlChar *ret = NULL, *segment;
+    xmlURIPtr uri;
+
+    if (str == NULL)
+        return (NULL);
+
+    uri = xmlCreateURI();
+    if (uri == NULL)
+        return (NULL);
+
+    /* Allow escaping errors in the unescaped form */
+    uri->cleanup = 1;
+    if (xmlParseURIReference(uri, (const char *)str)) {
+        xmlFreeURI(uri);
+        return (NULL);
+    }
+
+    if (uri->scheme) {
+        segment = xmlURIEscapeStrExt(BAD_CAST uri->scheme,
+                                     NULL, xml3986SchemeEscapeChar);
+        if (segment == NULL)
+            goto mem_error;
+        ret = xmlStrcat(ret, segment);
+        ret = xmlStrcat(ret, BAD_CAST ":");
+        xmlFree(segment);
+    }
+
+    if (uri->authority) {
+        segment = xmlURIEscapeStrExt(BAD_CAST uri->authority,
+                                     NULL, xml3986AuthorityEscapeChar);
+        if (segment == NULL)
+            goto mem_error;
+        ret = xmlStrcat(ret, BAD_CAST "//");
+        ret = xmlStrcat(ret, segment);
+        xmlFree(segment);
+    }
+    else {
+        if (uri->user) {
+            segment = xmlURIEscapeStrExt(BAD_CAST uri->user,
+                                         NULL, xml3986UserEscapeChar);
+            if (segment == NULL)
+                goto mem_error;
+            ret = xmlStrcat(ret, BAD_CAST "//");
+            ret = xmlStrcat(ret, segment);
+            ret = xmlStrcat(ret, BAD_CAST "@");
+            xmlFree(segment);
+        }
+
+        if (uri->server) {
+            segment = xmlURIEscapeStrExt(BAD_CAST uri->server,
+                                         NULL, xml3986HostEscapeChar);
+            if (segment == NULL)
+                goto mem_error;
+            if (uri->user == NULL)
+                ret = xmlStrcat(ret, BAD_CAST "//");
+            ret = xmlStrcat(ret, segment);
+            xmlFree(segment);
+        }
+
+        if (uri->port) {
+            xmlChar port[16];
+
+            snprintf((char *) port, sizeof(port), "%d", uri->port);
+            ret = xmlStrcat(ret, BAD_CAST ":");
+            ret = xmlStrcat(ret, port);
+        }
+    }
+
+    if (uri->path) {
+        segment = xmlURIEscapeStrExt(BAD_CAST uri->path,
+                                     NULL, xml3986PathEscapeChar);
+        if (segment == NULL)
+            goto mem_error;
+        ret = xmlStrcat(ret, segment);
+        xmlFree(segment);
+    }
+
+    /* query_raw is appended as-is (presumably already in escaped form). */
+    if (uri->query_raw) {
+        ret = xmlStrcat(ret, BAD_CAST "?");
+        ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
+    }
+    else if (uri->query) {
+        segment = xmlURIEscapeStrExt(BAD_CAST uri->query,
+                                     NULL, xml3986QueryEscapeChar);
+        if (segment == NULL)
+            goto mem_error;
+        ret = xmlStrcat(ret, BAD_CAST "?");
+        ret = xmlStrcat(ret, segment);
+        xmlFree(segment);
+    }
+
+    if (uri->opaque) {
+        segment = xmlURIEscapeStrExt(BAD_CAST uri->opaque,
+                                     NULL, xml3986OpaqueEscapeChar);
+        if (segment == NULL)
+            goto mem_error;
+        ret = xmlStrcat(ret, segment);
+        xmlFree(segment);
+    }
+
+    if (uri->fragment) {
+        segment = xmlURIEscapeStrExt(BAD_CAST uri->fragment,
+                                     NULL, xml3986FragmentEscapeChar);
+        if (segment == NULL)
+            goto mem_error;
+        ret = xmlStrcat(ret, BAD_CAST "#");
+        ret = xmlStrcat(ret, segment);
+        xmlFree(segment);
+    }
+
+    xmlFreeURI(uri);
+    return (ret);
+
+mem_error:
+    xmlURIErrMemory("escaping URI value\n");
+    if (ret != NULL)
+        xmlFree(ret);
+    xmlFreeURI(uri);
+    return (NULL);
+}
+
/************************************************************************
* *
* Public functions *
--
2.9.4
_______________________________________________
xml mailing list, project page http://xmlsoft.org/
xml@gnome.org
https://mail.gnome.org/mailman/listinfo/xml