GWicke has uploaded a new change for review.
https://gerrit.wikimedia.org/r/273146
Change subject: REST path escaping
......................................................................
REST path escaping
REST APIs like their slashes preserved, while MediaWiki likes them decoded.
This patch adds a REST API normalization variant that decodes everything but
the path delimiters [/?#] as well as % itself. This includes the `unreserved`
set and `sub-delim` classes from RFC3986, as well as those characters from
`gen-delim` that are only used in the host / auth portion of a URL ([:@]).
As discussed on the task, the MediaWiki variant should probably include at
least `unreserved` and much of `gen-delim` as well. This patch does not touch
this just yet, leaving it to a follow-up once consensus on those changes is
reached.
Change-Id: I23d5baf099e4a04aa451efde7cd7d952c202e6cd
---
M templates/varnish/text-frontend.inc.vcl.erb
1 file changed, 78 insertions(+), 3 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet
refs/changes/46/273146/1
diff --git a/templates/varnish/text-frontend.inc.vcl.erb
b/templates/varnish/text-frontend.inc.vcl.erb
index 93c197e..6d63d94 100644
--- a/templates/varnish/text-frontend.inc.vcl.erb
+++ b/templates/varnish/text-frontend.inc.vcl.erb
@@ -50,7 +50,7 @@
C{
#include <string.h>
}C
-sub normalize_path {
+sub normalize_mediawiki_path {
/* Rewrite the path part of the URL, replacing unnecessarily escaped
* punctuation with the actual characters. The character list is from
* MediaWiki's wfUrlencode(), so the URLs produced here will be the
same as
@@ -107,6 +107,79 @@
/* Reached the query part. Just copy
the rest of the URL
* to the destination.
*/
+ memcpy(destBuffer + outPos, url + i,
sizeof(char) * (urlLength - i));
+ outPos += urlLength - i;
+ i = urlLength;
+ } else {
+ destBuffer[outPos++] = url[i];
+ }
+ }
+ destBuffer[outPos] = '\0';
+
+ /* Set req.url. This will copy our stack buffer into
the workspace.
+ * VRT_l_req_url() is varadic, and concatenates its
arguments. The
+ * vrt_magic_string_end marks the end of the list.
+ */
+ if (dirty) {
+ VRT_l_req_url(sp, destBuffer,
vrt_magic_string_end);
+ }
+ }
+ #undef NP_IS_HEX
+ #undef NP_HEX_DIGIT
+ #undef NP_HEXCHAR
+ }C
+}
+
+sub normalize_rest_path {
+ /* Rewrite the path part of the URL, replacing unnecessarily escaped
+ * punctuation with the actual characters. The character list is based
on
+ * the discussion in T127387, aiming for typical REST API use cases,
+ * including RESTBase. */
+ C{
+ /* DIY hexadecimal conversion, since it is simple enough for a
fixed
+ * width, and all the relevant standard C library functions
promise to
+ * malfunction if the locale is set to anything other than "C"
+ */
+ #define NP_HEX_DIGIT(c) ( \
+ (c) >= '0' && (c) <= '9' ? (c) - '0' : ( \
+ (c) >= 'A' && (c) <= 'F' ? (c) - 'A' + 0x0a : (
\
+ (c) >= 'a' && (c) <= 'f' ? (c) - 'a' +
0x0a : -1 ) ) )
+ #define NP_IS_HEX(c) (NP_HEX_DIGIT(c) != -1)
+ #define NP_HEXCHAR(c1, c2) (char)( (NP_HEX_DIGIT(c1) << 4) |
NP_HEX_DIGIT(c2) )
+ const char * url = VRT_r_req_url(sp);
+ size_t i, outPos;
+ const size_t urlLength = strlen(url);
+ // index for the last position %XX can start at:
+ const size_t lastConvertIdx = urlLength > 2 ? urlLength - 3 : 0;
+ char c;
+ int dirty = 0;
+
+ /* Allocate destination memory from the stack using the C99
+ * variable-length automatic feature. We know the length in
advance
+ * because this function can only shorten the input string.
+ */
+ char destBuffer[urlLength + 1];
+ if (url) {
+ for (i = 0, outPos = 0; i < urlLength; i++) {
+ if (i <= lastConvertIdx && url[i] == '%' &&
NP_IS_HEX(url[i+1]) && NP_IS_HEX(url[i+2])) {
+ c = NP_HEXCHAR(url[i+1], url[i+2]);
+ /* Don't decode the subset of
gen-delims from RFC3986
+ * that apply to the path portion of
URLs */
+ if (c != '/'
+ && c != '?'
+ && c != '#'
+ /* Also protect percents */
+ && c != '%')
+ {
+ destBuffer[outPos++] = c;
+ dirty = 1;
+ i += 2;
+ } else {
+ destBuffer[outPos++] = url[i];
+ }
+ } else if (url[i] == '?' || url[i] == '#') {
+ /* Reached the query part or stray
fragment id. Just
+ * copy the rest of the URL to the
destination. */
memcpy(destBuffer + outPos, url + i,
sizeof(char) * (urlLength - i));
outPos += urlLength - i;
i = urlLength;
@@ -207,8 +280,10 @@
}
// Don't decode percent-encoded slashes in paths for REST APIs
- if (req.url !~ "^/api/rest_v1/" && req.http.host !~
"cxserver|citoid|restbase|^rest\.") {
- call normalize_path;
+ if (req.url ~ "^/api/rest_v1/" || req.http.host ~
"cxserver|citoid|restbase|^rest\.") {
+ call normalize_rest_path;
+ } else {
+ call normalize_mediawiki_path;
}
call mobile_redirect;
--
To view, visit https://gerrit.wikimedia.org/r/273146
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I23d5baf099e4a04aa451efde7cd7d952c202e6cd
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: GWicke <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits