On Thu, Jun 11, 2020 at 1:22 PM Yann Ylavic <[email protected]> wrote:
>
> On Thu, Jun 11, 2020 at 9:57 AM Yann Ylavic <[email protected]> wrote:
> >
> > On Thu, Jun 11, 2020 at 9:50 AM Yann Ylavic <[email protected]> wrote:
> > >
> > > We need a way to forward non %-decoded URLs upto mod_proxy (reverse)
> > > if we want to normalize a second time..
> >
> > IOW, this block in ap_process_request_internal():
> [snip]
> > Should go _after_ the following:
> [snip]
>
> Or we could introduce a new pre_translate_name hook which would
> execute before %-decoding, and be used by mod_proxy when
> "ProxyPreTranslation on" is configured, and be a prerequisite for
> mapping=servlet.
>
> I find ProxyPreTranslation also useful for the non-servlet case btw.
>
> Something like this attached v2 patch.
Here is a v3 with the relevant pre_translate_name hooks only and
ap_getparents() preserved when the URI does not start with '/' (which
makes the patch read better too).
>
> Regards;
> Yann.
Index: include/http_request.h
===================================================================
--- include/http_request.h (revision 1878328)
+++ include/http_request.h (working copy)
@@ -365,6 +365,16 @@ AP_DECLARE_HOOK(int,create_request,(request_rec *r
/**
* This hook allow modules an opportunity to translate the URI into an
+ * actual filename, before URL decoding happens.
+ * rules will be followed.
+ * @param r The current request
+ * @return OK, DECLINED, or HTTP_...
+ * @ingroup hooks
+ */
+AP_DECLARE_HOOK(int,pre_translate_name,(request_rec *r))
+
+/**
+ * This hook allow modules an opportunity to translate the URI into an
* actual filename. If no modules do anything special, the server's default
* rules will be followed.
* @param r The current request
Index: include/httpd.h
===================================================================
--- include/httpd.h (revision 1878328)
+++ include/httpd.h (working copy)
@@ -1779,8 +1779,21 @@ AP_DECLARE(void) ap_no2slash(char *name)
AP_DECLARE(void) ap_no2slash_ex(char *name, int is_fs_path)
AP_FN_ATTR_NONNULL_ALL;
+#define AP_NORMALIZE_NOT_ABOVE_ROOT (1u << 0)
+#define AP_NORMALIZE_MERGE_SLASHES (1u << 1)
+#define AP_NORMALIZE_PATH_PARAMETERS (1u << 2)
/**
+ * Remove all ////, /./ and /xx/../ substrings from a path, and more
+ * depending on passed in flags.
+ * @param path The path to normalize
+ * @param flags bitmask of AP_NORMALIZE_* flags
+ * @return non-zero on success
+ */
+AP_DECLARE(int) ap_normalize_path(char *path, unsigned int flags)
+ AP_FN_ATTR_NONNULL((1));
+
+/**
* Remove all ./ and xx/../ substrings from a file name. Also remove
* any leading ../ or /../ substrings.
* @param name the file name to parse
Index: modules/dav/main/util.c
===================================================================
--- modules/dav/main/util.c (revision 1878328)
+++ modules/dav/main/util.c (working copy)
@@ -664,7 +664,11 @@ static dav_error * dav_process_if_header(request_r
/* note that parsed_uri.path is allocated; we can trash it */
/* clean up the URI a bit */
- ap_getparents(parsed_uri.path);
+ if (!ap_normalize_path(parsed_uri.path, 0)) {
+ return dav_new_error(r->pool, HTTP_BAD_REQUEST,
+ DAV_ERR_IF_TAGGED, rv,
+ "Invalid URI path tagged If-header.");
+ }
/* the resources we will compare to have unencoded paths */
if (ap_unescape_url(parsed_uri.path) != OK) {
Index: modules/generators/mod_autoindex.c
===================================================================
--- modules/generators/mod_autoindex.c (revision 1878328)
+++ modules/generators/mod_autoindex.c (working copy)
@@ -1266,8 +1266,7 @@ static struct ent *make_parent_entry(apr_int32_t a
if (!(p->name = ap_make_full_path(r->pool, r->uri, "../"))) {
return (NULL);
}
- ap_getparents(p->name);
- if (!*p->name) {
+ if (!ap_normalize_path(p->name, 0)) {
return (NULL);
}
Index: modules/generators/mod_info.c
===================================================================
--- modules/generators/mod_info.c (revision 1878328)
+++ modules/generators/mod_info.c (working copy)
@@ -322,6 +322,7 @@ static const hook_lookup_t request_hooks[] = {
{"HTTP Scheme", ap_hook_get_http_scheme},
{"Default Port", ap_hook_get_default_port},
{"Quick Handler", ap_hook_get_quick_handler},
+ {"Pre-Translate Name", ap_hook_get_pre_translate_name},
{"Translate Name", ap_hook_get_translate_name},
{"Map to Storage", ap_hook_get_map_to_storage},
{"Check Access", ap_hook_get_access_checker_ex},
Index: modules/proxy/mod_proxy.c
===================================================================
--- modules/proxy/mod_proxy.c (revision 1878328)
+++ modules/proxy/mod_proxy.c (working copy)
@@ -753,6 +753,26 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_re
mismatch = 1;
use_uri = r->uri;
}
+ else if (dconf->pre_trans > 0
+ && (ent->flags & PROXYPASS_MAPPING_SERVLET)) {
+ char *uri = apr_pstrdup(r->pool, r->uri);
+
+ /* check against the backend servlet url */
+ if (!ap_normalize_path(uri, AP_NORMALIZE_MERGE_SLASHES |
+ AP_NORMALIZE_PATH_PARAMETERS)) {
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
+ "servlet normalization failed for '%s'", r->uri);
+ return HTTP_INTERNAL_SERVER_ERROR;
+ }
+
+ if (!alias_match(uri, ent->fake)) {
+ ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO()
+ "servlet normalization for '%s' mismatch; "
+ "declining '%s'", r->uri, uri);
+ return DECLINED;
+ }
+ }
+
found = apr_pstrcat(r->pool, "proxy:", real, use_uri + len, NULL);
}
}
@@ -795,7 +815,7 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_re
return DONE;
}
-static int proxy_trans(request_rec *r)
+static int proxy_trans(request_rec *r, int pre)
{
int i;
struct proxy_alias *ent;
@@ -825,6 +845,10 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_re
dconf = ap_get_module_config(r->per_dir_config, &proxy_module);
+ if ((pre != 0) ^ (dconf->pre_trans > 0)) {
+ return DECLINED;
+ }
+
/* short way - this location is reverse proxied? */
if (dconf->alias) {
int rv = ap_proxy_trans_match(r, dconf->alias, dconf);
@@ -849,6 +873,15 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_re
return DECLINED;
}
+static int proxy_pre_translate_name(request_rec *r)
+{
+ return proxy_trans(r, 1);
+}
+static int proxy_translate_name(request_rec *r)
+{
+ return proxy_trans(r, 0);
+}
+
static int proxy_walk(request_rec *r)
{
proxy_server_conf *sconf = ap_get_module_config(r->server->module_config,
@@ -1593,6 +1626,7 @@ static void *create_proxy_dir_config(apr_pool_t *p
new->add_forwarded_headers_set = 0;
new->forward_100_continue = 1;
new->forward_100_continue_set = 0;
+ new->pre_trans = -1; /* unset */
return (void *) new;
}
@@ -1651,6 +1685,9 @@ static void *merge_proxy_dir_config(apr_pool_t *p,
new->forward_100_continue_set = add->forward_100_continue_set
|| base->forward_100_continue_set;
+ new->pre_trans = (add->pre_trans == -1) ? base->pre_trans
+ : add->pre_trans;
+
return new;
}
@@ -1806,6 +1843,9 @@ static const char *
else if (!strcasecmp(word,"noquery")) {
flags |= PROXYPASS_NOQUERY;
}
+ else if (!strcasecmp(word,"mapping=servlet")) {
+ flags |= PROXYPASS_MAPPING_SERVLET;
+ }
else {
char *val = strchr(word, '=');
if (!val) {
@@ -2799,6 +2839,9 @@ static const command_rec proxy_cmds[] =
AP_INIT_FLAG("Proxy100Continue", forward_100_continue, NULL, RSRC_CONF|ACCESS_CONF,
"on if 100-Continue should be forwarded to the origin server, off if the "
"proxy should handle it by itself"),
+ AP_INIT_FLAG("ProxyPreTranslate", ap_set_flag_slot_char,
+ (void*)APR_OFFSETOF(proxy_dir_conf, pre_trans),
+ RSRC_CONF|ACCESS_CONF, "Run URI translation with no URL percent decoding"),
{NULL}
};
@@ -3147,7 +3190,10 @@ static void register_hooks(apr_pool_t *p)
/* handler */
ap_hook_handler(proxy_handler, NULL, NULL, APR_HOOK_FIRST);
/* filename-to-URI translation */
- ap_hook_translate_name(proxy_trans, aszSucc, NULL, APR_HOOK_FIRST);
+ ap_hook_pre_translate_name(proxy_pre_translate_name, aszSucc, NULL,
+ APR_HOOK_MIDDLE);
+ ap_hook_translate_name(proxy_translate_name, aszSucc, NULL,
+ APR_HOOK_FIRST);
/* walk <Proxy > entries and suppress default TRACE behavior */
ap_hook_map_to_storage(proxy_map_location, NULL,NULL, APR_HOOK_FIRST);
/* fixups */
Index: modules/proxy/mod_proxy.h
===================================================================
--- modules/proxy/mod_proxy.h (revision 1878328)
+++ modules/proxy/mod_proxy.h (working copy)
@@ -123,6 +123,7 @@ struct proxy_remote {
#define PROXYPASS_NOCANON 0x01
#define PROXYPASS_INTERPOLATE 0x02
#define PROXYPASS_NOQUERY 0x04
+#define PROXYPASS_MAPPING_SERVLET 0x08
struct proxy_alias {
const char *real;
const char *fake;
@@ -243,6 +244,9 @@ typedef struct {
unsigned int forward_100_continue_set:1;
apr_array_header_t *error_override_codes;
+
+ /** Whether to run translation in pre_translate_name hook */
+ int pre_trans;
} proxy_dir_conf;
/* if we interpolate env vars per-request, we'll need a per-request
Index: server/protocol.c
===================================================================
--- server/protocol.c (revision 1878328)
+++ server/protocol.c (working copy)
@@ -627,6 +627,14 @@ AP_CORE_DECLARE(void) ap_parse_uri(request_rec *r,
}
else {
status = apr_uri_parse(r->pool, uri, &r->parsed_uri);
+ if (status == APR_SUCCESS
+ && (r->parsed_uri.path != NULL)
+ && (r->parsed_uri.path[0] != '/')
+ && (r->method_number != M_OPTIONS
+ || strcmp(r->parsed_uri.path, "*") != 0)) {
+ /* Invalid request-target per rfc7230#section-5.3 */
+ status = APR_EINVAL;
+ }
}
if (status == APR_SUCCESS) {
@@ -640,8 +648,15 @@ AP_CORE_DECLARE(void) ap_parse_uri(request_rec *r,
}
r->args = r->parsed_uri.query;
- r->uri = r->parsed_uri.path ? r->parsed_uri.path
- : apr_pstrdup(r->pool, "/");
+ if (r->parsed_uri.path) {
+ r->uri = r->parsed_uri.path;
+ }
+ else if (r->method_number == M_OPTIONS) {
+ r->uri = apr_pstrdup(r->pool, "*");
+ }
+ else {
+ r->uri = apr_pstrdup(r->pool, "/");
+ }
#if defined(OS2) || defined(WIN32)
/* Handle path translations for OS/2 and plug security hole.
Index: server/request.c
===================================================================
--- server/request.c (revision 1878328)
+++ server/request.c (working copy)
@@ -59,6 +59,7 @@
#define APLOG_MODULE_INDEX AP_CORE_MODULE_INDEX
APR_HOOK_STRUCT(
+ APR_HOOK_LINK(pre_translate_name)
APR_HOOK_LINK(translate_name)
APR_HOOK_LINK(map_to_storage)
APR_HOOK_LINK(check_user_id)
@@ -74,6 +75,8 @@ APR_HOOK_STRUCT(
APR_HOOK_LINK(force_authn)
)
+AP_IMPLEMENT_HOOK_RUN_FIRST(int,pre_translate_name,
+ (request_rec *r), (r), DECLINED)
AP_IMPLEMENT_HOOK_RUN_FIRST(int,translate_name,
(request_rec *r), (r), DECLINED)
AP_IMPLEMENT_HOOK_RUN_FIRST(int,map_to_storage,
@@ -170,11 +173,35 @@ AP_DECLARE(int) ap_process_request_internal(reques
core_server_config *sconf =
ap_get_core_module_config(r->server->module_config);
- /* Ignore embedded %2F's in path for proxy requests */
- if (!r->proxyreq && r->parsed_uri.path) {
+ if (r->parsed_uri.path) {
+ unsigned int normalize_flags = 0;
+ if (sconf->merge_slashes != AP_CORE_CONFIG_OFF) {
+ normalize_flags |= AP_NORMALIZE_MERGE_SLASHES;
+ }
+
+ /* Shrinking transformations */
+ if (!ap_normalize_path(r->parsed_uri.path, normalize_flags)) {
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO()
+ "invalid URI path (%s)", r->uri);
+ return HTTP_BAD_REQUEST;
+ }
+
+ /* Apply transformations */
+ r->uri = r->parsed_uri.path;
+ }
+
+ /* Let pre_translate_name hooks own non-decoded URIs */
+ access_status = ap_run_pre_translate_name(r);
+ if (access_status != OK && access_status != DECLINED) {
+ return access_status;
+ }
+
+ /* Ignore URL unescaping for translated URIs already */
+ if (access_status == DECLINED && r->parsed_uri.path) {
d = ap_get_core_module_config(r->per_dir_config);
if (d->allow_encoded_slashes) {
- access_status = ap_unescape_url_keep2f(r->parsed_uri.path, d->decode_encoded_slashes);
+ access_status = ap_unescape_url_keep2f(r->parsed_uri.path,
+ d->decode_encoded_slashes);
}
else {
access_status = ap_unescape_url(r->parsed_uri.path);
@@ -185,21 +212,16 @@ AP_DECLARE(int) ap_process_request_internal(reques
ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(00026)
"found %%2f (encoded '/') in URI "
"(decoded='%s'), returning 404",
- r->parsed_uri.path);
+ r->uri);
}
}
return access_status;
}
+
+ /* Apply transformations */
+ r->uri = r->parsed_uri.path;
}
- ap_getparents(r->uri); /* OK --- shrinking transformations... */
- if (sconf->merge_slashes != AP_CORE_CONFIG_OFF) {
- ap_no2slash(r->uri);
- if (r->parsed_uri.path) {
- ap_no2slash(r->parsed_uri.path);
- }
- }
-
/* All file subrequests are a huge pain... they cannot bubble through the
* next several steps. Only file subrequests are allowed an empty uri,
* otherwise let translate_name kill the request.
Index: server/util.c
===================================================================
--- server/util.c (revision 1878328)
+++ server/util.c (working copy)
@@ -77,7 +77,7 @@
#include "test_char.h"
/* Win32/NetWare/OS2 need to check for both forward and back slashes
- * in ap_getparents() and ap_escape_url.
+ * in ap_normalize_path() and ap_escape_url().
*/
#ifdef CASE_BLIND_FILESYSTEM
#define IS_SLASH(s) ((s == '/') || (s == '\\'))
@@ -492,8 +492,108 @@ AP_DECLARE(apr_status_t) ap_pregsub_ex(apr_pool_t
return rc;
}
+/* Forward declare */
+static char x2c(const char *what);
+
+#define IS_SLASH_OR_NUL(s) (s == '\0' || IS_SLASH(s))
+
+static int normalize_path(char *path, unsigned int flags)
+{
+ apr_size_t l, w;
+
+ /* We assume path[0] == '/' below */
+ if (!IS_SLASH(path[0])) {
+ /* Besides "OPTIONS *" requests, a request-target path should start
+ * with '/' (RFC-7230 section-5.3), so anything else is invalid.
+ */
+ return (path[0] == '*' && path[1] == '\0');
+ }
+
+ for (l = w = 1; path[l] != '\0';) {
+ if ((flags & AP_NORMALIZE_PATH_PARAMETERS) && path[l] == ';') {
+ /* Remove path parameters ;foo=bar/ from any path segment */
+ do {
+ l++;
+ } while (!IS_SLASH_OR_NUL(path[l]));
+ continue;
+ }
+
+ /* RFC-3986 section 2.3:
+ * For consistency, percent-encoded octets in the ranges of
+ * ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
+ * period (%2E), underscore (%5F), or tilde (%7E) should [...]
+ * be decoded to their corresponding unreserved characters by
+ * URI normalizers.
+ */
+ if (path[l] == '%' && apr_isxdigit(path[l + 1])
+ && apr_isxdigit(path[l + 2])) {
+ const char c = x2c(&path[l + 1]);
+ if (apr_isalnum(c) || (c && strchr("-._~", c))) {
+ /* Replace last char and fall through as the current
+ * read position */
+ l += 2;
+ path[l] = c;
+ }
+ }
+
+ if (IS_SLASH(path[w - 1])) {
+ if ((flags & AP_NORMALIZE_MERGE_SLASHES) && IS_SLASH(path[l])) {
+ /* Collapse ///// sequences to / */
+ do {
+ l++;
+ } while (IS_SLASH(path[l]));
+ continue;
+ }
+
+ if (path[l] == '.') {
+ if (IS_SLASH_OR_NUL(path[l + 1])) {
+ /* Remove /./ segments */
+ l++;
+ if (path[l]) {
+ l++;
+ }
+ continue;
+ }
+
+ if (path[l + 1] == '.' && IS_SLASH_OR_NUL(path[l + 2])) {
+ /* Remove /xx/../ segments */
+ if (w == 1) {
+ if (flags & AP_NORMALIZE_NOT_ABOVE_ROOT) {
+ return 0;
+ }
+ l += 2;
+ continue;
+ }
+
+ /* Wind w back to remove the previous segment */
+ do {
+ w--;
+ } while (!IS_SLASH(path[w - 1]));
+
+ /* Move l forward to the next segment */
+ l += 2;
+ if (path[l]) {
+ l++;
+ }
+ continue;
+ }
+ }
+ }
+
+ path[w++] = path[l++];
+ }
+ path[w] = '\0';
+
+ return 1;
+}
+
+AP_DECLARE(int) ap_normalize_path(char *path, unsigned int flags)
+{
+ return normalize_path(path, flags);
+}
+
/*
- * Parse .. so we don't compromise security
+ * Normalize .. so we don't compromise security
*/
AP_DECLARE(void) ap_getparents(char *name)
{
@@ -500,6 +600,14 @@ AP_DECLARE(void) ap_getparents(char *name)
char *next;
int l, w, first_dot;
+ /* If the path starts with '/', ap_normalize_path() is faster.
+ * Otherthise fallback to legacy four passes.
+ */
+ if (IS_SLASH(name[0])) {
+ (void)normalize_path(name, 0);
+ return;
+ }
+
/* Four paseses, as per RFC 1808 */
/* a) remove ./ path segments */
for (next = name; *next && (*next != '.'); next++) {
@@ -561,6 +669,7 @@ AP_DECLARE(void) ap_getparents(char *name)
name[l] = '\0';
}
}
+
AP_DECLARE(void) ap_no2slash_ex(char *name, int is_fs_path)
{