rse 97/08/05 03:49:45
Modified: src CHANGES mod_rewrite.c mod_rewrite.h
htdocs/manual/mod mod_rewrite.html
Log:
Enhanced and cleaned up the URL rewriting engine for 1.3:
First, the grouped parts (parentheses!) of RewriteRule pattern matches can now
be accessed via backreferences $1..$9 in RewriteCond test-against strings, in
addition to the RewriteRule substitution string. Second, the grouped parts
(parentheses!) of RewriteCond pattern matches can now be accessed via
backreferences %1..%9, both in following RewriteCond test-against strings and
in the RewriteRule substitution string. This provides maximum flexibility
through the use of backreferences.
Additionally, the rewriting engine was cleaned up by moving common code into
the new expand_backref_inbuffer() function.
Submitted by: Ralf S. Engelschall
Reviewed by: Dean Gaudet, Ralf S. Engelschall
Revision Changes Path
1.385 +12 -0 apache/src/CHANGES
Index: CHANGES
===================================================================
RCS file: /export/home/cvs/apache/src/CHANGES,v
retrieving revision 1.384
retrieving revision 1.385
diff -u -r1.384 -r1.385
--- CHANGES 1997/08/05 06:52:18 1.384
+++ CHANGES 1997/08/05 10:49:39 1.385
@@ -1,5 +1,17 @@
Changes with Apache 1.3a2
+ *) Enhanced and cleaned up the URL rewriting engine of mod_rewrite:
+ First the grouped parts of RewriteRule pattern matches (parenthesis!)
can
+ be accessed now via backreferences $1..$9 in RewriteConds test-against
+ strings in addition to RewriteRules subst string. Second the grouped
+ parts of RewriteCond pattern matches (parenthesis!) can be accessed now
+ via backreferences %1..%9 both in following RewriteCond test-against
+ strings and RewriteRules subst string. This provides maximum flexibility
+ through the use of backreferences.
+ Additionally the rewriting engine was cleaned up by putting common
+ code to the new expand_backrefs_inbuffer() function.
+ [Ralf S. Engelschall]
+
*) When merging the main server's <Directory> and <Location> sections into
a vhost, put the main server's first and the vhost's second. Otherwise
the vhost can't override the main server. [Dean Gaudet] PR#717
1.46 +124 -70 apache/src/mod_rewrite.c
Index: mod_rewrite.c
===================================================================
RCS file: /export/home/cvs/apache/src/mod_rewrite.c,v
retrieving revision 1.45
retrieving revision 1.46
diff -u -r1.45 -r1.46
--- mod_rewrite.c 1997/08/01 09:36:42 1.45
+++ mod_rewrite.c 1997/08/05 10:49:40 1.46
@@ -1442,16 +1442,17 @@
char newuri[MAX_STRING_LEN];
char env[MAX_STRING_LEN];
char port[32];
- char env2[MAX_STRING_LEN];
regex_t *regexp;
regmatch_t regmatch[10];
- int rc;
+ backrefinfo *briRR = NULL;
+ backrefinfo *briRC = NULL;
int prefixstrip;
- int i;
int failed;
array_header *rewriteconds;
rewritecond_entry *conds;
rewritecond_entry *c;
+ int i;
+ int rc;
uri = r->filename;
regexp = p->regexp;
@@ -1481,6 +1482,24 @@
if (( rc && !(p->flags & RULEFLAG_NOTMATCH)) ||
(!rc && (p->flags & RULEFLAG_NOTMATCH)) ) {
+ /* create the RewriteRule regsubinfo */
+ briRR = (backrefinfo *)palloc(r->pool, sizeof(backrefinfo));
+ if (!rc && (p->flags & RULEFLAG_NOTMATCH)) {
+ briRR->source = "";
+ briRR->nsub = 0;
+ }
+ else {
+ briRR->source = pstrdup(r->pool, uri);
+ briRR->nsub = regexp->re_nsub;
+ memcpy((void *)(briRR->regmatch), (void *)(regmatch),
sizeof(regmatch_t)*10);
+ }
+
+ /* create the RewriteCond backrefinfo, but
+ initialized as empty backrefinfo, i.e. not subst */
+ briRC = (backrefinfo *)pcalloc(r->pool, sizeof(backrefinfo));
+ briRC->source = "";
+ briRC->nsub = 0;
+
/* ok, the pattern matched, but we now additionally have to check
for any preconditions which have to be also true. We do this
at this very late stage to avoid unnessesary checks which
@@ -1490,7 +1509,7 @@
failed = 0;
for (i = 0; i < rewriteconds->nelts; i++) {
c = &conds[i];
- rc = apply_rewrite_cond(r, c, perdir);
+ rc = apply_rewrite_cond(r, c, perdir, briRR, briRC);
if (c->flags & CONDFLAG_ORNEXT) {
/* there is a "or" flag */
if (rc == 0) {
@@ -1523,10 +1542,10 @@
if (strcmp(output, "-") == 0) {
/* but before we set the env variables... */
for (i = 0; p->env[i] != NULL; i++) {
- strncpy(env2, p->env[i], sizeof(env2)-1);
- EOS_PARANOIA(env2);
- strncpy(env, pregsub(r->pool, env2, uri, regexp->re_nsub+1,
regmatch), sizeof(env)-1); /* substitute in output */
+ strncpy(env, p->env[i], sizeof(env)-1);
EOS_PARANOIA(env);
+ expand_backref_inbuffer(r->pool, env, sizeof(env), briRR,
'$');
+ expand_backref_inbuffer(r->pool, env, sizeof(env), briRC,
'%');
add_env_variable(r, env);
}
return 2;
@@ -1534,27 +1553,13 @@
/* if this is a forced proxy request ... */
if (p->flags & RULEFLAG_PROXY) {
- if (p->flags & RULEFLAG_NOTMATCH) {
- output = pstrcat(r->pool, "proxy:", output, NULL);
- strncpy(newuri, output, sizeof(newuri)-1);
- EOS_PARANOIA(newuri);
- expand_variables_inbuffer(r, newuri, sizeof(newuri));/*
expand %{...} */
- expand_map_lookups(r, newuri, sizeof(newuri)); /*
expand ${...} */
- }
- else {
- output = pstrcat(r->pool, "proxy:", output, NULL);
- strncpy(newuri, pregsub(r->pool, output, uri,
regexp->re_nsub+1, regmatch), sizeof(newuri)-1); /* substitute in output */
- EOS_PARANOIA(newuri);
- for (i = 0; p->env[i] != NULL; i++) {
- strncpy(env2, p->env[i], sizeof(env2)-1);
- EOS_PARANOIA(env2);
- strncpy(env, pregsub(r->pool, env2, uri,
regexp->re_nsub+1, regmatch), sizeof(env)-1); /* substitute in output */
- EOS_PARANOIA(env);
- add_env_variable(r, env);
- }
- expand_variables_inbuffer(r, newuri, sizeof(newuri)); /*
expand %{...} */
- expand_map_lookups(r, newuri, sizeof(newuri)); /*
expand ${...} */
- }
+ output = pstrcat(r->pool, "proxy:", output, NULL);
+ strncpy(newuri, output, sizeof(newuri)-1);
+ EOS_PARANOIA(newuri);
+ expand_backref_inbuffer(r->pool, newuri, sizeof(newuri), briRR,
'$'); /* expand $N */
+ expand_backref_inbuffer(r->pool, newuri, sizeof(newuri), briRC,
'%'); /* expand %N */
+ expand_variables_inbuffer(r, newuri, sizeof(newuri));
/* expand %{...} */
+ expand_map_lookups(r, newuri, sizeof(newuri));
/* expand ${...} */
if (perdir == NULL)
rewritelog(r, 2, "rewrite %s -> %s", r->filename, newuri);
else
@@ -1570,24 +1575,18 @@
|| (i > 8 && strncmp(output, "https://", 8) == 0)
|| (i > 9 && strncmp(output, "gopher://", 9) == 0)
|| (i > 6 && strncmp(output, "ftp://", 6) == 0) ) ) {
- if (p->flags & RULEFLAG_NOTMATCH) {
- strncpy(newuri, output, sizeof(newuri)-1);
- EOS_PARANOIA(newuri);
- expand_variables_inbuffer(r, newuri, sizeof(newuri));/*
expand %{...} */
- expand_map_lookups(r, newuri, sizeof(newuri)); /*
expand ${...} */
- }
- else {
- strncpy(newuri, pregsub(r->pool, output, uri,
regexp->re_nsub+1, regmatch), sizeof(newuri)-1); /* substitute in output */
- EOS_PARANOIA(newuri);
- for (i = 0; p->env[i] != NULL; i++) {
- strncpy(env2, p->env[i], sizeof(env2)-1);
- EOS_PARANOIA(env2);
- strncpy(env, pregsub(r->pool, env2, uri,
regexp->re_nsub+1, regmatch), sizeof(env)-1); /* substitute in output */
- EOS_PARANOIA(env);
- add_env_variable(r, env);
- }
- expand_variables_inbuffer(r, newuri, sizeof(newuri));/*
expand %{...} */
- expand_map_lookups(r, newuri, sizeof(newuri)); /*
expand ${...} */
+ strncpy(newuri, output, sizeof(newuri)-1);
+ EOS_PARANOIA(newuri);
+ expand_backref_inbuffer(r->pool, newuri, sizeof(newuri), briRR,
'$'); /* expand $N */
+ expand_backref_inbuffer(r->pool, newuri, sizeof(newuri), briRC,
'%'); /* expand %N */
+ expand_variables_inbuffer(r, newuri, sizeof(newuri));
/* expand %{...} */
+ expand_map_lookups(r, newuri, sizeof(newuri));
/* expand ${...} */
+ for (i = 0; p->env[i] != NULL; i++) {
+ strncpy(env, p->env[i], sizeof(env)-1);
+ EOS_PARANOIA(env);
+ expand_backref_inbuffer(r->pool, env, sizeof(env), briRR,
'$');
+ expand_backref_inbuffer(r->pool, env, sizeof(env), briRC,
'%');
+ add_env_variable(r, env);
}
rewritelog(r, 2, "[per-dir %s] redirect %s -> %s", perdir,
r->filename, newuri);
r->filename = pstrdup(r->pool, newuri);
@@ -1603,25 +1602,20 @@
output = pstrcat(r->pool, perdir, output, NULL);
}
- if (p->flags & RULEFLAG_NOTMATCH) {
- /* just overtake the URI */
- strncpy(newuri, output, sizeof(newuri)-1);
- EOS_PARANOIA(newuri);
+ /* standard case: create the substitution string */
+ strncpy(newuri, output, sizeof(newuri)-1);
+ EOS_PARANOIA(newuri);
+ expand_backref_inbuffer(r->pool, newuri, sizeof(newuri), briRR,
'$'); /* expand $N */
+ expand_backref_inbuffer(r->pool, newuri, sizeof(newuri), briRC,
'%'); /* expand %N */
+ expand_variables_inbuffer(r, newuri, sizeof(newuri));
/* expand %{...} */
+ expand_map_lookups(r, newuri, sizeof(newuri));
/* expand ${...} */
+ for (i = 0; p->env[i] != NULL; i++) {
+ strncpy(env, p->env[i], sizeof(env)-1);
+ EOS_PARANOIA(env);
+ expand_backref_inbuffer(r->pool, env, sizeof(env), briRR, '$');
+ expand_backref_inbuffer(r->pool, env, sizeof(env), briRC, '%');
+ add_env_variable(r, env);
}
- else {
- /* substitute in output */
- strncpy(newuri, pregsub(r->pool, output, uri, regexp->re_nsub+1,
regmatch), sizeof(newuri)-1); /* substitute in output */
- EOS_PARANOIA(newuri);
- for (i = 0; p->env[i] != NULL; i++) {
- strncpy(env2, p->env[i], sizeof(env2)-1);
- EOS_PARANOIA(env2);
- strncpy(env, pregsub(r->pool, env2, uri, regexp->re_nsub+1,
regmatch), sizeof(env)-1); /* substitute in output */
- EOS_PARANOIA(env);
- add_env_variable(r, env);
- }
- }
- expand_variables_inbuffer(r, newuri, sizeof(newuri)); /* expand
%{...} */
- expand_map_lookups(r, newuri, sizeof(newuri)); /* expand
${...} */
if (perdir == NULL)
rewritelog(r, 2, "rewrite %s -> %s", uri, newuri);
@@ -1694,15 +1688,30 @@
return 0;
}
-static int apply_rewrite_cond(request_rec *r, rewritecond_entry *p, char
*perdir)
+static int apply_rewrite_cond(request_rec *r, rewritecond_entry *p, char
*perdir, backrefinfo *briRR, backrefinfo *briRC)
{
- char *input;
- int rc;
+ char input[MAX_STRING_LEN];
struct stat sb;
request_rec *rsub;
+ regmatch_t regmatch[10];
+ int rc;
- /* first, we have to expand the input string to match */
- input = expand_variables(r, p->input);
+ /*
+ * Construct the string we match against
+ */
+
+ /* expand the regex backreferences from the RewriteRule ($0-$9),
+ then from the last RewriteCond (%0-%9) and then expand the
+ variables (%{....}) */
+ strncpy(input, p->input, sizeof(input)-1);
+ EOS_PARANOIA(input);
+ expand_backref_inbuffer(r->pool, input, sizeof(input), briRR, '$');
+ expand_backref_inbuffer(r->pool, input, sizeof(input), briRC, '%');
+ expand_variables_inbuffer(r, input, sizeof(input));
+
+ /*
+ * Apply the patterns
+ */
rc = 0;
if (strcmp(p->pattern, "-f") == 0) {
@@ -1791,7 +1800,15 @@
}
else {
/* it is really a regexp pattern, so apply it */
- rc = (regexec(p->regexp, input, 0, NULL, 0) == 0);
+ rc = (regexec(p->regexp, input, p->regexp->re_nsub+1, regmatch, 0)
== 0);
+
+ /* if it isn't a negated pattern and really matched
+ we update the passed-through regex subst info structure */
+ if (rc && !(p->flags & CONDFLAG_NOTMATCH)) {
+ briRC->source = pstrdup(r->pool, input);
+ briRC->nsub = p->regexp->re_nsub;
+ memcpy((void *)(briRC->regmatch), (void *)(regmatch),
sizeof(regmatch_t)*10);
+ }
}
/* if this is a non-matching regexp, just negate the result */
@@ -1937,6 +1954,43 @@
}
}
return;
+}
+
+
+/*
+**
+** Expand the %0-%9 or $0-$9 regex backreferences
+**
+*/
+
+static void expand_backref_inbuffer(pool *p, char *buf, int nbuf,
backrefinfo *bri, char c)
+{
+ int i;
+
+ if (bri->nsub < 1)
+ return;
+
+ if (c != '$') {
+ /* safe existing $N backrefs and replace <c>N with $N backrefs */
+ for (i = 0; buf[i] != '\0' && i < nbuf; i++) {
+ if (buf[i] == '$' && (buf[i+1] >= '0' && buf[i+1] <= '9'))
+ buf[i++] = '\001';
+ else if (buf[i] == c && (buf[i+1] >= '0' && buf[i+1] <= '9'))
+ buf[i++] = '$';
+ }
+ }
+
+ /* now apply the pregsub() function */
+ strncpy(buf, pregsub(p, buf, bri->source,
+ bri->nsub+1, bri->regmatch), nbuf-1);
+ EOS_PARANOIA_SIZE(buf, nbuf);
+
+ if (c != '$') {
+ /* restore the original $N backrefs */
+ for (i = 0; buf[i] != '\0' && i < nbuf; i++)
+ if (buf[i] == '\001' && (buf[i+1] >= '0' && buf[i+1] <= '9'))
+ buf[i++] = '$';
+ }
}
1.30 +13 -2 apache/src/mod_rewrite.h
Index: mod_rewrite.h
===================================================================
RCS file: /export/home/cvs/apache/src/mod_rewrite.h,v
retrieving revision 1.29
retrieving revision 1.30
diff -u -r1.29 -r1.30
--- mod_rewrite.h 1997/07/29 12:09:57 1.29
+++ mod_rewrite.h 1997/08/05 10:49:41 1.30
@@ -205,7 +205,8 @@
#define MAX_ENV_FLAGS 5
-#define EOS_PARANOIA(ca) ca[sizeof(ca)-1] = '\0'
+#define EOS_PARANOIA(ca) ca[sizeof(ca)-1] = '\0'
+#define EOS_PARANOIA_SIZE(cp, size) cp[size-1] = '\0'
/*
@@ -292,6 +293,15 @@
array_header *lists;
} cache;
+ /* the regex structure for the
+ substitution of backreferences */
+
+typedef struct backrefinfo {
+ char *source;
+ int nsub;
+ regmatch_t regmatch[10];
+} backrefinfo;
+
/*
**
@@ -335,11 +345,12 @@
/* rewriting engine */
static int apply_rewrite_list(request_rec *r, array_header *rewriterules,
char *perdir);
static int apply_rewrite_rule(request_rec *r, rewriterule_entry *p, char
*perdir);
-static int apply_rewrite_cond(request_rec *r, rewritecond_entry *p, char
*perdir);
+static int apply_rewrite_cond(request_rec *r, rewritecond_entry *p, char
*perdir, backrefinfo *briRR, backrefinfo *briRC);
/* URI transformation function */
static void splitout_queryargs(request_rec *r, int qsappend);
static void reduce_uri(request_rec *r);
+static void expand_backref_inbuffer(pool *p, char *buf, int nbuf,
backrefinfo *bri, char c);
static char *expand_tildepaths(request_rec *r, char *uri);
static void expand_map_lookups(request_rec *r, char *uri, int uri_len);
1.16 +41 -12 apache/htdocs/manual/mod/mod_rewrite.html
Index: mod_rewrite.html
===================================================================
RCS file: /export/home/cvs/apache/htdocs/manual/mod/mod_rewrite.html,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -r1.15 -r1.16
--- mod_rewrite.html 1997/08/01 12:00:18 1.15
+++ mod_rewrite.html 1997/08/05 10:49:44 1.16
@@ -471,7 +471,34 @@
state of the URI <b>AND</b> if these additional conditions apply, too.
<p>
-<em>TestString</em> is a string which contains server-variables of the form
+<em>TestString</em> is a string which can contains the following
+expanded constructs in addition to plain text:
+
+<ul>
+<li><b>RewriteRule backreferences</b>: These are backreferences of the form
+
+<blockquote><strong>
+<tt>$N</tt>
+</strong></blockquote>
+
+(1 <= N <= 9) which provide access to the grouped parts (parenthesis!) of the
+pattern from the corresponding <tt>RewriteRule</tt> directive (the one
+following the current bunch of <tt>RewriteCond</tt> directives).
+
+<p>
+<li><b>RewriteCond backreferences</b>: These are backreferences of the form
+
+<blockquote><strong>
+<tt>%N</tt>
+</strong></blockquote>
+
+(1 <= N <= 9) which provide access to the grouped parts (parenthesis!) of the
+pattern from the last matched <tt>RewriteCond</tt> directive in the current
+bunch of conditions.
+
+<p>
+<li><b>Server-Variables</b>: These are variables
+ of the form
<blockquote><strong>
<tt>%{</tt> <em>NAME_OF_VARIABLE</em> <tt>}</tt>
@@ -554,7 +581,6 @@
</tr>
</table>
-
<p>
<table width="70%" border=0 bgcolor="#f0f0f0" cellspacing=0 cellpadding=10>
<tr><td>
@@ -564,6 +590,8 @@
</td></tr>
</table>
+</ul>
+
<p>
Special Notes:
<ol>
@@ -796,7 +824,8 @@
<em>Pattern</em> matched. Beside plain text you can use
<ol>
-<li>pattern-group back-references (<code>$N</code>)
+<li>back-references <code>$N</code> to the RewriteRule pattern
+<li>back-references <code>%N</code> to the last matched RewriteCond pattern
<li>server-variables as in rule condition test-strings
(<code>%{VARNAME}</code>)
<li><a href="#mapfunc">mapping-function</a> calls
(<code>${mapname:key|default}</code>)
</ol>
@@ -1007,15 +1036,15 @@
(This is <b>not</b> the same as the 'chain|C' flag!)
<p>
<li>'<strong><code>env|E=</code></strong><i>VAR</i>:<i>VAL</i>' (set
<b>e</b>nvironment variable)<br>
- This forces an environment variable named <i>VAR</i> to be set to the
value
- <i>VAL</i>, where <i>VAL</i> can contain regexp backreferences
<tt>$N</tt>
- which will be expanded. You can use this flag more than once to set more
- than one variable. The variables can be later dereferenced at a lot of
- situations, but the usual location will be from within XSSI (via
- <tt><!--#echo var="VAR"--></tt>) or CGI (e.g.
<tt>$ENV{'VAR'}</tt>).
- But additionally you can also dereference it in a following
RewriteCond
- pattern via <tt>%{ENV:VAR}</tt>. Use this to strip but remember
- information from URLs.
+ This forces an environment variable named <i>VAR</i> to be set to the
+ value <i>VAL</i>, where <i>VAL</i> can contain regexp backreferences
+ <tt>$N</tt> and <tt>%N</tt> which will be expanded. You can use this
flag
+ more than once to set more than one variable. The variables can be later
+ dereferenced at a lot of situations, but the usual location will be from
+ within XSSI (via <tt><!--#echo var="VAR"--></tt>) or CGI (e.g.
+ <tt>$ENV{'VAR'}</tt>). But additionally you can also dereference it in
a
+ following RewriteCond pattern via <tt>%{ENV:VAR}</tt>. Use this to strip
+ but remember information from URLs.
</ul>
<p>