Hi all,
I've created a patch that lets you specify which HTTP status code is sent
when redirecting with the Location keyword.
When multiple domains serve the same content, some search engines
(notably Google) impose a "duplicate content" penalty. [1] This means that
owning both the .net and the .com for a given name, and serving both from
the usual virtual host setup, causes complications:
Virtual {
Host example.com
Host example.net
Control {
Alias /
Location /var/www
}
}
And the alternative setup, which redirects one domain to the other,
Virtual {
Host example.com
Control {
Alias /
Location /var/www
}
}
Virtual {
Host example.net
Control {
Alias /
Location http://example.com
}
}
uses a 302 redirect, which leads to the same penalty. [2]
The attached patches for 1.5p6 and 1.6b9 add a "RedirectStatus" keyword, which
takes a single integer between 300 and 399 (inclusive). This sets the HTTP
status code returned when redirecting. To avoid the duplicate content penalty,
use 301. For backwards compatibility, the default redirect code is still 302.
Virtual {
Host example.com
Control {
Alias /
Location /var/www
}
}
Virtual {
Host example.net
Control {
Alias /
Location http://example.com
RedirectStatus 301
}
}
The supplied patches also update the documentation to cover the new keyword,
so they should be applied from the directory containing the doc and src
subdirectories.
Please feel free to shoot through any questions or comments.
Cheers,
Allwyn.
PS: Thanks Michiel for the excellent project! :-)
[1] http://www.google.com/search?q=duplicate+content+penalty
[2] http://www.google.com/search?q=302+redirect+penalty
--
Allwyn Fernandes
Director
Stobor Pty Ltd
Mobile: + 61 430 436 758
LinkedIn: http://www.linkedin.com/in/AllwynFernandes
diff -ur mathopd-1.5p6/doc/CHANGES mathopd-1.5af/doc/CHANGES
--- mathopd-1.5p6/doc/CHANGES 2007-07-21 00:04:08.000000000 +1000
+++ mathopd-1.5af/doc/CHANGES 2007-09-02 14:28:18.521016062 +1000
@@ -75,6 +75,7 @@
Clobber
Wait [!]
SanitizePath
+ RedirectStatus
The following keywords have been removed:-
@@ -230,6 +231,11 @@
paths. Thanks for Peter Pentchev for suggesting this and providing
initial patches.
+ The 302 status text changed from "Moved" to "Found". [RFC2616]
+
+ Mathopd can now return status codes other than 302 for a redirect.
+ See the RedirectStatus keyword for details.
+
stub.c:
This is a new file that contains code to pass data from and to
diff -ur mathopd-1.5p6/doc/config.txt mathopd-1.5af/doc/config.txt
--- mathopd-1.5p6/doc/config.txt 2007-07-21 00:04:08.000000000 +1000
+++ mathopd-1.5af/doc/config.txt 2007-09-02 16:56:28.730322433 +1000
@@ -733,6 +733,16 @@
error responses. Its value may be displayed by a web browser in
a login dialog.
+Keyword: RedirectStatus
+Where: Control
+Type: Integer
+Desc: Used in conjunction with the Location redirect syntax, this sets
+ the status code returned. The default is a "302", but can be set
+ to anything in the range 3xx. Valid codes are defined by RFC2616
+ and should be adhered to. This can be useful, for example, to
+ prevent the "duplicate content" penalty imposed by some search
+ engines.
+
Keyword: Referer
Where: LogFormat
Desc: The value of the 'Referer:' header sent by the client. Sometimes
diff -ur mathopd-1.5p6/doc/syntax.txt mathopd-1.5af/doc/syntax.txt
--- mathopd-1.5p6/doc/syntax.txt 2007-07-21 00:04:08.000000000 +1000
+++ mathopd-1.5af/doc/syntax.txt 2007-09-02 14:48:58.139693398 +1000
@@ -90,6 +90,7 @@
"PathInfo" flag
"AutoIndexCommand" string
"SanitizePath" flag
+ "RedirectStatus" integer
server-item:
"Port" integer
diff -ur mathopd-1.5p6/src/config.c mathopd-1.5af/src/config.c
--- mathopd-1.5p6/src/config.c 2007-09-02 12:28:21.973903296 +1000
+++ mathopd-1.5af/src/config.c 2007-09-02 16:52:42.871881100 +1000
@@ -132,6 +132,7 @@
static const char c_putenv[] = "PutEnv";
static const char c_query_string[] = "QueryString";
static const char c_realm[] = "Realm";
+static const char c_redirect_status[] = "RedirectStatus";
static const char c_referer[] = "Referer";
static const char c_remote_address[] = "RemoteAddress";
static const char c_remote_port[] = "RemotePort";
@@ -173,6 +174,7 @@
static const char e_noinput[] = "no input";
static const char e_user_invalid[] = "invalid user";
static const char e_user_unknown[] = "user unknown";
+static const char e_outside_range[] = "value outside allowed range";
static const char t_close[] = "unexpected closing brace";
static const char t_eof[] = "unexpected end of file";
@@ -346,6 +348,20 @@
return 0;
}
+static const char *config_smallint_in_range(struct configuration *p, int *i, int min, int max)
+{
+ unsigned long u;
+ const char *t;
+
+ t = config_int(p, &u);
+ if (t)
+ return t;
+ if((u < min) || (u > max))
+ return e_outside_range;
+ *i = u;
+ return 0;
+}
+
static const char *config_flag(struct configuration *p, int *i)
{
const char *t;
@@ -671,6 +687,7 @@
a->path_info_ok = b->path_info_ok;
a->auto_index_command = b->auto_index_command;
a->sanitize_path = b->sanitize_path;
+ a->redirect_status = b->redirect_status;
} else {
a->index_names = 0;
a->accesses = 0;
@@ -694,6 +711,7 @@
a->path_info_ok = 1;
a->auto_index_command = 0;
a->sanitize_path = 0;
+ a->redirect_status = 0;
}
a->next = *as;
*as = a;
@@ -777,6 +795,8 @@
t = config_string(p, &a->auto_index_command);
else if (!strcasecmp(p->tokbuf, c_sanitize_path))
t = config_flag(p, &a->sanitize_path);
+ else if (!strcasecmp(p->tokbuf, c_redirect_status))
+ t = config_smallint_in_range(p, &a->redirect_status, 300, 399);
else
t = e_keyword;
if (t)
diff -ur mathopd-1.5p6/src/mathopd.h mathopd-1.5af/src/mathopd.h
--- mathopd-1.5p6/src/mathopd.h 2007-09-02 12:28:21.968904103 +1000
+++ mathopd-1.5af/src/mathopd.h 2007-09-02 16:38:38.678490237 +1000
@@ -182,6 +182,7 @@
int path_info_ok;
char *auto_index_command;
int sanitize_path;
+ int redirect_status;
};
struct virtual {
diff -ur mathopd-1.5p6/src/request.c mathopd-1.5af/src/request.c
--- mathopd-1.5p6/src/request.c 2007-09-02 12:28:21.967904264 +1000
+++ mathopd-1.5af/src/request.c 2007-09-02 18:17:50.698519532 +1000
@@ -925,7 +925,10 @@
r->location = r->path_translated;
if (debug)
log_d("redirecting");
- r->status = 302;
+ if(r->c && r->c->redirect_status)
+ r->status = r->c->redirect_status;
+ else
+ r->status = 302;
return 0;
}
if (get_path_info(r) == -1) {
@@ -1409,10 +1412,16 @@
return "204 No Content";
case 206:
return "206 Partial Content";
+ case 301:
+ return "301 Moved Permanently";
case 302:
- return "302 Moved";
+ return "302 Found";
+ case 303:
+ return "303 See Other";
case 304:
return "304 Not Modified";
+ case 307:
+ return "307 Temporary Redirect";
case 400:
return "400 Bad Request";
case 401:
@@ -1496,7 +1505,10 @@
if (pool_print(p, "Content-Range: bytes %ju-%ju/%ju\r\n", r->range_floor, r->range_ceiling, r->range_total) == -1)
return -1;
break;
+ case 301:
case 302:
+ case 303:
+ case 307:
if (r->location)
if (pool_print(p, "Location: %s\r\n", r->location) == -1)
return -1;
@@ -1551,7 +1563,10 @@
if (pool_print(p, "<title>%s</title>\n<h1>%s</h1>\n", status_line, status_line) == -1)
return -1;
switch (r->status) {
+ case 301:
case 302:
+ case 303:
+ case 307:
if (pool_print(p, "This document has moved to URL <a href=\"%s\">%s</a>.\n", r->location, r->location) == -1)
return -1;
break;
diff -ur mathopd-1.6b9/doc/CHANGES mathopd-1.6af/doc/CHANGES
--- mathopd-1.6b9/doc/CHANGES 2007-07-08 02:56:30.000000000 +1000
+++ mathopd-1.6af/doc/CHANGES 2007-09-02 16:26:06.424213396 +1000
@@ -29,6 +29,7 @@
NumProcesses
PID
SanitizePath
+ RedirectStatus
The following keywords have been removed:
diff -ur mathopd-1.6b9/doc/config.txt mathopd-1.6af/doc/config.txt
--- mathopd-1.6b9/doc/config.txt 2006-11-05 06:58:33.000000000 +1100
+++ mathopd-1.6af/doc/config.txt 2007-09-02 16:56:41.671227920 +1000
@@ -742,6 +742,16 @@
error responses. Its value may be displayed by a web browser in
a login dialog.
+Keyword: RedirectStatus
+Where: Control
+Type: Integer
+Desc: Used in conjunction with the Location redirect syntax, this sets
+ the status code returned. The default is a "302", but can be set
+ to anything in the range 3xx. Valid codes are defined by RFC2616
+ and should be adhered to. This can be useful, for example, to
+ prevent the "duplicate content" penalty imposed by some search
+ engines.
+
Keyword: Referer
Where: LogFormat
Desc: The value of the 'Referer:' header sent by the client. Sometimes
diff -ur mathopd-1.6b9/doc/syntax.txt mathopd-1.6af/doc/syntax.txt
--- mathopd-1.6b9/doc/syntax.txt 2006-11-05 06:58:33.000000000 +1100
+++ mathopd-1.6af/doc/syntax.txt 2007-09-02 15:48:27.807547858 +1000
@@ -91,6 +91,7 @@
"AutoIndexCommand" string
"ExpireInterval" integer
"SanitizePath" flag
+ "RedirectStatus" integer
server-item:
"Port" string
diff -ur mathopd-1.6b9/src/config.c mathopd-1.6af/src/config.c
--- mathopd-1.6b9/src/config.c 2007-07-08 02:04:12.000000000 +1000
+++ mathopd-1.6af/src/config.c 2007-09-02 16:51:29.044832429 +1000
@@ -128,6 +128,7 @@
static const char c_putenv[] = "PutEnv";
static const char c_query_string[] = "QueryString";
static const char c_realm[] = "Realm";
+static const char c_redirect_status[] = "RedirectStatus";
static const char c_referer[] = "Referer";
static const char c_remote_address[] = "RemoteAddress";
static const char c_remote_port[] = "RemotePort";
@@ -167,6 +168,7 @@
static const char e_user_invalid[] = "invalid user";
static const char e_user_unknown[] = "user unknown";
static const char e_toobig[] = "number too big";
+static const char e_outside_range[] = "value outside allowed range";
static const char t_close[] = "unexpected closing brace";
static const char t_eof[] = "unexpected end of file";
@@ -326,6 +328,20 @@
return 0;
}
+static const char *config_smallint_in_range(struct configuration *p, int *i, int min, int max)
+{
+ unsigned long u;
+ const char *t;
+
+ t = config_int(p, &u);
+ if (t)
+ return t;
+ if((u < min) || (u > max))
+ return e_outside_range;
+ *i = u;
+ return 0;
+}
+
static const char *config_flag(struct configuration *p, int *i)
{
const char *t;
@@ -538,6 +554,7 @@
a->auto_index_command = b->auto_index_command;
a->expire_interval = b->expire_interval;
a->sanitize_path = b->sanitize_path;
+ a->redirect_status = b->redirect_status;
} else {
a->index_names = 0;
a->mimes = 0;
@@ -560,6 +577,7 @@
a->auto_index_command = 0;
a->expire_interval = 0;
a->sanitize_path = 0;
+ a->redirect_status = 0;
}
a->next = *as;
*as = a;
@@ -639,6 +657,8 @@
t = config_int(p, &a->expire_interval);
else if (!strcasecmp(p->tokbuf, c_sanitize_path))
t = config_flag(p, &a->sanitize_path);
+ else if (!strcasecmp(p->tokbuf, c_redirect_status))
+ t = config_smallint_in_range(p, &a->redirect_status, 300, 399);
else
t = e_keyword;
if (t)
diff -ur mathopd-1.6b9/src/mathopd.h mathopd-1.6af/src/mathopd.h
--- mathopd-1.6b9/src/mathopd.h 2007-07-08 02:04:12.000000000 +1000
+++ mathopd-1.6af/src/mathopd.h 2007-09-02 15:48:27.823545270 +1000
@@ -177,6 +177,7 @@
char *auto_index_command;
unsigned long expire_interval;
int sanitize_path;
+ int redirect_status;
};
struct virtual {
diff -ur mathopd-1.6b9/src/request.c mathopd-1.6af/src/request.c
--- mathopd-1.6b9/src/request.c 2007-07-21 20:41:13.000000000 +1000
+++ mathopd-1.6af/src/request.c 2007-09-02 18:18:21.407547150 +1000
@@ -907,7 +907,10 @@
r->location = r->path_translated;
if (debug)
log_d("redirecting");
- r->status = 302;
+ if(r->c && r->c->redirect_status)
+ r->status = r->c->redirect_status;
+ else
+ r->status = 302;
return 0;
}
if (get_path_info(r) == -1) {
@@ -1393,10 +1396,16 @@
return "204 No Content";
case 206:
return "206 Partial Content";
+ case 301:
+ return "301 Moved Permanently";
case 302:
- return "302 Moved";
+ return "302 Found";
+ case 303:
+ return "303 See Other";
case 304:
return "304 Not Modified";
+ case 307:
+ return "307 Temporary Redirect";
case 400:
return "400 Bad Request";
case 401:
@@ -1481,7 +1490,10 @@
if (pool_print(p, "Content-Range: bytes %ju-%ju/%ju\r\n", r->range_floor, r->range_ceiling, r->range_total) == -1)
return -1;
break;
+ case 301:
case 302:
+ case 303:
+ case 307:
if (r->location)
if (pool_print(p, "Location: %s\r\n", r->location) == -1)
return -1;
@@ -1542,7 +1554,10 @@
if (pool_print(p, "<title>%s</title>\n<h1>%s</h1>\n", status_line, status_line) == -1)
return -1;
switch (r->status) {
+ case 301:
case 302:
+ case 303:
+ case 307:
if (pool_print(p, "This document has moved to URL <a href=\"%s\">%s</a>.\n", r->location, r->location) == -1)
return -1;
break;