Hi all,

I've created a patch which allows you to specify which HTTP status code to
use when redirecting with the Location keyword.

When multiple domains serve the same content, some search engines
(notably Google) impose a "duplicate content" penalty. [1] This means that
owning both the .net and the .com for a given name and serving them from the
usual virtual host setup causes complications:

Virtual {
    Host example.com
    Host example.net
    Control {
        Alias /
        Location /var/www
    }
}

And the redirect 

Virtual {
    Host example.com
    Control {
        Alias /
        Location /var/www
    }
}
Virtual {
    Host example.net
    Control {
        Alias /
        Location http://example.com
    }
}

uses a 302 redirect, which leads to the same penalty. [2]

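The attached patches for 1.5p6 and 1.6b9 add a "RedirectStatus" keyword, which
takes a single integer between 300 and 399 (inclusive). This sets the HTTP
status code returned when redirecting. To avoid the duplicate content penalty,
use 301. For backwards compatibility, the default redirect code is still 302.
Note that the patches only add status lines and Location-header handling for
301, 303 and 307 (alongside the existing 302), so in practice you should use
one of those four codes.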

Virtual {
    Host example.com
    Control {
        Alias /
        Location /var/www
    }
}
Virtual {
    Host example.net
    Control {
        Alias /
        Location http://example.com
        RedirectStatus 301
    }
}

The supplied patches also update the documentation to cover the new keyword.
As such, they should be applied from the directory containing the doc and src
subdirectories (e.g. with "patch -p1").

Please feel free to shoot through any questions or comments. 

Cheers,

Allwyn.

PS: Thanks Michiel for the excellent project! :-) 

[1] http://www.google.com/search?q=duplicate+content+penalty
[2] http://www.google.com/search?q=302+redirect+penalty

-- 
Allwyn Fernandes
Director
Stobor Pty Ltd

Mobile: + 61 430 436 758
LinkedIn: http://www.linkedin.com/in/AllwynFernandes
diff -ur mathopd-1.5p6/doc/CHANGES mathopd-1.5af/doc/CHANGES
--- mathopd-1.5p6/doc/CHANGES	2007-07-21 00:04:08.000000000 +1000
+++ mathopd-1.5af/doc/CHANGES	2007-09-02 14:28:18.521016062 +1000
@@ -75,6 +75,7 @@
 		Clobber
 		Wait [!]
 		SanitizePath
+		RedirectStatus
 
 	The following keywords have been removed:-
 
@@ -230,6 +231,11 @@
 	paths. Thanks for Peter Pentchev for suggesting this and providing
 	initial patches.
 
+	The 302 status text changed from "Moved" to "Found". [RFC2616]
+
+	Mathopd can now return status codes other than 302 for a redirect. 
+	See the RedirectStatus keyword for details.
+
 stub.c:
 
 	This is a new file that contains code to pass data from and to
diff -ur mathopd-1.5p6/doc/config.txt mathopd-1.5af/doc/config.txt
--- mathopd-1.5p6/doc/config.txt	2007-07-21 00:04:08.000000000 +1000
+++ mathopd-1.5af/doc/config.txt	2007-09-02 16:56:28.730322433 +1000
@@ -733,6 +733,16 @@
          error responses. Its value may be displayed by a web browser in
          a login dialog.
 
+Keyword: RedirectStatus
+Where:   Control
+Type:    Integer
+Desc:    Used in conjunction with the Location redirect syntax, this sets
+         the status code returned. The default is 302, but it can be set
+         to any value from 300 to 399. Valid codes are defined by RFC2616
+         and should be adhered to. This can be useful, for example, to
+         avoid the "duplicate content" penalty imposed by some search
+         engines.
+
 Keyword: Referer
 Where:   LogFormat
 Desc:    The value of the 'Referer:' header sent by the client. Sometimes
diff -ur mathopd-1.5p6/doc/syntax.txt mathopd-1.5af/doc/syntax.txt
--- mathopd-1.5p6/doc/syntax.txt	2007-07-21 00:04:08.000000000 +1000
+++ mathopd-1.5af/doc/syntax.txt	2007-09-02 14:48:58.139693398 +1000
@@ -90,6 +90,7 @@
 	"PathInfo" flag
 	"AutoIndexCommand" string
 	"SanitizePath" flag
+	"RedirectStatus" integer
 
 server-item:
 	"Port" integer
diff -ur mathopd-1.5p6/src/config.c mathopd-1.5af/src/config.c
--- mathopd-1.5p6/src/config.c	2007-09-02 12:28:21.973903296 +1000
+++ mathopd-1.5af/src/config.c	2007-09-02 16:52:42.871881100 +1000
@@ -132,6 +132,7 @@
 static const char c_putenv[] =			"PutEnv";
 static const char c_query_string[] =		"QueryString";
 static const char c_realm[] =			"Realm";
+static const char c_redirect_status[] =		"RedirectStatus";
 static const char c_referer[] =			"Referer";
 static const char c_remote_address[] =		"RemoteAddress";
 static const char c_remote_port[] =		"RemotePort";
@@ -173,6 +174,7 @@
 static const char e_noinput[] =		"no input";
 static const char e_user_invalid[] =	"invalid user";
 static const char e_user_unknown[] =	"user unknown";
+static const char e_outside_range[] =	"value outside allowed range";
 
 static const char t_close[] =		"unexpected closing brace";
 static const char t_eof[] =		"unexpected end of file";
@@ -346,6 +348,20 @@
 	return 0;
 }
 
+static const char *config_smallint_in_range(struct configuration *p, int *i, int min, int max)
+{
+	unsigned long u;
+	const char *t;
+	
+	t = config_int(p, &u);
+	if (t)
+		return t;
+	if((u < min) || (u > max))
+		return e_outside_range;
+	*i = u;
+	return 0;
+}
+
 static const char *config_flag(struct configuration *p, int *i)
 {
 	const char *t;
@@ -671,6 +687,7 @@
 		a->path_info_ok = b->path_info_ok;
 		a->auto_index_command = b->auto_index_command;
 		a->sanitize_path = b->sanitize_path;
+		a->redirect_status = b->redirect_status;
 	} else {
 		a->index_names = 0;
 		a->accesses = 0;
@@ -694,6 +711,7 @@
 		a->path_info_ok = 1;
 		a->auto_index_command = 0;
 		a->sanitize_path = 0;
+		a->redirect_status = 0;
 	}
 	a->next = *as;
 	*as = a;
@@ -777,6 +795,8 @@
 			t = config_string(p, &a->auto_index_command);
 		else if (!strcasecmp(p->tokbuf, c_sanitize_path))
 			t = config_flag(p, &a->sanitize_path);
+		else if (!strcasecmp(p->tokbuf, c_redirect_status))
+			t = config_smallint_in_range(p, &a->redirect_status, 300, 399);
 		else
 			t = e_keyword;
 		if (t)
diff -ur mathopd-1.5p6/src/mathopd.h mathopd-1.5af/src/mathopd.h
--- mathopd-1.5p6/src/mathopd.h	2007-09-02 12:28:21.968904103 +1000
+++ mathopd-1.5af/src/mathopd.h	2007-09-02 16:38:38.678490237 +1000
@@ -182,6 +182,7 @@
 	int path_info_ok;
 	char *auto_index_command;
 	int sanitize_path;
+	int redirect_status;
 };
 
 struct virtual {
diff -ur mathopd-1.5p6/src/request.c mathopd-1.5af/src/request.c
--- mathopd-1.5p6/src/request.c	2007-09-02 12:28:21.967904264 +1000
+++ mathopd-1.5af/src/request.c	2007-09-02 18:17:50.698519532 +1000
@@ -925,7 +925,10 @@
 		r->location = r->path_translated;
 		if (debug)
 			log_d("redirecting");
-		r->status = 302;
+		if(r->c && r->c->redirect_status)
+			r->status = r->c->redirect_status;
+		else
+			r->status = 302;
 		return 0;
 	}
 	if (get_path_info(r) == -1) {
@@ -1409,10 +1412,16 @@
 		return "204 No Content";
 	case 206:
 		return "206 Partial Content";
+	case 301:
+		return "301 Moved Permanently";
 	case 302:
-		return "302 Moved";
+		return "302 Found";
+	case 303:
+		return "303 See Other";
 	case 304:
 		return "304 Not Modified";
+	case 307:
+		return "307 Temporary Redirect";
 	case 400:
 		return "400 Bad Request";
 	case 401:
@@ -1496,7 +1505,10 @@
 		if (pool_print(p, "Content-Range: bytes %ju-%ju/%ju\r\n", r->range_floor, r->range_ceiling, r->range_total) == -1)
 			return -1;
 		break;
+	case 301:
 	case 302:
+	case 303:
+	case 307:
 		if (r->location)
 			if (pool_print(p, "Location: %s\r\n", r->location) == -1)
 				return -1;
@@ -1551,7 +1563,10 @@
 	if (pool_print(p, "<title>%s</title>\n<h1>%s</h1>\n", status_line, status_line) == -1)
 		return -1;
 	switch (r->status) {
+	case 301:
 	case 302:
+	case 303:
+	case 307:
 		if (pool_print(p, "This document has moved to URL <a href=\"%s\">%s</a>.\n", r->location, r->location) == -1)
 			return -1;
 		break;
diff -ur mathopd-1.6b9/doc/CHANGES mathopd-1.6af/doc/CHANGES
--- mathopd-1.6b9/doc/CHANGES	2007-07-08 02:56:30.000000000 +1000
+++ mathopd-1.6af/doc/CHANGES	2007-09-02 16:26:06.424213396 +1000
@@ -29,6 +29,7 @@
 	NumProcesses
 	PID
 	SanitizePath
+	RedirectStatus
 
 The following keywords have been removed:
 
diff -ur mathopd-1.6b9/doc/config.txt mathopd-1.6af/doc/config.txt
--- mathopd-1.6b9/doc/config.txt	2006-11-05 06:58:33.000000000 +1100
+++ mathopd-1.6af/doc/config.txt	2007-09-02 16:56:41.671227920 +1000
@@ -742,6 +742,16 @@
          error responses. Its value may be displayed by a web browser in
          a login dialog.
 
+Keyword: RedirectStatus
+Where:   Control
+Type:    Integer
+Desc:    Used in conjunction with the Location redirect syntax, this sets
+         the status code returned. The default is 302, but it can be set
+         to any value from 300 to 399. Valid codes are defined by RFC2616
+         and should be adhered to. This can be useful, for example, to
+         avoid the "duplicate content" penalty imposed by some search
+         engines.
+
 Keyword: Referer
 Where:   LogFormat
 Desc:    The value of the 'Referer:' header sent by the client. Sometimes
diff -ur mathopd-1.6b9/doc/syntax.txt mathopd-1.6af/doc/syntax.txt
--- mathopd-1.6b9/doc/syntax.txt	2006-11-05 06:58:33.000000000 +1100
+++ mathopd-1.6af/doc/syntax.txt	2007-09-02 15:48:27.807547858 +1000
@@ -91,6 +91,7 @@
 	"AutoIndexCommand" string
 	"ExpireInterval" integer
 	"SanitizePath" flag
+	"RedirectStatus" integer
 
 server-item:
 	"Port" string
diff -ur mathopd-1.6b9/src/config.c mathopd-1.6af/src/config.c
--- mathopd-1.6b9/src/config.c	2007-07-08 02:04:12.000000000 +1000
+++ mathopd-1.6af/src/config.c	2007-09-02 16:51:29.044832429 +1000
@@ -128,6 +128,7 @@
 static const char c_putenv[] =			"PutEnv";
 static const char c_query_string[] =		"QueryString";
 static const char c_realm[] =			"Realm";
+static const char c_redirect_status[] =		"RedirectStatus";
 static const char c_referer[] =			"Referer";
 static const char c_remote_address[] =		"RemoteAddress";
 static const char c_remote_port[] =		"RemotePort";
@@ -167,6 +168,7 @@
 static const char e_user_invalid[] =	"invalid user";
 static const char e_user_unknown[] =	"user unknown";
 static const char e_toobig[] =		"number too big";
+static const char e_outside_range[] =	"value outside allowed range";
 
 static const char t_close[] =		"unexpected closing brace";
 static const char t_eof[] =		"unexpected end of file";
@@ -326,6 +328,20 @@
 	return 0;
 }
 
+static const char *config_smallint_in_range(struct configuration *p, int *i, int min, int max)
+{
+	unsigned long u;
+	const char *t;
+
+	t = config_int(p, &u);
+	if (t)
+		return t;
+	if((u < min) || (u > max))
+		return e_outside_range;
+	*i = u;
+	return 0;
+}
+
 static const char *config_flag(struct configuration *p, int *i)
 {
 	const char *t;
@@ -538,6 +554,7 @@
 		a->auto_index_command = b->auto_index_command;
 		a->expire_interval = b->expire_interval;
 		a->sanitize_path = b->sanitize_path;
+		a->redirect_status = b->redirect_status;
 	} else {
 		a->index_names = 0;
 		a->mimes = 0;
@@ -560,6 +577,7 @@
 		a->auto_index_command = 0;
 		a->expire_interval = 0;
 		a->sanitize_path = 0;
+		a->redirect_status = 0;
 	}
 	a->next = *as;
 	*as = a;
@@ -639,6 +657,8 @@
 			t = config_int(p, &a->expire_interval);
 		else if (!strcasecmp(p->tokbuf, c_sanitize_path))
 			t = config_flag(p, &a->sanitize_path);
+		else if (!strcasecmp(p->tokbuf, c_redirect_status))
+			t = config_smallint_in_range(p, &a->redirect_status, 300, 399);
 		else
 			t = e_keyword;
 		if (t)
diff -ur mathopd-1.6b9/src/mathopd.h mathopd-1.6af/src/mathopd.h
--- mathopd-1.6b9/src/mathopd.h	2007-07-08 02:04:12.000000000 +1000
+++ mathopd-1.6af/src/mathopd.h	2007-09-02 15:48:27.823545270 +1000
@@ -177,6 +177,7 @@
 	char *auto_index_command;
 	unsigned long expire_interval;
 	int sanitize_path;
+	int redirect_status;
 };
 
 struct virtual {
diff -ur mathopd-1.6b9/src/request.c mathopd-1.6af/src/request.c
--- mathopd-1.6b9/src/request.c	2007-07-21 20:41:13.000000000 +1000
+++ mathopd-1.6af/src/request.c	2007-09-02 18:18:21.407547150 +1000
@@ -907,7 +907,10 @@
 		r->location = r->path_translated;
 		if (debug)
 			log_d("redirecting");
-		r->status = 302;
+		if(r->c && r->c->redirect_status)
+			r->status = r->c->redirect_status;
+		else
+			r->status = 302;
 		return 0;
 	}
 	if (get_path_info(r) == -1) {
@@ -1393,10 +1396,16 @@
 		return "204 No Content";
 	case 206:
 		return "206 Partial Content";
+	case 301:
+		return "301 Moved Permanently";
 	case 302:
-		return "302 Moved";
+		return "302 Found";
+	case 303:
+		return "303 See Other";
 	case 304:
 		return "304 Not Modified";
+	case 307:
+		return "307 Temporary Redirect";
 	case 400:
 		return "400 Bad Request";
 	case 401:
@@ -1481,7 +1490,10 @@
 		if (pool_print(p, "Content-Range: bytes %ju-%ju/%ju\r\n", r->range_floor, r->range_ceiling, r->range_total) == -1)
 			return -1;
 		break;
+	case 301:
 	case 302:
+	case 303:
+	case 307:
 		if (r->location)
 			if (pool_print(p, "Location: %s\r\n", r->location) == -1)
 				return -1;
@@ -1542,7 +1554,10 @@
 	if (pool_print(p, "<title>%s</title>\n<h1>%s</h1>\n", status_line, status_line) == -1)
 		return -1;
 	switch (r->status) {
+	case 301:
 	case 302:
+	case 303:
+	case 307:
 		if (pool_print(p, "This document has moved to URL <a href=\"%s\">%s</a>.\n", r->location, r->location) == -1)
 			return -1;
 		break;

Reply via email to