Author: ssmiweve
Date: 2008-09-18 11:43:03 +0200 (Thu, 18 Sep 2008)
New Revision: 6823
Modified:
branches/2.17/war/src/main/java/no/sesat/search/http/servlet/BoomerangServlet.java
Log:
SEARCH-5022 - Correct sitemap errors etc. appearing in the Google webmaster tool
Modified:
branches/2.17/war/src/main/java/no/sesat/search/http/servlet/BoomerangServlet.java
===================================================================
--- branches/2.17/war/src/main/java/no/sesat/search/http/servlet/BoomerangServlet.java	2008-09-18 08:24:45 UTC (rev 6822)
+++ branches/2.17/war/src/main/java/no/sesat/search/http/servlet/BoomerangServlet.java	2008-09-18 09:43:03 UTC (rev 6823)
@@ -31,6 +31,7 @@
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
+import java.util.regex.Pattern;
import no.sesat.search.datamodel.DataModel;
import no.sesat.search.datamodel.generic.StringDataObject;
import org.apache.commons.lang.StringEscapeUtils;
@@ -61,6 +62,8 @@
private static final String CEREMONIAL = "/boomerang/";
+ private static final Pattern ROBOTS =
Pattern.compile("(Googlebot|Slurp|Crawler|Bot)", Pattern.CASE_INSENSITIVE);
+
@Override
public void destroy() { }
@@ -117,8 +120,9 @@
kangerooGrub(entrails);
LOG.debug("Ceremonial boomerang to " + destination.toString());
-
if(req.getHeader("User-agent").matches("(Googlebot|Slurp|Yahoo\\! Slurp)")){
- // crawlers like permanent redirects. and we're not interested in their clicks so ok to cache.
+
+ if(ROBOTS.matcher(req.getHeader("User-agent")).find()){
+ // robots like permanent redirects. and we're not interested in their clicks so ok to cache.
res.setStatus(HttpServletResponse.SC_MOVED_PERMANENTLY);
res.setHeader("Location", destination.toString());
res.setHeader("Connection", "close");
_______________________________________________
Kernel-commits mailing list
[email protected]
http://sesat.no/mailman/listinfo/kernel-commits