filters DisableUrlSessionFilter.java

Nico Klasens Mon, 21 Apr 2008 03:13:57 -0700

Update of /var/cvs/contributions/CMSContainer/cmsc/utilities/src/java/com/finalist/cmsc/filters
In directory james.mmbase.org:/tmp/cvs-serv657/cmsc/utilities/src/java/com/finalist/cmsc/filters


Modified Files:
      Tag: b1_4
        DisableUrlSessionFilter.java 
Log Message:
CMSC-863 HttpSessions are used when not necessary and created for useragents 
which do not maintain a session


See also: http://cvs.mmbase.org/viewcvs/contributions/CMSContainer/cmsc/utilities/src/java/com/finalist/cmsc/filters
See also: http://www.mmbase.org/jira/browse/CMSC-863


Index: DisableUrlSessionFilter.java
===================================================================
RCS file: /var/cvs/contributions/CMSContainer/cmsc/utilities/src/java/com/finalist/cmsc/filters/DisableUrlSessionFilter.java,v
retrieving revision 1.3
retrieving revision 1.3.2.1
diff -u -b -r1.3 -r1.3.2.1
--- DisableUrlSessionFilter.java        26 Nov 2007 11:43:44 -0000      1.3
+++ DisableUrlSessionFilter.java        21 Apr 2008 10:11:28 -0000      1.3.2.1
@@ -10,14 +10,23 @@
 package com.finalist.cmsc.filters;
 
 import java.io.IOException;
+import java.util.regex.Pattern;
+
 import javax.servlet.*;
 import javax.servlet.http.*;
 
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
 /**
- * This Filter disables Url Rewriting and adds Url encoding for all urls. URL
- * Encoding is a process of transforming user input to a CGI form so it is fit
+ * This Filter disables Url Rewriting and cleans Sessions for user agents which
+ * do not support cookies. Example of these user agents are search engines.
+ * 
+ * URL Encoding is a process of transforming user input to a CGI form so it is fit
  * for travel across the network -- basically, stripping spaces and punctuation
  * and replacing with escape characters. URL Decoding is the reverse process.
+ * 
  * URL Rewriting is a technique for saving state information on the user's
  * browser between page hits. It's sort of like cookies, only the information
  * gets stored inside the URL, as an additional parameter. The HttpSession API,
@@ -26,6 +35,10 @@
  */
 public class DisableUrlSessionFilter implements Filter {
 
+   private static final Log log = LogFactory.getLog(DisableUrlSessionFilter.class);
+
+   private Pattern userAgentPattern;
+    
    /**
     * Filters requests to disable URL-based session identifiers.
     */
@@ -42,10 +55,11 @@
 
       // clear session if session id in URL
       if (httpRequest.isRequestedSessionIdFromURL()) {
-         HttpSession session = httpRequest.getSession();
-         if (session != null)
+         HttpSession session = httpRequest.getSession(false);
+         if (session != null) {
             session.invalidate();
       }
+      }
 
       // wrap response to remove URL encoding
       HttpServletResponseWrapper wrappedResponse = new HttpServletResponseWrapper(httpResponse) {
@@ -82,25 +96,97 @@
           * @return
           */
          private String getEncodedUrl(String url) {
-            // try {
-            // return URLEncoder.encode(url,"UTF-8");
-            // }
-            // catch (UnsupportedEncodingException e) {
-            // // should not happen UTF-8 is mandatory
             return url;
-            // }
          }
       };
 
       chain.doFilter(request, wrappedResponse);
+      
+      cleanSessions(httpRequest);
    }
 
+    private void cleanSessions(HttpServletRequest httpRequest) {
+        try {
+            String userAgent = httpRequest.getHeader("User-Agent");
+            if (userAgentPattern != null && userAgentPattern.matcher(userAgent).find()) {
+                HttpSession session = httpRequest.getSession(false);
+                if (session != null && session.isNew()) {
+                    session.invalidate();
+                }
+            }
+        }
+        catch (Exception ex) {
+            log.fatal("can't process useragent pattern", ex);
+        }
+    }
 
    public void init(FilterConfig config) {
-      // nothing
+       String useragents = config.getInitParameter("useragents");
+       if (StringUtils.isBlank(useragents)) {
+           String searchEngines = config.getInitParameter("searchEngines");
+           if (StringUtils.isBlank(searchEngines)) {
+               searchEngines = getSearchEngines();
+           }
+           String feedReaders = config.getInitParameter("feedReaders");
+           if (StringUtils.isBlank(feedReaders)) {
+               feedReaders = getFeedReaders();
+           }
+
+           useragents = searchEngines + "|" + feedReaders;
+       }
+       userAgentPattern = Pattern.compile(useragents);
+   }
+
+    private String getSearchEngines() {
+        // Search engines / Crawlers
+        // Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
+        // Googlebot/2.1 (+http://www.google.com/bot.html)
+        // Mozilla/5.0 (compatible; Yahoo! Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)
+        // msnbot-media/1.0 (+http://search.msn.com/msnbot.htm)
+        // IlseBot/1.1
+        // Gigabot/3.0 (http://www.gigablast.com/spider.html)
+        // Baiduspider+(+http://www.baidu.com/search/spider.htm)
+        // WebAlta Crawler/2.0 (http://www.webalta.net/ru/about_webmaster)
+        // TinEye/1.1 (http://tineye.com/crawler.html)
+        // ICC-Crawler(Mozilla-compatible; icc-crawl-contact(at)ml(dot)nict(dot)go(dot)jp)
+        // Yeti/1.0 (+http://help.naver.com/robots/)
+        // REAP-crawler Nutch/Nutch-1.0-dev (Reap Project; http://reap.cs.cmu.edu/REAP-crawler/;
+        // Reap Project)
+        // Snapbot/1.0 (Snap Shots, +http://www.snap.com)
+
+        String searchEngines = "google.com|search.msn.com|yahoo.com|IlseBot|gigablast.com"
+                + "|Baiduspider|WebAlta|tineye.com|ICC-Crawler|Yeti|REAP-crawler|Snapbot";
+        return searchEngines;
+    }
+
+    private String getFeedReaders() {
+        // feed readers
+        // Feedfetcher-Google; (+http://www.google.com/feedfetcher.html;
+        // msnbot-NewsBlogs/1.0 (+http://search.msn.com/msnbot.htm)
+        // ilse-patgen/1.0
+        // Telegraaf Koppensneller
+
+        // FeedForAll rss2html.php v2
+        // UniversalFeedParser/3.3 +http://feedparser.org/
+        // RssReader/1.0.88.0 (http://www.rssreader.com)
+        // Feedreader 3.12 (Powered by Newsbrain)
+        // FeedHub FeedDiscovery/1.0 (http://www.feedhub.com)
+        // SharpReader/0.9.7.0 (.NET CLR 1.1.4322.573; WinNT 5.1.2600.0)
+        // YahooFeedSeeker/2.0
+        // SimplePie/1.0 (Feed Parser; http://simplepie.org/
+
+        // Snarfer/0.9.1 (http://www.snarfware.com/)
+        // BlogBridge 5.0.1 (http://www.blogbridge.com/)
+        // MagpieRSS/0.72 (+http://magpierss.sf.net)
+        // Akregator/1.2.7; librss/remnants
+        // Netvibes (http://www.netvibes.com/
+        // Rasasa/1.2 (http://www.rasasa.com; 1 subscribers)
+        String feedReaders = "google.com|search.msn.com|ilse-patgen|Telegraaf"
+            + "|Feed|Reader"
+            + "|Snarfer|BlogBridge|MagpieRSS|Akregator|Netvibes|Rasasa";
+        return feedReaders;
    }
 
-
    public void destroy() {
       // nothing
    }
_______________________________________________
Cvs mailing list
[email protected]
http://lists.mmbase.org/mailman/listinfo/cvs

[MMBASE CVS] contributions/CMSContainer/cmsc/utilities/src/java/com/finalist/cmsc/filters DisableUrlSessionFilter.java

Reply via email to