Author: norman
Date: Sat Nov 14 16:17:23 2009
New Revision: 836203

URL: http://svn.apache.org/viewvc?rev=836203&view=rev
Log:
Fixes the dangerous-attributes removal regex (HUPA-52). Thanks to Manuel for
submitting this patch.

Modified:
    
james/hupa/trunk/server/src/main/java/org/apache/hupa/server/handler/GetMessageDetailsHandler.java
    
james/hupa/trunk/server/src/test/java/org/apache/hupa/server/handler/GetMessageDetailsHandlerTest.java
    
james/hupa/trunk/server/src/test/java/org/apache/hupa/server/servlet/DownloadAttachmentServletTest.java

Modified: 
james/hupa/trunk/server/src/main/java/org/apache/hupa/server/handler/GetMessageDetailsHandler.java
URL: 
http://svn.apache.org/viewvc/james/hupa/trunk/server/src/main/java/org/apache/hupa/server/handler/GetMessageDetailsHandler.java?rev=836203&r1=836202&r2=836203&view=diff
==============================================================================
--- 
james/hupa/trunk/server/src/main/java/org/apache/hupa/server/handler/GetMessageDetailsHandler.java
 (original)
+++ 
james/hupa/trunk/server/src/main/java/org/apache/hupa/server/handler/GetMessageDetailsHandler.java
 Sat Nov 14 16:17:23 2009
@@ -165,7 +165,6 @@
             IOException {
        boolean isHTML = false;
         if (con instanceof String) {
-            System.out.println("sc: " + message.getContentType());
             if (message.getContentType().startsWith("text/html")) {
                 isHTML = true;
             } else {
@@ -177,7 +176,6 @@
 
             Multipart mp = (Multipart) con;
             String multipartContentType = mp.getContentType().toLowerCase();
-            System.out.println("mc: " + multipartContentType);
             
             String text = null;
 
@@ -188,7 +186,6 @@
                     Part part = mp.getBodyPart(i);
 
                     String contentType = part.getContentType().toLowerCase();
-                    System.out.println("c: " + contentType);
 
                     if (text == null && contentType.startsWith("text/plain") ) 
{
                         isHTML = false;
@@ -230,7 +227,6 @@
             Part part = mp.getBodyPart(i);
             
             String contentType = part.getContentType().toLowerCase();
-            System.out.println("m: " + contentType);
 
             // we prefer html
             if (text == null && contentType.startsWith("text/plain")) {
@@ -250,9 +246,9 @@
     static Pattern regex_http = Pattern.compile(HTML_LINK_REGEXP);
     static String repl_http = "<a href=\"$1\">$1</a>";
 
-    static String EMAIL_REGEXP =  
"\\b([a-z0-9._%\\+\\-]+@[a-z0-9\\.\\-]+\\.[a-z]{2,4})\\b";
-    static Pattern regex_email = Pattern.compile(EMAIL_REGEXP);
-    static String repl_email = "<a href=\"mailto:$1\">$1</a>";
+    static String EMAIL_REGEXP =  
"\\b(?<![A-z0-9._%\\+\\-=])([a-z][a-z0-9._%\\+\\-]+@[a-z0-9\\.\\-]+\\.[a-z]{2,4})";
+    static Pattern regex_email = Pattern.compile("\\b"+ EMAIL_REGEXP);
+    static String repl_email = "<a href=\"mailto:$1\">$1</a>$1";
     
     static Pattern regex_inlineImg = 
Pattern.compile("(?si)(<\\s*img\\s+.*?src=[\"'])cid:([^\"']+[\"'])");
     static String repl_inlineImg = "$1" + SConsts.HUPA + 
SConsts.SERVLET_DOWNLOAD 
@@ -266,8 +262,9 @@
     static Pattern regex_unneededTags = 
Pattern.compile("(?si)(</?(html|body)[^>]*?>)");
     static String repl_unneededTags = "";
 
-    static Pattern regex_badAttrs = 
Pattern.compile("(?si)(<)(\\w+)(\\s.+?)onClick=(\".+?\"|'.+?')(.*?</)(\\2)(\\s*>)");
-    static String repl_badAttrs = "$1$2$3 $5$6$7";
+    static String EVENT_ATTR_REGEX = 
"(?:on[dbl]*click)|(?:onmouse[a-z]+)|(?:onkey[a-z]+)";
+    static Pattern regex_badAttrs = Pattern.compile("(?si)(<\\w+[^<>]*)\\s+("+ 
EVENT_ATTR_REGEX + ")=[\"']?([^\\s<>]+?)[\"']?([\\s>])");
+    static String repl_badAttrs = "$1$4";
     
     static Pattern regex_orphandHttpLinks = 
Pattern.compile("(?si)(?!.*<a\\s?[^>]*?>.+</a\\s*>.*)(<[^<]*?>[^<>]*)" + 
HTML_LINK_REGEXP + "([^<>]*<[^>]*?>)");
     static String repl_orphandHttpLinks = "$1<a href=\"$2\">$2</a>$3";
@@ -281,8 +278,15 @@
     static Pattern regex_existingEmailLinks = 
Pattern.compile("(?si)<a\\s[^>]*?href=[\"']*mailto:[\";']?([^\"]+)[\"']?");
     static String repl_existngEmailLinks = "<a onClick=\"mailTo('$1');return 
false;\" href=\"mailto:$1\"";;
     
-    protected String replaceAll(String txt, Pattern pattern, String repl) {
-        return pattern.matcher(txt).replaceAll(repl);
+    
+    protected String replaceAll(String txt, Pattern pattern, String 
replacement) {
+        return pattern.matcher(txt).replaceAll(replacement);
+    }
+    
+    protected String replaceAllRecursive(String txt, Pattern pattern, String 
replacement) {
+        while (pattern.matcher(txt).find())
+            txt = pattern.matcher(txt).replaceAll(replacement);
+        return txt;
     }
 
     protected String txtDocumentToHtml(String txt, String folderName, long 
uuid) {
@@ -321,7 +325,7 @@
         // Remove body and html tags
         html = replaceAll(html, regex_unneededTags, repl_unneededTags);
         // Remove all onClick attributes 
-        html = replaceAll(html, regex_badAttrs, repl_badAttrs);
+        html = replaceAllRecursive(html, regex_badAttrs, repl_badAttrs);
         // Add <a> tags to links which are not already into <a>
         html = replaceAll(html, regex_orphandHttpLinks, repl_orphandHttpLinks);
         // Add javascript method to <a> in order to open links in a different 
window

Modified: 
james/hupa/trunk/server/src/test/java/org/apache/hupa/server/handler/GetMessageDetailsHandlerTest.java
URL: 
http://svn.apache.org/viewvc/james/hupa/trunk/server/src/test/java/org/apache/hupa/server/handler/GetMessageDetailsHandlerTest.java?rev=836203&r1=836202&r2=836203&view=diff
==============================================================================
--- 
james/hupa/trunk/server/src/test/java/org/apache/hupa/server/handler/GetMessageDetailsHandlerTest.java
 (original)
+++ 
james/hupa/trunk/server/src/test/java/org/apache/hupa/server/handler/GetMessageDetailsHandlerTest.java
 Sat Nov 14 16:17:23 2009
@@ -42,9 +42,13 @@
 
     public void testRegexEmail() {
         String txt, res;
-        txt = "!abcd091_%[email protected]+";
+        txt = "!'bcd091_%[email protected]+";
         res = handler.replaceAll(txt, GetMessageDetailsHandler.regex_email, 
"");
-        assertEquals("!+", res);
+        assertEquals("!'+", res);
+
+        txt = "!'=bcd091_%[email protected]+";
+        res = handler.replaceAll(txt, GetMessageDetailsHandler.regex_email, 
"");
+        assertEquals(txt, res);
     }
 
     public void testRegexInlineImg() {
@@ -61,6 +65,18 @@
         res = handler.replaceAll(res, 
GetMessageDetailsHandler.regex_unneededTags, 
GetMessageDetailsHandler.repl_unneededTags);
         assertEquals("...", res);
     }
+
+    public void testRegexBadAttributes() {
+
+        String txt, res;
+        txt = "... <div attr=a onClick=\"something('');\"> ...";
+        res = handler.replaceAllRecursive(txt, 
GetMessageDetailsHandler.regex_badAttrs, 
GetMessageDetailsHandler.repl_badAttrs);
+        assertEquals("... <div attr=a> ...", res);
+
+        txt = "... <div attr=a onClick=\"something('');\" attr=b 
onMouseOver=whatever attr=c onKeyup=\"\" /> ...";
+        res = handler.replaceAllRecursive(txt, 
GetMessageDetailsHandler.regex_badAttrs, 
GetMessageDetailsHandler.repl_badAttrs);
+        assertEquals("... <div attr=a attr=b attr=c /> ...", res);
+    }
     
     public void testRegexHtmlLinks() {
         String txt, res;
@@ -185,6 +201,12 @@
         res = handler.filterHtmlDocument(msg, "aFolder", 9999l);
         assertFalse(res.contains("mailTo("));
         assertTrue(res.contains("openLink("));
+        
+        msg = 
"http://accounts.myspace.com.deaaaf.me.uk/msp/index.php?fuseaction=update&code=78E2BL6-EKY5L893K4MHSA-74ESO-D743U41GYB18J-FA18EI698V4M&[email protected]";;
+        res = handler.txtDocumentToHtml(msg, "aFolder", 9999l);
+        assertFalse(res.contains("mailTo("));
+        assertTrue(res.contains("openLink("));
+        
     }
 
        private MessageDetails loadMessageDetails(String msgFile) throws 
Exception {

Modified: 
james/hupa/trunk/server/src/test/java/org/apache/hupa/server/servlet/DownloadAttachmentServletTest.java
URL: 
http://svn.apache.org/viewvc/james/hupa/trunk/server/src/test/java/org/apache/hupa/server/servlet/DownloadAttachmentServletTest.java?rev=836203&r1=836202&r2=836203&view=diff
==============================================================================
--- 
james/hupa/trunk/server/src/test/java/org/apache/hupa/server/servlet/DownloadAttachmentServletTest.java
 (original)
+++ 
james/hupa/trunk/server/src/test/java/org/apache/hupa/server/servlet/DownloadAttachmentServletTest.java
 Sat Nov 14 16:17:23 2009
@@ -19,7 +19,6 @@
                Part part = DownloadAttachmentServlet.handleMultiPart(message
                        .getContent(), 
"[email protected]");
                assertNotNull(part);
-               System.out.println(part.getContentType());
        }
 
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to