Author: norman
Date: Sat Nov 14 16:17:23 2009
New Revision: 836203
URL: http://svn.apache.org/viewvc?rev=836203&view=rev
Log:
Fixes the dangerous-attributes removal regex (HUPA-52). Thanks to Manuel for
submitting this patch.
Modified:
james/hupa/trunk/server/src/main/java/org/apache/hupa/server/handler/GetMessageDetailsHandler.java
james/hupa/trunk/server/src/test/java/org/apache/hupa/server/handler/GetMessageDetailsHandlerTest.java
james/hupa/trunk/server/src/test/java/org/apache/hupa/server/servlet/DownloadAttachmentServletTest.java
Modified:
james/hupa/trunk/server/src/main/java/org/apache/hupa/server/handler/GetMessageDetailsHandler.java
URL:
http://svn.apache.org/viewvc/james/hupa/trunk/server/src/main/java/org/apache/hupa/server/handler/GetMessageDetailsHandler.java?rev=836203&r1=836202&r2=836203&view=diff
==============================================================================
---
james/hupa/trunk/server/src/main/java/org/apache/hupa/server/handler/GetMessageDetailsHandler.java
(original)
+++
james/hupa/trunk/server/src/main/java/org/apache/hupa/server/handler/GetMessageDetailsHandler.java
Sat Nov 14 16:17:23 2009
@@ -165,7 +165,6 @@
IOException {
boolean isHTML = false;
if (con instanceof String) {
- System.out.println("sc: " + message.getContentType());
if (message.getContentType().startsWith("text/html")) {
isHTML = true;
} else {
@@ -177,7 +176,6 @@
Multipart mp = (Multipart) con;
String multipartContentType = mp.getContentType().toLowerCase();
- System.out.println("mc: " + multipartContentType);
String text = null;
@@ -188,7 +186,6 @@
Part part = mp.getBodyPart(i);
String contentType = part.getContentType().toLowerCase();
- System.out.println("c: " + contentType);
if (text == null && contentType.startsWith("text/plain") )
{
isHTML = false;
@@ -230,7 +227,6 @@
Part part = mp.getBodyPart(i);
String contentType = part.getContentType().toLowerCase();
- System.out.println("m: " + contentType);
// we prefer html
if (text == null && contentType.startsWith("text/plain")) {
@@ -250,9 +246,9 @@
static Pattern regex_http = Pattern.compile(HTML_LINK_REGEXP);
static String repl_http = "<a href=\"$1\">$1</a>";
- static String EMAIL_REGEXP =
"\\b([a-z0-9._%\\+\\-]+@[a-z0-9\\.\\-]+\\.[a-z]{2,4})\\b";
- static Pattern regex_email = Pattern.compile(EMAIL_REGEXP);
- static String repl_email = "<a href=\"mailto:$1\">$1</a>";
+ static String EMAIL_REGEXP =
"\\b(?<![A-z0-9._%\\+\\-=])([a-z][a-z0-9._%\\+\\-]+@[a-z0-9\\.\\-]+\\.[a-z]{2,4})";
+ static Pattern regex_email = Pattern.compile("\\b"+ EMAIL_REGEXP);
+ static String repl_email = "<a href=\"mailto:$1\">$1</a>$1";
static Pattern regex_inlineImg =
Pattern.compile("(?si)(<\\s*img\\s+.*?src=[\"'])cid:([^\"']+[\"'])");
static String repl_inlineImg = "$1" + SConsts.HUPA +
SConsts.SERVLET_DOWNLOAD
@@ -266,8 +262,9 @@
static Pattern regex_unneededTags =
Pattern.compile("(?si)(</?(html|body)[^>]*?>)");
static String repl_unneededTags = "";
- static Pattern regex_badAttrs =
Pattern.compile("(?si)(<)(\\w+)(\\s.+?)onClick=(\".+?\"|'.+?')(.*?</)(\\2)(\\s*>)");
- static String repl_badAttrs = "$1$2$3 $5$6$7";
+ static String EVENT_ATTR_REGEX =
"(?:on[dbl]*click)|(?:onmouse[a-z]+)|(?:onkey[a-z]+)";
+ static Pattern regex_badAttrs = Pattern.compile("(?si)(<\\w+[^<>]*)\\s+("+
EVENT_ATTR_REGEX + ")=[\"']?([^\\s<>]+?)[\"']?([\\s>])");
+ static String repl_badAttrs = "$1$4";
static Pattern regex_orphandHttpLinks =
Pattern.compile("(?si)(?!.*<a\\s?[^>]*?>.+</a\\s*>.*)(<[^<]*?>[^<>]*)" +
HTML_LINK_REGEXP + "([^<>]*<[^>]*?>)");
static String repl_orphandHttpLinks = "$1<a href=\"$2\">$2</a>$3";
@@ -281,8 +278,15 @@
static Pattern regex_existingEmailLinks =
Pattern.compile("(?si)<a\\s[^>]*?href=[\"']*mailto:[\"']?([^\"]+)[\"']?");
static String repl_existngEmailLinks = "<a onClick=\"mailTo('$1');return
false;\" href=\"mailto:$1\"";
- protected String replaceAll(String txt, Pattern pattern, String repl) {
- return pattern.matcher(txt).replaceAll(repl);
+
+ protected String replaceAll(String txt, Pattern pattern, String
replacement) {
+ return pattern.matcher(txt).replaceAll(replacement);
+ }
+
+ protected String replaceAllRecursive(String txt, Pattern pattern, String
replacement) {
+ while (pattern.matcher(txt).find())
+ txt = pattern.matcher(txt).replaceAll(replacement);
+ return txt;
}
protected String txtDocumentToHtml(String txt, String folderName, long
uuid) {
@@ -321,7 +325,7 @@
// Remove body and html tags
html = replaceAll(html, regex_unneededTags, repl_unneededTags);
// Remove all onClick attributes
- html = replaceAll(html, regex_badAttrs, repl_badAttrs);
+ html = replaceAllRecursive(html, regex_badAttrs, repl_badAttrs);
// Add <a> tags to links which are not already into <a>
html = replaceAll(html, regex_orphandHttpLinks, repl_orphandHttpLinks);
// Add javascript method to <a> in order to open links in a different
window
Modified:
james/hupa/trunk/server/src/test/java/org/apache/hupa/server/handler/GetMessageDetailsHandlerTest.java
URL:
http://svn.apache.org/viewvc/james/hupa/trunk/server/src/test/java/org/apache/hupa/server/handler/GetMessageDetailsHandlerTest.java?rev=836203&r1=836202&r2=836203&view=diff
==============================================================================
---
james/hupa/trunk/server/src/test/java/org/apache/hupa/server/handler/GetMessageDetailsHandlerTest.java
(original)
+++
james/hupa/trunk/server/src/test/java/org/apache/hupa/server/handler/GetMessageDetailsHandlerTest.java
Sat Nov 14 16:17:23 2009
@@ -42,9 +42,13 @@
public void testRegexEmail() {
String txt, res;
- txt = "!abcd091_%[email protected]+";
+ txt = "!'bcd091_%[email protected]+";
res = handler.replaceAll(txt, GetMessageDetailsHandler.regex_email,
"");
- assertEquals("!+", res);
+ assertEquals("!'+", res);
+
+ txt = "!'=bcd091_%[email protected]+";
+ res = handler.replaceAll(txt, GetMessageDetailsHandler.regex_email,
"");
+ assertEquals(txt, res);
}
public void testRegexInlineImg() {
@@ -61,6 +65,18 @@
res = handler.replaceAll(res,
GetMessageDetailsHandler.regex_unneededTags,
GetMessageDetailsHandler.repl_unneededTags);
assertEquals("...", res);
}
+
+ public void testRegexBadAttributes() {
+
+ String txt, res;
+ txt = "... <div attr=a onClick=\"something('');\"> ...";
+ res = handler.replaceAllRecursive(txt,
GetMessageDetailsHandler.regex_badAttrs,
GetMessageDetailsHandler.repl_badAttrs);
+ assertEquals("... <div attr=a> ...", res);
+
+ txt = "... <div attr=a onClick=\"something('');\" attr=b
onMouseOver=whatever attr=c onKeyup=\"\" /> ...";
+ res = handler.replaceAllRecursive(txt,
GetMessageDetailsHandler.regex_badAttrs,
GetMessageDetailsHandler.repl_badAttrs);
+ assertEquals("... <div attr=a attr=b attr=c /> ...", res);
+ }
public void testRegexHtmlLinks() {
String txt, res;
@@ -185,6 +201,12 @@
res = handler.filterHtmlDocument(msg, "aFolder", 9999l);
assertFalse(res.contains("mailTo("));
assertTrue(res.contains("openLink("));
+
+ msg =
"http://accounts.myspace.com.deaaaf.me.uk/msp/index.php?fuseaction=update&code=78E2BL6-EKY5L893K4MHSA-74ESO-D743U41GYB18J-FA18EI698V4M&[email protected]";
+ res = handler.txtDocumentToHtml(msg, "aFolder", 9999l);
+ assertFalse(res.contains("mailTo("));
+ assertTrue(res.contains("openLink("));
+
}
private MessageDetails loadMessageDetails(String msgFile) throws
Exception {
Modified:
james/hupa/trunk/server/src/test/java/org/apache/hupa/server/servlet/DownloadAttachmentServletTest.java
URL:
http://svn.apache.org/viewvc/james/hupa/trunk/server/src/test/java/org/apache/hupa/server/servlet/DownloadAttachmentServletTest.java?rev=836203&r1=836202&r2=836203&view=diff
==============================================================================
---
james/hupa/trunk/server/src/test/java/org/apache/hupa/server/servlet/DownloadAttachmentServletTest.java
(original)
+++
james/hupa/trunk/server/src/test/java/org/apache/hupa/server/servlet/DownloadAttachmentServletTest.java
Sat Nov 14 16:17:23 2009
@@ -19,7 +19,6 @@
Part part = DownloadAttachmentServlet.handleMultiPart(message
.getContent(),
"[email protected]");
assertNotNull(part);
- System.out.println(part.getContentType());
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]