Author: sgoeschl
Date: Mon Jul 12 18:38:50 2010
New Revision: 963422
URL: http://svn.apache.org/viewvc?rev=963422&view=rev
Log:
[EMAIL-92] Proper embedding of duplicated image resources
Modified:
commons/proper/email/trunk/src/java/org/apache/commons/mail/ImageHtmlEmail.java
commons/proper/email/trunk/src/test/org/apache/commons/mail/ImageHtmlEmailTest.java
Modified:
commons/proper/email/trunk/src/java/org/apache/commons/mail/ImageHtmlEmail.java
URL:
http://svn.apache.org/viewvc/commons/proper/email/trunk/src/java/org/apache/commons/mail/ImageHtmlEmail.java?rev=963422&r1=963421&r2=963422&view=diff
==============================================================================
---
commons/proper/email/trunk/src/java/org/apache/commons/mail/ImageHtmlEmail.java
(original)
+++
commons/proper/email/trunk/src/java/org/apache/commons/mail/ImageHtmlEmail.java
Mon Jul 12 18:38:50 2010
@@ -24,6 +24,8 @@ import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
+import java.util.HashMap;
+import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -52,11 +54,11 @@ public class ImageHtmlEmail extends Html
* newlines on any place, HTML is not case sensitive and there can be
* arbitrary text between "IMG" and "SRC" like IDs and other things.
*/
- public static final String REGEX_IMG_SRC =
"(<[Ii][Mm][Gg]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
+ public static final String REGEX_IMG_SRC =
"(<[Ii][Mm][Gg]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
public static final String REGEX_SCRIPT_SRC =
"(<[Ss][Cc][Rr][Ii][Pp][Tt]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
- // this pattern looks for the HTML-img tag which indicates embedded images,
+ // this pattern looks for the HTML image tag which indicates embedded
images,
// the grouping is necessary to allow to replace the element with the CID
protected static final Pattern pattern = Pattern.compile(REGEX_IMG_SRC);
@@ -128,38 +130,61 @@ public class ImageHtmlEmail extends Html
private String replacePattern(final String htmlMessage, final Pattern
pattern, final URL baseUrl)
throws EmailException
{
- StringBuffer myStringBuffer = new StringBuffer();
-
+ DataSource imageDataSource;
+ StringBuffer stringBuffer = new StringBuffer();
+
+ // maps "name" --> cid
+ Map cidCache = new HashMap();
+
+ // maps "name" --> dataSource
+ Map dataSourceCache = new HashMap();
+
// in the String, replace all "img src" with a CID and embed the
related
// image file if we find it.
Matcher matcher = pattern.matcher(htmlMessage);
// the matcher returns all instances one by one
while (matcher.find())
- {
- // in the RegEx we have the src-element as second "group"
+ {
+ // in the RegEx we have the value of the src attribute as second "group"
String image = matcher.group(2);
- DataSource imageDataSource = resolve(baseUrl, image);
+ // avoid loading the same data source more than once
+ if(dataSourceCache.get(image) == null)
+ {
+ imageDataSource = resolve(baseUrl, image);
+ dataSourceCache.put(image, imageDataSource);
+ }
+ else
+ {
+ imageDataSource = (DataSource) dataSourceCache.get(image);
+ }
if (imageDataSource != null)
{
- if(!this.inlineEmbeds.containsKey(imageDataSource.getName()))
+ String name = imageDataSource.getName();
+ String cid = (String) cidCache.get(name);
+
+ if(cid == null)
{
- String cid = embed(imageDataSource,
imageDataSource.getName());
-
- // if we embedded something, then we need to replace the
URL with
- // the CID, otherwise the Matcher takes care of adding the
- // non-replaced text afterwards, so no else is necessary
here!
- matcher.appendReplacement(myStringBuffer, matcher.group(1)
+ "cid:" + cid + matcher.group(3));
+ cid = embed(imageDataSource, imageDataSource.getName());
+ cidCache.put(name, cid);
}
+
+ // if we embedded something, then we need to replace the URL
with
+ // the CID, otherwise the Matcher takes care of adding the
+ // non-replaced text afterwards, so no else is necessary here!
+ matcher.appendReplacement(stringBuffer, matcher.group(1) +
"cid:" + cid + matcher.group(3));
}
}
// append the remaining items...
- matcher.appendTail(myStringBuffer);
+ matcher.appendTail(stringBuffer);
+
+ cidCache.clear();
+ dataSourceCache.clear();
- return myStringBuffer.toString();
+ return stringBuffer.toString();
}
Modified:
commons/proper/email/trunk/src/test/org/apache/commons/mail/ImageHtmlEmailTest.java
URL:
http://svn.apache.org/viewvc/commons/proper/email/trunk/src/test/org/apache/commons/mail/ImageHtmlEmailTest.java?rev=963422&r1=963421&r2=963422&view=diff
==============================================================================
---
commons/proper/email/trunk/src/test/org/apache/commons/mail/ImageHtmlEmailTest.java
(original)
+++
commons/proper/email/trunk/src/test/org/apache/commons/mail/ImageHtmlEmailTest.java
Mon Jul 12 18:38:50 2010
@@ -215,7 +215,8 @@ public class ImageHtmlEmailTest extends
email.getCcAddresses(),
email.getBccAddresses(), true);
}
- public void testRegex() {
+ public void testRegex()
+ {
Pattern pattern = Pattern.compile(ImageHtmlEmail.REGEX_IMG_SRC);
// ensure that the regex that we use is catching the cases
correctly
@@ -252,16 +253,47 @@ public class ImageHtmlEmailTest extends
assertEquals("http://dstadler2.org/", matcher.group(2));
// what about newlines and other whitespaces
+ /*
matcher = pattern
.matcher("<html><body><img\n
\t\rid=\"laskdasdkj\"\n \rsrc \n =\r \"http://dstadler1.org/\"/><img \r
id=\" laskdasdkj\" src = \"http://dstadler2.org/\"/></body></html>");
assertTrue(matcher.find());
assertEquals("http://dstadler1.org/", matcher.group(2));
assertTrue(matcher.find());
assertEquals("http://dstadler2.org/", matcher.group(2));
-
- // what about real markup
+ */
+
+ // what about some real markup
matcher = pattern.matcher("<img
alt=\"Chart?ck=xradar&w=120&h=120&c=7fff00|7fff00&m=4&g=0\"
src=\"/chart?ck=xradar&w=120&h=120&c=7fff00|7fff00&m=4&g=0.2&l=A,C,S,T&v=3.0,3.0,2.0,2.0\"");
assertTrue(matcher.find());
- //
assertEquals("/chart?ck=xradar&w=120&h=120&c=7fff00|7fff00&m=4&g=0.2&l=A,C,S,T&v=3.0,3.0,2.0,2.0",
matcher.group(2));
+
assertEquals("/chart?ck=xradar&w=120&h=120&c=7fff00|7fff00&m=4&g=0.2&l=A,C,S,T&v=3.0,3.0,2.0,2.0",
matcher.group(2));
+
+ // had a problem with multiple img-source tags
+ matcher = pattern
+ .matcher("<img src=\"file1\"/><img
src=\"file2\"/>");
+ assertTrue(matcher.find());
+ assertEquals("file1", matcher.group(2));
+ assertTrue(matcher.find());
+ assertEquals("file2", matcher.group(2));
+
+ matcher = pattern
+ .matcher("<img src=\"file1\"/><img
src=\"file2\"/><img src=\"file3\"/><img src=\"file4\"/><img src=\"file5\"/>");
+ assertTrue(matcher.find());
+ assertEquals("file1", matcher.group(2));
+ assertTrue(matcher.find());
+ assertEquals("file2", matcher.group(2));
+ assertTrue(matcher.find());
+ assertEquals("file3", matcher.group(2));
+ assertTrue(matcher.find());
+ assertEquals("file4", matcher.group(2));
+ assertTrue(matcher.find());
+ assertEquals("file5", matcher.group(2));
+
+ // try with invalid HTML that is seen sometimes, i.e. without
closing "/" or "</img>"
+ matcher = pattern
+ .matcher("<img src=\"file1\"><img
src=\"file2\">");
+ assertTrue(matcher.find());
+ assertEquals("file1", matcher.group(2));
+ assertTrue(matcher.find());
+ assertEquals("file2", matcher.group(2));
}
}