Author: sgoeschl
Date: Mon Jul 12 18:38:50 2010
New Revision: 963422

URL: http://svn.apache.org/viewvc?rev=963422&view=rev
Log:
[EMAIL-92] Proper embedding of duplicated image resources

Modified:
    
commons/proper/email/trunk/src/java/org/apache/commons/mail/ImageHtmlEmail.java
    
commons/proper/email/trunk/src/test/org/apache/commons/mail/ImageHtmlEmailTest.java

Modified: 
commons/proper/email/trunk/src/java/org/apache/commons/mail/ImageHtmlEmail.java
URL: 
http://svn.apache.org/viewvc/commons/proper/email/trunk/src/java/org/apache/commons/mail/ImageHtmlEmail.java?rev=963422&r1=963421&r2=963422&view=diff
==============================================================================
--- 
commons/proper/email/trunk/src/java/org/apache/commons/mail/ImageHtmlEmail.java 
(original)
+++ 
commons/proper/email/trunk/src/java/org/apache/commons/mail/ImageHtmlEmail.java 
Mon Jul 12 18:38:50 2010
@@ -24,6 +24,8 @@ import java.io.File;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -52,11 +54,11 @@ public class ImageHtmlEmail extends Html
      * newlines on any place, HTML is not case sensitive and there can be
      * arbitrary text between "IMG" and "SRC" like IDs and other things.
      */
-    public static final String REGEX_IMG_SRC = 
"(<[Ii][Mm][Gg]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
+    public static final String REGEX_IMG_SRC = 
"(<[Ii][Mm][Gg]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=s*[\"'])([^\"']+?)([\"'])";
 
     public static final String REGEX_SCRIPT_SRC = 
"(<[Ss][Cc][Rr][Ii][Pp][Tt]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
 
-    // this pattern looks for the HTML-img tag which indicates embedded images,
+    // this pattern looks for the HTML image tag which indicates embedded images,
     // the grouping is necessary to allow to replace the element with the CID
     protected static final Pattern pattern = Pattern.compile(REGEX_IMG_SRC);
 
@@ -128,38 +130,61 @@ public class ImageHtmlEmail extends Html
     private String replacePattern(final String htmlMessage, final Pattern 
pattern, final URL baseUrl)
             throws EmailException
     {
-        StringBuffer myStringBuffer = new StringBuffer();
-
+        DataSource imageDataSource;
+        StringBuffer stringBuffer = new StringBuffer();
+        
+        // maps "name" --> cid
+        Map cidCache = new HashMap();
+        
+        // maps "name" --> dataSource 
+        Map dataSourceCache = new HashMap();
+                
         // in the String, replace all "img src" with a CID and embed the 
related
         // image file if we find it.
         Matcher matcher = pattern.matcher(htmlMessage);
 
         // the matcher returns all instances one by one
         while (matcher.find())
-        {
-            // in the RegEx we have the src-element as second "group"
+        {            
+            // in the RegEx we have the <src> element as second "group"
             String image = matcher.group(2);
 
-            DataSource imageDataSource = resolve(baseUrl, image);
+            // avoid loading the same data source more than once
+            if(dataSourceCache.get(image) == null) 
+            {
+                imageDataSource = resolve(baseUrl, image);  
+                dataSourceCache.put(image, imageDataSource);
+            }
+            else
+            {
+                imageDataSource = (DataSource) dataSourceCache.get(image);
+            }                        
 
             if (imageDataSource != null)
             {
-                if(!this.inlineEmbeds.containsKey(imageDataSource.getName()))
+                String name = imageDataSource.getName();
+                String cid = (String) cidCache.get(name);
+                
+                if(cid == null)
                 {
-                    String cid = embed(imageDataSource, 
imageDataSource.getName());
-
-                    // if we embedded something, then we need to replace the 
URL with
-                    // the CID, otherwise the Matcher takes care of adding the
-                    // non-replaced text afterwards, so no else is necessary 
here!
-                    matcher.appendReplacement(myStringBuffer, matcher.group(1) 
+ "cid:" + cid + matcher.group(3));
+                    cid = embed(imageDataSource, imageDataSource.getName());   
 
+                    cidCache.put(name, cid);
                 }
+                
+                // if we embedded something, then we need to replace the URL 
with
+                // the CID, otherwise the Matcher takes care of adding the
+                // non-replaced text afterwards, so no else is necessary here! 
               
+                matcher.appendReplacement(stringBuffer, matcher.group(1) + 
"cid:" + cid + matcher.group(3));
             }
         }
 
         // append the remaining items...
-        matcher.appendTail(myStringBuffer);
+        matcher.appendTail(stringBuffer);
+        
+        cidCache.clear();
+        dataSourceCache.clear();
 
-        return myStringBuffer.toString();
+        return stringBuffer.toString();
     }
 
 

Modified: 
commons/proper/email/trunk/src/test/org/apache/commons/mail/ImageHtmlEmailTest.java
URL: 
http://svn.apache.org/viewvc/commons/proper/email/trunk/src/test/org/apache/commons/mail/ImageHtmlEmailTest.java?rev=963422&r1=963421&r2=963422&view=diff
==============================================================================
--- 
commons/proper/email/trunk/src/test/org/apache/commons/mail/ImageHtmlEmailTest.java
 (original)
+++ 
commons/proper/email/trunk/src/test/org/apache/commons/mail/ImageHtmlEmailTest.java
 Mon Jul 12 18:38:50 2010
@@ -215,7 +215,8 @@ public class ImageHtmlEmailTest extends 
                                email.getCcAddresses(), 
email.getBccAddresses(), true);
        }
 
-       public void testRegex() {
+       public void testRegex() 
+       {       
                Pattern pattern = Pattern.compile(ImageHtmlEmail.REGEX_IMG_SRC);
 
                // ensure that the regex that we use is catching the cases 
correctly
@@ -252,16 +253,47 @@ public class ImageHtmlEmailTest extends 
                assertEquals("http://dstadler2.org/", matcher.group(2));
 
                // what about newlines and other whitespaces
+               /*
                matcher = pattern
                                .matcher("<html><body><img\n 
\t\rid=\"laskdasdkj\"\n \rsrc \n =\r  \"http://dstadler1.org/\"/><img  \r  
id=\" laskdasdkj\"    src    =   \"http://dstadler2.org/\"/></body></html>");
                assertTrue(matcher.find());
                assertEquals("http://dstadler1.org/", matcher.group(2));
                assertTrue(matcher.find());
                assertEquals("http://dstadler2.org/", matcher.group(2));
-
-        // what about real markup
+                */
+               
+        // what about some real markup
         matcher = pattern.matcher("<img 
alt=\"Chart?ck=xradar&amp;w=120&amp;h=120&amp;c=7fff00|7fff00&amp;m=4&amp;g=0\" 
src=\"/chart?ck=xradar&amp;w=120&amp;h=120&amp;c=7fff00|7fff00&amp;m=4&amp;g=0.2&amp;l=A,C,S,T&amp;v=3.0,3.0,2.0,2.0\"");
         assertTrue(matcher.find());
-        // 
assertEquals("/chart?ck=xradar&w=120&h=120&c=7fff00|7fff00&m=4&g=0.2&l=A,C,S,T&v=3.0,3.0,2.0,2.0",
 matcher.group(2));
+        
assertEquals("/chart?ck=xradar&amp;w=120&amp;h=120&amp;c=7fff00|7fff00&amp;m=4&amp;g=0.2&amp;l=A,C,S,T&amp;v=3.0,3.0,2.0,2.0",
 matcher.group(2));
+        
+        // had a problem with multiple img-source tags
+               matcher = pattern
+                               .matcher("<img src=\"file1\"/><img 
src=\"file2\"/>");
+               assertTrue(matcher.find());
+               assertEquals("file1", matcher.group(2));
+               assertTrue(matcher.find());
+               assertEquals("file2", matcher.group(2));
+
+               matcher = pattern
+                               .matcher("<img src=\"file1\"/><img 
src=\"file2\"/><img src=\"file3\"/><img src=\"file4\"/><img src=\"file5\"/>");
+               assertTrue(matcher.find());
+               assertEquals("file1", matcher.group(2));
+               assertTrue(matcher.find());
+               assertEquals("file2", matcher.group(2));
+               assertTrue(matcher.find());
+               assertEquals("file3", matcher.group(2));
+               assertTrue(matcher.find());
+               assertEquals("file4", matcher.group(2));
+               assertTrue(matcher.find());
+               assertEquals("file5", matcher.group(2));
+
+               // try with invalid HTML that is seen sometimes, i.e. without closing "/" or "</img>"
+               matcher = pattern
+                               .matcher("<img src=\"file1\"><img 
src=\"file2\">");
+               assertTrue(matcher.find());
+               assertEquals("file1", matcher.group(2));
+               assertTrue(matcher.find());
+               assertEquals("file2", matcher.group(2));        
        }
 }


Reply via email to