Author: scottbw
Date: Mon Jul  4 11:06:32 2011
New Revision: 1142624

URL: http://svn.apache.org/viewvc?rev=1142624&view=rev
Log:
Implemented override for charset and content-type for existing meta tags; see WOOKIE-217

Modified:
    incubator/wookie/trunk/src-tests/org/apache/wookie/tests/HtmlCleanerTest.java
    incubator/wookie/trunk/src/org/apache/wookie/util/html/HtmlCleaner.java

Modified: 
incubator/wookie/trunk/src-tests/org/apache/wookie/tests/HtmlCleanerTest.java
URL: 
http://svn.apache.org/viewvc/incubator/wookie/trunk/src-tests/org/apache/wookie/tests/HtmlCleanerTest.java?rev=1142624&r1=1142623&r2=1142624&view=diff
==============================================================================
--- 
incubator/wookie/trunk/src-tests/org/apache/wookie/tests/HtmlCleanerTest.java 
(original)
+++ 
incubator/wookie/trunk/src-tests/org/apache/wookie/tests/HtmlCleanerTest.java 
Mon Jul  4 11:06:32 2011
@@ -157,7 +157,7 @@ public class HtmlCleanerTest {
        
   @Test
   public void overrideCharset() throws IOException{
-    String in = "<html><head><meta http-equiv=\"content-type\" 
content=\"text/xhtml;charset=ASCII\" /></head><body></body></html>";
+    String in = "<html><head><meta http-equiv=\"Content-Type\" 
content=\"text/xhtml;charset=ASCII\" /></head><body></body></html>";
     StringWriter out = new StringWriter();
     HtmlCleaner cleaner = new HtmlCleaner();
     cleaner.setReader(new StringReader(in));

Modified: 
incubator/wookie/trunk/src/org/apache/wookie/util/html/HtmlCleaner.java
URL: 
http://svn.apache.org/viewvc/incubator/wookie/trunk/src/org/apache/wookie/util/html/HtmlCleaner.java?rev=1142624&r1=1142623&r2=1142624&view=diff
==============================================================================
--- incubator/wookie/trunk/src/org/apache/wookie/util/html/HtmlCleaner.java 
(original)
+++ incubator/wookie/trunk/src/org/apache/wookie/util/html/HtmlCleaner.java Mon 
Jul  4 11:06:32 2011
@@ -87,17 +87,34 @@ public class HtmlCleaner implements IHtm
                headNode.addChild(js);
        }
 
-       /* (non-Javadoc)
-        * @see 
org.apache.wookie.util.html.IHtmlProcessor#setCharset(java.lang.String)
-        */
-       public void setTypeAndCharset(String type, String charset) {
-               // This overrides any existing encoding information in the HTML 
file.
-               TagNode meta = new TagNode(META_TAG);
-               meta.addAttribute("http-equiv", "Content-Type");
-               if (charset.equals("UTF-8")) charset="utf-8";
-               meta.addAttribute("content", type+";charset="+charset);
-               headNode.getChildren().add(0, meta);
-       }
+  /* (non-Javadoc)
+   * @see 
org.apache.wookie.util.html.IHtmlProcessor#setCharset(java.lang.String)
+   */
+  @SuppressWarnings("unchecked")
+  public void setTypeAndCharset(String type, String charset) {
+    // NB This overrides any existing encoding information in the HTML file.
+    
+    //
+    // Check if the page already has a META http-equiv=content-type tag,
+    // if it doesn't create one and add it to the head node
+    //
+    TagNode meta = headNode.findElementByAttValue("http-equiv", 
"content-type", true, false);
+    if (meta == null) {
+      meta = new TagNode(META_TAG);
+      meta.addAttribute("http-equiv", "Content-Type");
+      headNode.getChildren().add(0, meta);
+    }
+    //
+    // Force UTF into lowercase
+    //
+    if (charset.equals("UTF-8")) charset = "utf-8";
+    
+    //
+    // Override the charset and content-type values for the 
+    // META http-equiv=content-type tag
+    //
+    meta.addAttribute("content", type + ";charset=" + charset);
+  }
        
        /* (non-Javadoc)
         * @see org.apache.wookie.util.html.IHtmlProcessor#process()


Reply via email to