Author: scottbw
Date: Mon Jul 4 11:06:32 2011
New Revision: 1142624
URL: http://svn.apache.org/viewvc?rev=1142624&view=rev
Log:
Implemented override for charset and content-type for existing meta tags; see
WOOKIE-217
Modified:
incubator/wookie/trunk/src-tests/org/apache/wookie/tests/HtmlCleanerTest.java
incubator/wookie/trunk/src/org/apache/wookie/util/html/HtmlCleaner.java
Modified:
incubator/wookie/trunk/src-tests/org/apache/wookie/tests/HtmlCleanerTest.java
URL:
http://svn.apache.org/viewvc/incubator/wookie/trunk/src-tests/org/apache/wookie/tests/HtmlCleanerTest.java?rev=1142624&r1=1142623&r2=1142624&view=diff
==============================================================================
--- incubator/wookie/trunk/src-tests/org/apache/wookie/tests/HtmlCleanerTest.java (original)
+++ incubator/wookie/trunk/src-tests/org/apache/wookie/tests/HtmlCleanerTest.java Mon Jul 4 11:06:32 2011
@@ -157,7 +157,7 @@ public class HtmlCleanerTest {
@Test
public void overrideCharset() throws IOException{
- String in = "<html><head><meta http-equiv=\"content-type\" content=\"text/xhtml;charset=ASCII\" /></head><body></body></html>";
+ String in = "<html><head><meta http-equiv=\"Content-Type\" content=\"text/xhtml;charset=ASCII\" /></head><body></body></html>";
StringWriter out = new StringWriter();
HtmlCleaner cleaner = new HtmlCleaner();
cleaner.setReader(new StringReader(in));
Modified:
incubator/wookie/trunk/src/org/apache/wookie/util/html/HtmlCleaner.java
URL:
http://svn.apache.org/viewvc/incubator/wookie/trunk/src/org/apache/wookie/util/html/HtmlCleaner.java?rev=1142624&r1=1142623&r2=1142624&view=diff
==============================================================================
--- incubator/wookie/trunk/src/org/apache/wookie/util/html/HtmlCleaner.java (original)
+++ incubator/wookie/trunk/src/org/apache/wookie/util/html/HtmlCleaner.java Mon Jul 4 11:06:32 2011
@@ -87,17 +87,34 @@ public class HtmlCleaner implements IHtm
headNode.addChild(js);
}
- /* (non-Javadoc)
- * @see org.apache.wookie.util.html.IHtmlProcessor#setCharset(java.lang.String)
- */
- public void setTypeAndCharset(String type, String charset) {
- // This overrides any existing encoding information in the HTML file.
- TagNode meta = new TagNode(META_TAG);
- meta.addAttribute("http-equiv", "Content-Type");
- if (charset.equals("UTF-8")) charset="utf-8";
- meta.addAttribute("content", type+";charset="+charset);
- headNode.getChildren().add(0, meta);
- }
+ /* (non-Javadoc)
+ * @see org.apache.wookie.util.html.IHtmlProcessor#setCharset(java.lang.String)
+ */
+ @SuppressWarnings("unchecked")
+ public void setTypeAndCharset(String type, String charset) {
+ // NB This overrides any existing encoding information in the HTML file.
+
+ //
+ // Check if the page already has a META http-equiv=content-type tag,
+ // if it doesn't create one and add it to the head node
+ //
+ TagNode meta = headNode.findElementByAttValue("http-equiv", "content-type", true, false);
+ if (meta == null) {
+ meta = new TagNode(META_TAG);
+ meta.addAttribute("http-equiv", "Content-Type");
+ headNode.getChildren().add(0, meta);
+ }
+ //
+ // Force UTF into lowercase
+ //
+ if (charset.equals("UTF-8")) charset = "utf-8";
+
+ //
+ // Override the charset and content-type values for the
+ // META http-equiv=content-type tag
+ //
+ meta.addAttribute("content", type + ";charset=" + charset);
+ }
/* (non-Javadoc)
* @see org.apache.wookie.util.html.IHtmlProcessor#process()