Author: psharples
Date: Fri Oct 7 11:20:23 2011
New Revision: 1180006
URL: http://svn.apache.org/viewvc?rev=1180006&view=rev
Log:
Fix for HTMLCleaner's bad rewriting of HTML5 doctypes (it originally added a
null and an empty string to <!DOCTYPE html> type declarations).
Added:
incubator/wookie/trunk/src/org/apache/wookie/util/html/Html5DoctypeToken.java
(with props)
Modified:
incubator/wookie/trunk/src/org/apache/wookie/util/html/HtmlCleaner.java
Added:
incubator/wookie/trunk/src/org/apache/wookie/util/html/Html5DoctypeToken.java
URL:
http://svn.apache.org/viewvc/incubator/wookie/trunk/src/org/apache/wookie/util/html/Html5DoctypeToken.java?rev=1180006&view=auto
==============================================================================
---
incubator/wookie/trunk/src/org/apache/wookie/util/html/Html5DoctypeToken.java
(added)
+++
incubator/wookie/trunk/src/org/apache/wookie/util/html/Html5DoctypeToken.java
Fri Oct 7 11:20:23 2011
@@ -0,0 +1,42 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.wookie.util.html;
+
+import org.htmlcleaner.DoctypeToken;
+
+/**
+ * An extended HTML Cleaner {@link DoctypeToken} that deals with HTML5
+ * declarations better than the default, which displays empty strings and
+ * nulls (see {@link #BADDOCTYPE}).
+ *
+ * Note: {@code <!DOCTYPE html SYSTEM "about:legacy-compat">} is also a valid
+ * HTML5 doctype, but HTML Cleaner only converts the "html" part of it to
+ * uppercase. Although that is still not correct, it does not seem to cause
+ * problems in Wookie at present.
+ *
+ * See
+ * http://sourceforge.net/tracker/?func=detail&aid=3190583&group_id=183053&atid=903696
+ */
+public class Html5DoctypeToken extends DoctypeToken {
+
+    /** The malformed declaration HTML Cleaner emits for an HTML5 doctype. */
+    public static final String BADDOCTYPE = "<!DOCTYPE HTML null \"\">";
+
+    /** The correct HTML5 doctype declaration. */
+    public static final String GOODDOCTYPE = "<!DOCTYPE html>";
+
+    /**
+     * Creates the token. All four parts are handed unchanged to the
+     * {@link DoctypeToken} constructor; they do not influence the value
+     * returned by {@link #getContent()}.
+     *
+     * @param part1 first doctype part, passed to the superclass
+     * @param part2 second doctype part, passed to the superclass
+     * @param part3 third doctype part, passed to the superclass
+     * @param part4 fourth doctype part, passed to the superclass
+     */
+    public Html5DoctypeToken(String part1, String part2, String part3,
+            String part4) {
+        super(part1, part2, part3, part4);
+    }
+
+    /**
+     * Returns the fixed HTML5 declaration instead of the content assembled
+     * by the superclass.
+     *
+     * @return always {@link #GOODDOCTYPE}
+     */
+    @Override
+    public String getContent() {
+        return GOODDOCTYPE;
+    }
+
+}
Propchange:
incubator/wookie/trunk/src/org/apache/wookie/util/html/Html5DoctypeToken.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified:
incubator/wookie/trunk/src/org/apache/wookie/util/html/HtmlCleaner.java
URL:
http://svn.apache.org/viewvc/incubator/wookie/trunk/src/org/apache/wookie/util/html/HtmlCleaner.java?rev=1180006&r1=1180005&r2=1180006&view=diff
==============================================================================
--- incubator/wookie/trunk/src/org/apache/wookie/util/html/HtmlCleaner.java
(original)
+++ incubator/wookie/trunk/src/org/apache/wookie/util/html/HtmlCleaner.java Fri
Oct 7 11:20:23 2011
@@ -20,6 +20,7 @@ import java.util.ArrayList;
import java.util.List;
import org.htmlcleaner.CleanerProperties;
+import org.htmlcleaner.DoctypeToken;
import org.htmlcleaner.TagNode;
/**
@@ -64,11 +65,12 @@ public class HtmlCleaner implements IHtm
public void setReader(Reader reader) throws IOException{
if (reader == null) throw new IOException("Reader was null");
this.reader = reader;
- htmlNode = cleaner.clean(this.reader);
+ htmlNode = cleaner.clean(this.reader);
headNode = htmlNode.findElementByName(HEAD_TAG, false);
// remove widget-specific scripts. These will be replaced
// after processing, so that the injected scripts come first
removeUserScripts();
+ fixHTML5Doctype();
}
/* (non-Javadoc)
@@ -167,5 +169,17 @@ public class HtmlCleaner implements IHtm
headNode.addChild(node);
}
}
+
+ /**
+  * Fix for a bug in HTMLCleaner, which cannot handle HTML5 doctypes
+  * correctly. When the doctype of the cleaned document serializes to
+  * {@link Html5DoctypeToken#BADDOCTYPE} (compared ignoring case), it is
+  * replaced by an {@link Html5DoctypeToken}, whose content is always the
+  * correct HTML5 declaration. A document without a doctype token is left
+  * untouched.
+  *
+  * See
+  * http://sourceforge.net/tracker/?func=detail&aid=3190583&group_id=183053&atid=903696
+  */
+ private void fixHTML5Doctype(){
+     DoctypeToken docType = htmlNode.getDocType();
+     // No doctype token means there is nothing to repair; guarding here
+     // avoids a NullPointerException for input without a DOCTYPE.
+     if (docType == null) {
+         return;
+     }
+     // Constant on the left keeps the comparison safe even if the token
+     // yields null content.
+     if (Html5DoctypeToken.BADDOCTYPE.equalsIgnoreCase(docType.getContent())) {
+         Html5DoctypeToken newToken =
+                 new Html5DoctypeToken("html", null, null, null);
+         htmlNode.setDocType(newToken);
+     }
+ }
}