Author: lryan
Date: Fri Nov 21 14:33:15 2008
New Revision: 719736
URL: http://svn.apache.org/viewvc?rev=719736&view=rev
Log:
Do not inject doctypes if none existed in the original content.
Added:
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fulldocnodoctype.html
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-expected.html
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test.html
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java?rev=719736&r1=719735&r2=719736&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java
Fri Nov 21 14:33:15 2008
@@ -51,7 +51,7 @@
* @return true if we detect a preamble of doctype or html
*/
protected static boolean attemptFullDocParseFirst(String content) {
- String normalized = content.substring(Math.min(100, content.length())).toUpperCase();
+ String normalized = content.substring(0, Math.min(100, content.length())).toUpperCase();
return normalized.contains("<!DOCTYPE") || normalized.contains("<HTML");
}
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java?rev=719736&r1=719735&r2=719736&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java
Fri Nov 21 14:33:15 2008
@@ -17,7 +17,6 @@
*/
package org.apache.shindig.gadgets.parse.nekohtml;
-import org.apache.shindig.common.xml.DomUtil;
import org.apache.shindig.gadgets.GadgetException;
import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
import org.apache.shindig.gadgets.parse.HtmlSerializer;
@@ -28,6 +27,7 @@
import org.apache.xml.serialize.HTMLSerializer;
import org.apache.xml.serialize.OutputFormat;
import org.cyberneko.html.parsers.DOMFragmentParser;
+import org.cyberneko.html.parsers.DOMParser;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
@@ -69,26 +69,34 @@
private Document parseFragment(String source) throws SAXException,
IOException {
InputSource input = new InputSource(new StringReader(source));
- DOMFragmentParser parser = new DOMFragmentParser();
- parser.setProperty("http://cyberneko.org/html/properties/names/elems", "default");
- parser.setFeature("http://cyberneko.org/html/features/document-fragment", true);
-
- Document htmlDoc = documentProvider.createDocument(null, null, null);
- DocumentFragment fragment = htmlDoc.createDocumentFragment();
- parser.parse(input, fragment);
- normalizeFragment(htmlDoc, fragment);
- return htmlDoc;
+ if (attemptFullDocParseFirst(source)) {
+ DOMParser parser = new DOMParser();
+ // Force parser not to use HTMLDocumentImpl as document implementation
+ parser.setProperty("http://apache.org/xml/properties/dom/document-class-name", null);
+ parser.setProperty("http://cyberneko.org/html/properties/names/elems", "default");
+ parser.parse(input);
+ return parser.getDocument();
+ } else {
+ Document htmlDoc = documentProvider.createDocument(null, null, null);
+ DOMFragmentParser parser = new DOMFragmentParser();
+ parser.setProperty("http://cyberneko.org/html/properties/names/elems", "default");
+ parser.setFeature("http://cyberneko.org/html/features/document-fragment", true);
+ DocumentFragment fragment = htmlDoc.createDocumentFragment();
+ parser.parse(input, fragment);
+ normalizeFragment(htmlDoc, fragment);
+ return htmlDoc;
+ }
}
static class Serializer extends HtmlSerializer {
- static final OutputFormat outputFormat = new OutputFormat();
- static {
+ public String serializeImpl(Document doc) {
+ OutputFormat outputFormat = new OutputFormat();
outputFormat.setPreserveSpace(true);
outputFormat.setPreserveEmptyAttributes(false);
- }
-
- public String serializeImpl(Document doc) {
+ if (doc.getDoctype() == null) {
+ outputFormat.setOmitDocumentType(true);
+ }
StringWriter sw = createWriter(doc);
HTMLSerializer serializer = new HTMLSerializer(sw, outputFormat);
try {
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java?rev=719736&r1=719735&r2=719736&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
Fri Nov 21 14:33:15 2008
@@ -320,14 +320,14 @@
static class Serializer extends HtmlSerializer {
- static final OutputFormat outputFormat = new OutputFormat();
- static {
- outputFormat.setPreserveSpace(true);
- outputFormat.setPreserveEmptyAttributes(false);
- }
-
@Override
public String serializeImpl(Document doc) {
+ OutputFormat outputFormat = new OutputFormat();
+ outputFormat.setPreserveSpace(true);
+ outputFormat.setPreserveEmptyAttributes(false);
+ if (doc.getDoctype() == null) {
+ outputFormat.setOmitDocumentType(true);
+ }
StringWriter sw = createWriter(doc);
HTMLSerializer serializer = new HTMLSerializer(sw, outputFormat) {
// Overridden to prevent escaping of literal text
Modified:
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java?rev=719736&r1=719735&r2=719736&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java
Fri Nov 21 14:33:15 2008
@@ -36,7 +36,8 @@
private NekoHtmlParser full = new NekoHtmlParser(
new ParseModule.DOMImplementationProvider().get());
- public void testParser() throws Exception {
+ public void testDocWithDoctype() throws Exception {
+ // Note that doctype is properly retained
String content = IOUtils.toString(this.getClass().getClassLoader().
getResourceAsStream("org/apache/shindig/gadgets/parse/nekohtml/test.html"));
String expected = IOUtils.toString(this.getClass().getClassLoader().
@@ -45,7 +46,16 @@
parseAndCompareBalanced(content, expected, simple);
}
+ public void testDocNoDoctype() throws Exception {
+ // Note that no doctype is properly created when none specified
+ String content = IOUtils.toString(this.getClass().getClassLoader().
+ getResourceAsStream("org/apache/shindig/gadgets/parse/nekohtml/test-fulldocnodoctype.html"));
+ assertNull(full.parseDom(content).getDoctype());
+ assertNull(simple.parseDom(content).getDoctype());
+ }
+
public void testNotADocument() throws Exception {
+ // Note that no doctype is injected for fragments
String content = IOUtils.toString(this.getClass().getClassLoader().
getResourceAsStream("org/apache/shindig/gadgets/parse/nekohtml/test-fragment.html"));
String expected = IOUtils.toString(this.getClass().getClassLoader().
@@ -55,6 +65,7 @@
}
public void testNoBody() throws Exception {
+ // Note that no doctype is injected for fragments
String content = IOUtils.toString(this.getClass().getClassLoader().
getResourceAsStream("org/apache/shindig/gadgets/parse/nekohtml/test-headnobody.html"));
String expected = IOUtils.toString(this.getClass().getClassLoader().
Modified:
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-expected.html
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-expected.html?rev=719736&r1=719735&r2=719736&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-expected.html
(original)
+++
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-expected.html
Fri Nov 21 14:33:15 2008
@@ -1,4 +1,4 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head id="head">
<link href="http://www.example.org/css.css" rel="stylesheet" type="text/css">
Modified:
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html?rev=719736&r1=719735&r2=719736&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html
(original)
+++
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html
Fri Nov 21 14:33:15 2008
@@ -1,3 +1,2 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html><head></head><body><script>document.write("dont add to head or else")</script>
<style type="text/css"> can go in head</style></body></html>
\ No newline at end of file
Added:
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fulldocnodoctype.html
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fulldocnodoctype.html?rev=719736&view=auto
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fulldocnodoctype.html
(added)
+++
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fulldocnodoctype.html
Fri Nov 21 14:33:15 2008
@@ -0,0 +1,6 @@
+<html>
+ <head></head>
+ <body>
+ <!-- This is a full doc with no doctype -->
+ </body>
+</html>
\ No newline at end of file
Modified:
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html?rev=719736&r1=719735&r2=719736&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html
(original)
+++
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html
Fri Nov 21 14:33:15 2008
@@ -1,4 +1,3 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html><head>
<!-- A head tag but no body tag is not good -->
</head><body>
Modified:
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test.html
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test.html?rev=719736&r1=719735&r2=719736&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test.html
(original)
+++
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test.html
Fri Nov 21 14:33:15 2008
@@ -1,4 +1,4 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head id="head">
<link href="http://www.example.org/css.css" rel="stylesheet" type="text/css">