Author: johnh Date: Wed Nov 18 00:14:45 2009 New Revision: 881629 URL: http://svn.apache.org/viewvc?rev=881629&view=rev Log: Fixes accommodating more Neko 1.9.13 behavior.
1. Reintroduces document normalization behavior, in particular ensuring that <body> exists in the returned document. 2. Pushes all <script> elements in the parsed HTML <head> element to the start of <body>, in order. This ensures that gadget scripts referencing document.body work appropriately. This may have odd side effects if a particular gadget server installation oscillates between String- and DOM-based rewriters, since each String-to-DOM conversion (in MutableContent) will see <head><script> moved to <body>. Still, this change should be functionally intact. I'm working on a GadgetHtmlParser subclass using Caja's non-validating/rejiggering parser now, which better fits Shindig's use case. This code will hopefully tide us over until that time. Added: incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript-expected.html incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript.html Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java?rev=881629&r1=881628&r2=881629&view=diff ============================================================================== --- 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java (original) +++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java Wed Nov 18 00:14:45 2009 @@ -19,6 +19,7 @@ package org.apache.shindig.gadgets.parse.nekohtml; import org.apache.commons.lang.StringUtils; +import org.apache.shindig.common.xml.DomUtil; import org.apache.shindig.gadgets.GadgetException; import org.apache.shindig.gadgets.parse.GadgetHtmlParser; import org.apache.xerces.xni.Augmentations; @@ -48,6 +49,7 @@ import org.w3c.dom.DocumentFragment; import org.w3c.dom.Element; import org.w3c.dom.Node; +import org.w3c.dom.NodeList; import java.io.IOException; import java.io.StringReader; @@ -107,6 +109,7 @@ Document document = handler.getDocument(); DocumentFragment fragment = handler.getFragment(); normalizeFragment(document, fragment); + fixNekoWeirdness(document); return document; } @@ -164,6 +167,51 @@ htmlScanner.scanDocument(true); return handler; } + + private void fixNekoWeirdness(Document document) { + // Neko as of versions > 1.9.13 stuffs all leading <script> nodes into <head>. + // This breaks all sorts of assumptions in gadgets, notably the existence of document.body. + // We can't tell Neko to avoid putting <script> into <head> however, since gadgets + // like <Content><script>...</script><style>...</style> will break due to both + // <script> and <style> ending up in <body> -- at which point Neko unceremoniously + // drops the <style> (and <link>) elements. + // Therefore we just search for <script> elements in <head> and stuff them all into + // the top of <body>. + // This method assumes a normalized document as input. 
+ Node html = DomUtil.getFirstNamedChildNode(document, "html"); + if (html.getNextSibling() != null && + html.getNextSibling().getNodeName().equalsIgnoreCase("html")) { + // if a doctype is specified, then the desired root <html> node is wrapped by an <HTML> node + // Pull out the <html> root. + html = html.getNextSibling(); + } + Node head = DomUtil.getFirstNamedChildNode(html, "head"); + if (head == null) { + head = document.createElement("head"); + html.insertBefore(head, html.getFirstChild()); + } + NodeList headNodes = head.getChildNodes(); + Stack<Node> headScripts = new Stack<Node>(); + for (int i = 0; i < headNodes.getLength(); ++i) { + Node headChild = headNodes.item(i); + if (headChild.getNodeName().equalsIgnoreCase("script")) { + headScripts.add(headChild); + } + } + + // Remove from head, add to top of <body> in <head> order. + Node body = DomUtil.getFirstNamedChildNode(html, "body"); + if (body == null) { + body = document.createElement("body"); + html.insertBefore(body, head.getNextSibling()); + } + Node bodyFirst = body.getFirstChild(); + while (headScripts.size() > 0) { + Node headScript = headScripts.pop(); + head.removeChild(headScript); + body.insertBefore(headScript, bodyFirst); + } + } protected HTMLConfiguration newConfiguration() { HTMLConfiguration config = new HTMLConfiguration(); Modified: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java?rev=881629&r1=881628&r2=881629&view=diff ============================================================================== --- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java (original) +++ 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java Wed Nov 18 00:14:45 2009 @@ -68,5 +68,11 @@ String expected = loadFile("org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands-expected.html"); parseAndCompareBalanced(content, expected, simple); } + + public void testScriptPushedToBody() throws Exception { + String content = loadFile("org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript.html"); + String expected = loadFile("org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript-expected.html"); + parseAndCompareBalanced(content, expected, simple); + } } Modified: incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html?rev=881629&r1=881628&r2=881629&view=diff ============================================================================== --- incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html (original) +++ incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html Wed Nov 18 00:14:45 2009 @@ -1,2 +1,2 @@ -<html><head><script>document.write("dont add to head or else")</script> -<style type="text/css"> A { font : bold; }</style></head><body></body></html> \ No newline at end of file +<html><head> +<style type="text/css"> A { font : bold; }</style></head><body><script>document.write("dont add to head or else")</script></body></html> \ No newline at end of file Modified: incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html?rev=881629&r1=881628&r2=881629&view=diff ============================================================================== --- incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html (original) +++ incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html Wed Nov 18 00:14:45 2009 @@ -1,3 +1,3 @@ <html><head> <!-- A head tag but no body tag is not good --> -<script>document.write("dont add to head or else")</script><style type="text/css"> A { font : bold; } </style></head><body></body></html> \ No newline at end of file +<style type="text/css"> A { font : bold; } </style></head><body><script>document.write("dont add to head or else")</script></body></html> \ No newline at end of file Added: incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript-expected.html URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript-expected.html?rev=881629&view=auto ============================================================================== --- incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript-expected.html (added) +++ incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript-expected.html Wed Nov 18 00:14:45 2009 @@ -0,0 +1,6 @@ +<html><head> +<style>Some CSS here</style> + +<link rel="linkrel"> + +</head><body><script>foo3();</script><script>foo2();</script><script>foo1();</script><div id="mydiv">mycontent</div></body></html> \ No newline at end of file Added: 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript.html URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript.html?rev=881629&view=auto ============================================================================== --- incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript.html (added) +++ incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript.html Wed Nov 18 00:14:45 2009 @@ -0,0 +1,6 @@ +<script>foo1();</script> +<style>Some CSS here</style> +<script>foo2();</script> +<link rel="linkrel"/> +<script>foo3();</script> +<div id="mydiv">mycontent</div> \ No newline at end of file