Author: johnh
Date: Wed Nov 18 00:14:45 2009
New Revision: 881629

URL: http://svn.apache.org/viewvc?rev=881629&view=rev
Log:
Fixes to accommodate more Neko 1.9.13 behavior.

1. Reintroduces document normalization behavior, in particular ensuring that 
<body> exists in the returned document.
2. Pushes all <script> elements in parsed HTML <head> element to the start of 
<body>, in order. This ensures that gadget scripts referencing document.body 
work appropriately. This may have odd side-effects if a particular gadget 
server installation oscillates between String- and DOM-based rewriters, since 
each String-to-DOM conversion (in MutableContent) will see <head><script> moved 
to <body>. Still, this change should be functionally intact.

I'm working on a GadgetHtmlParser subclass using Caja's 
non-validating/rejiggering parser now, which better fits with Shindig's use 
case. This code will hopefully tide us over until that time.


Added:
    
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript-expected.html
    
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript.html
Modified:
    
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
    
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java
    
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html
    
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html

Modified: 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java?rev=881629&r1=881628&r2=881629&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
 Wed Nov 18 00:14:45 2009
@@ -19,6 +19,7 @@
 package org.apache.shindig.gadgets.parse.nekohtml;
 
 import org.apache.commons.lang.StringUtils;
+import org.apache.shindig.common.xml.DomUtil;
 import org.apache.shindig.gadgets.GadgetException;
 import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
 import org.apache.xerces.xni.Augmentations;
@@ -48,6 +49,7 @@
 import org.w3c.dom.DocumentFragment;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
 
 import java.io.IOException;
 import java.io.StringReader;
@@ -107,6 +109,7 @@
     Document document = handler.getDocument();
     DocumentFragment fragment = handler.getFragment();
     normalizeFragment(document, fragment);
+    fixNekoWeirdness(document);
     return document;
   }
 
@@ -164,6 +167,51 @@
     htmlScanner.scanDocument(true);
     return handler;
   }
+  
+  /**
+   * Relocates every {@code <script>} child of {@code <head>} to the start of {@code <body>},
+   * creating {@code <head>} and/or {@code <body>} first when either is missing.
+   *
+   * NOTE(review): the scripts are collected in document order but re-inserted via pop(), each
+   * one before the original first child of body, so they end up in reverse of their head order.
+   *
+   * @param document parsed document, modified in place; assumed to be normalized already
+   */
+  private void fixNekoWeirdness(Document document) {
+    // Neko as of versions > 1.9.13 stuffs all leading <script> nodes into 
<head>.
+    // This breaks all sorts of assumptions in gadgets, notably the existence 
of document.body.
+    // We can't tell Neko to avoid putting <script> into <head> however, since 
gadgets
+    // like <Content><script>...</script><style>...</style> will break due to 
both
+    // <script> and <style> ending up in <body> -- at which point Neko 
unceremoniously
+    // drops the <style> (and <link>) elements.
+    // Therefore we just search for <script> elements in <head> and stuff them 
all into
+    // the top of <body>.
+    // This method assumes a normalized document as input.
+    // NOTE(review): html is dereferenced below without a null check, unlike head and body;
+    // presumably this is only safe because of the normalized-input assumption -- confirm.
+    Node html = DomUtil.getFirstNamedChildNode(document, "html");
+    if (html.getNextSibling() != null &&
+        html.getNextSibling().getNodeName().equalsIgnoreCase("html")) {
+      // if a doctype is specified, then the desired root <html> node is 
wrapped by an <HTML> node
+      // Pull out the <html> root.
+      html = html.getNextSibling();
+    }
+    // Make sure a <head> exists, as the first child of <html>.
+    Node head = DomUtil.getFirstNamedChildNode(html, "head");
+    if (head == null) {
+      head = document.createElement("head");
+      html.insertBefore(head, html.getFirstChild());
+    }
+    // Collect the direct <script> children of <head> first, so that <head> is not
+    // modified while its child list is being walked.
+    NodeList headNodes = head.getChildNodes();
+    Stack<Node> headScripts = new Stack<Node>();
+    for (int i = 0; i < headNodes.getLength(); ++i) {
+      Node headChild = headNodes.item(i);
+      if (headChild.getNodeName().equalsIgnoreCase("script")) {
+        headScripts.add(headChild);
+      }
+    }
+    
+    // Remove from head, add to top of <body>. pop() returns the last script first, and
+    // every script is inserted before the same bodyFirst node, so <head> order is reversed.
+    Node body = DomUtil.getFirstNamedChildNode(html, "body");
+    if (body == null) {
+      body = document.createElement("body");
+      // Place <body> right after <head>; a null reference node appends at the end of <html>.
+      html.insertBefore(body, head.getNextSibling());
+    }
+    // Captured once: all scripts go in front of whatever was first in <body> originally
+    // (null for an empty body, in which case insertBefore appends).
+    Node bodyFirst = body.getFirstChild();
+    while (headScripts.size() > 0) {
+      Node headScript = headScripts.pop();
+      head.removeChild(headScript);
+      body.insertBefore(headScript, bodyFirst);
+    }
+  }
 
   protected HTMLConfiguration newConfiguration() {
     HTMLConfiguration config = new HTMLConfiguration();

Modified: 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java?rev=881629&r1=881628&r2=881629&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java
 Wed Nov 18 00:14:45 2009
@@ -68,5 +68,11 @@
     String expected = 
loadFile("org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands-expected.html");
     parseAndCompareBalanced(content, expected, simple);
   }
+  
+  /**
+   * Loads test-leadingscript.html and test-leadingscript-expected.html and passes both,
+   * together with {@code simple}, to parseAndCompareBalanced.
+   */
+  public void testScriptPushedToBody() throws Exception {
+    String content = 
loadFile("org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript.html");
+    String expected = 
loadFile("org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript-expected.html");
+    parseAndCompareBalanced(content, expected, simple);
+  }
 
 }

Modified: 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html?rev=881629&r1=881628&r2=881629&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html
 Wed Nov 18 00:14:45 2009
@@ -1,2 +1,2 @@
-<html><head><script>document.write("dont add to head or else")</script>
-<style type="text/css"> A { font : bold; }</style></head><body></body></html>
\ No newline at end of file
+<html><head>
+<style type="text/css"> A { font : bold; 
}</style></head><body><script>document.write("dont add to head or 
else")</script></body></html>
\ No newline at end of file

Modified: 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html?rev=881629&r1=881628&r2=881629&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html
 Wed Nov 18 00:14:45 2009
@@ -1,3 +1,3 @@
 <html><head>
     <!-- A head tag but no body tag is not good -->
-<script>document.write("dont add to head or else")</script><style 
type="text/css"> A { font : bold; } </style></head><body></body></html>
\ No newline at end of file
+<style type="text/css"> A { font : bold; } 
</style></head><body><script>document.write("dont add to head or 
else")</script></body></html>
\ No newline at end of file

Added: 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript-expected.html
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript-expected.html?rev=881629&view=auto
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript-expected.html
 (added)
+++ 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript-expected.html
 Wed Nov 18 00:14:45 2009
@@ -0,0 +1,6 @@
+<html><head>
+<style>Some CSS here</style>
+
+<link rel="linkrel">
+
+</head><body><script>foo3();</script><script>foo2();</script><script>foo1();</script><div
 id="mydiv">mycontent</div></body></html>
\ No newline at end of file

Added: 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript.html
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript.html?rev=881629&view=auto
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript.html
 (added)
+++ 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript.html
 Wed Nov 18 00:14:45 2009
@@ -0,0 +1,6 @@
+<script>foo1();</script>
+<style>Some CSS here</style>
+<script>foo2();</script>
+<link rel="linkrel"/>
+<script>foo3();</script>
+<div id="mydiv">mycontent</div>
\ No newline at end of file


Reply via email to