Author: vsiveton
Date: Thu Mar  5 12:07:28 2009
New Revision: 750433

URL: http://svn.apache.org/viewvc?rev=750433&view=rev
Log:
SHINDIG-915: Ampersands in attributes not handled properly by Neko HTML parser 
code

o updated printAttributeValue() to take care of ampersand
o added test case

Added:
    
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands-expected.html
   (with props)
    
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands.html
   (with props)
Modified:
    
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
    
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
    
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java

Modified: 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java?rev=750433&r1=750432&r2=750433&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
 Thu Mar  5 12:07:28 2009
@@ -31,6 +31,7 @@
 
 import java.io.IOException;
 import java.io.StringWriter;
+import java.net.URL;
 
 /**
  * This parser does not try to escape entities in text content as it expects 
the parser
@@ -58,7 +59,7 @@
   public static void serialize(Node n, Appendable output) throws IOException {
     serialize(n, output, false);
   }
-    
+
   private static void serialize(Node n, Appendable output, boolean xmlMode)
       throws IOException {
     switch (n.getNodeType()) {
@@ -77,10 +78,10 @@
         Element elem = (Element)n;
         HTMLElements.Element htmlElement =
             HTMLElements.getElement(elem.getNodeName());
-        
+
         NodeList children = elem.getChildNodes();
         printStartElement(elem, output, xmlMode && htmlElement.isEmpty());
-        
+
         // Special HTML elements - <script> in particular - will typically
         // only have CDATA.  If they do have elements, that'd be data 
pipelining
         // or templating kicking in, and we should use XML-format output.
@@ -120,15 +121,15 @@
   }
 
   /**
-   * Print the start of an HTML element. 
+   * Print the start of an HTML element.
    */
   public static void printStartElement(Element elem, Appendable output) throws 
IOException {
     printStartElement(elem, output, false);
   }
-  
+
   /**
    * Print the start of an HTML element.  If withXmlClose==true, this is an
-   * empty element that should have its content 
+   * empty element that should have its content
    */
   private static void printStartElement(Element elem, Appendable output, 
boolean withXmlClose)
       throws IOException {
@@ -148,11 +149,21 @@
   }
 
   public static void printAttributeValue(String text, Appendable output) 
throws IOException {
+    boolean isUrl = false;
+    try {
+        new URL(text);
+        isUrl = true;
+    } catch (Exception e) {
+        // nop
+    }
+
     int length = text.length();
     for (int j = 0; j < length; j++) {
       char c = text.charAt(j);
       if (c == '"') {
         output.append("&quot;");
+      } else if (c == '&' && !isUrl) {
+        output.append("&amp;");
       } else {
         output.append(c);
       }

Modified: 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java?rev=750433&r1=750432&r2=750433&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
 Thu Mar  5 12:07:28 2009
@@ -19,6 +19,7 @@
 
 import java.io.IOException;
 import java.io.StringReader;
+import java.net.URL;
 import java.util.Set;
 import java.util.Stack;
 
@@ -70,11 +71,11 @@
   public NekoSimplifiedHtmlParser(DOMImplementation documentFactory) {
     this.documentFactory = documentFactory;
   }
-  
+
   @Override
   protected Document parseDomImpl(String source) {
     DocumentHandler handler;
-    
+
     try {
       handler = parseHtmlImpl(source);
     } catch (IOException ioe) {
@@ -91,7 +92,7 @@
   @Override
   protected DocumentFragment parseFragmentImpl(String source) throws 
GadgetException {
     DocumentHandler handler;
-    
+
     try {
       handler = parseHtmlImpl(source);
     } catch (IOException ioe) {
@@ -118,7 +119,7 @@
         return HTMLElements.getElement(name);
       }
     };
-    
+
     DocumentHandler handler = newDocumentHandler(source, htmlScanner);
 
     if (config.getFeature("http://xml.org/sax/features/namespaces";)) {
@@ -130,13 +131,13 @@
     } else {
       tagBalancer.setDocumentHandler(handler);
     }
-    
-    tagBalancer.setDocumentSource(htmlScanner);    
+
+    tagBalancer.setDocumentSource(htmlScanner);
     htmlScanner.setDocumentHandler(tagBalancer);
 
     tagBalancer.reset(config);
     htmlScanner.reset(config);
-    
+
     XMLInputSource inputSource = new XMLInputSource(null, null, null);
     inputSource.setEncoding("UTF-8");
     inputSource.setCharacterStream(new StringReader(source));
@@ -144,7 +145,7 @@
     htmlScanner.scanDocument(true);
     return handler;
   }
-  
+
   protected HTMLConfiguration newConfiguration() {
     HTMLConfiguration config = new HTMLConfiguration();
     // Maintain original case for elements and attributes
@@ -168,7 +169,7 @@
   protected boolean isElementImportant(QName qName) {
     return elements.contains(qName.rawname.toLowerCase());
   }
-  
+
   /**
    * Handler for XNI events from Neko
    */
@@ -273,7 +274,7 @@
         
elementStack.peek().appendChild(document.createTextNode(builder.toString()));
         builder.setLength(0);
       }
-      
+
       Element element;
       // Preserve XML namespace if present
       if (qName.uri != null) {
@@ -281,7 +282,7 @@
       } else {
         element = document.createElement(qName.rawname);
       }
-      
+
       for (int i = 0; i < xmlAttributes.getLength(); i++) {
         if (xmlAttributes.getURI(i) != null) {
           element.setAttributeNS(xmlAttributes.getURI(i), 
xmlAttributes.getQName(i),
@@ -295,10 +296,20 @@
     }
 
     private void appendAttributeValue(String text) {
+      boolean isUrl = false;
+      try {
+        new URL(text);
+        isUrl = true;
+      } catch (Exception e) {
+        // nop
+      }
+
       for (int i = 0; i < text.length(); i++) {
         char c = text.charAt(i);
         if (c == '"') {
           builder.append("&quot;");
+        } else if (c == '&' && !isUrl) {
+          builder.append("&amp;");
         } else {
           builder.append(c);
         }

Modified: 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java?rev=750433&r1=750432&r2=750433&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java
 Thu Mar  5 12:07:28 2009
@@ -79,6 +79,17 @@
     parseAndCompareBalanced(content, expected, simple);
   }
 
+  public void testAmpersand() throws Exception {
+    // Note that no doctype is injected for fragments
+    String content = IOUtils.toString(this.getClass().getClassLoader().
+        
getResourceAsStream("org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands.html"));
+    String expected = IOUtils.toString(this.getClass().getClassLoader().
+        getResourceAsStream(
+        
"org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands-expected.html"));
+    parseAndCompareBalanced(content, expected, full);
+    parseAndCompareBalanced(content, expected, simple);
+  }
+
   private void parseAndCompareBalanced(String content, String expected, 
GadgetHtmlParser parser)
       throws Exception {
     Document document = parser.parseDom(content);

Added: 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands-expected.html
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands-expected.html?rev=750433&view=auto
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands-expected.html
 (added)
+++ 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands-expected.html
 Thu Mar  5 12:07:28 2009
@@ -0,0 +1,11 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" 
"http://www.w3.org/TR/html4/loose.dtd";>
+<html>
+<head id="head">
+  <link href="http://www.example.org/css.css"; rel="stylesheet" type="text/css">
+  <title>An example</title>
+</head>
+<body>
+  <!-- Some comment -->
+  <span title="&amp;lt;">content</span>
+
+</body></html>
\ No newline at end of file

Propchange: 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands-expected.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands.html
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands.html?rev=750433&view=auto
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands.html
 (added)
+++ 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands.html
 Thu Mar  5 12:07:28 2009
@@ -0,0 +1,11 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" 
"http://www.w3.org/TR/html4/loose.dtd";>
+<html>
+<head id="head">
+  <link href="http://www.example.org/css.css"; rel="stylesheet" type="text/css">
+  <title>An example</title>
+</head>
+<body>
+  <!-- Some comment -->
+  <span title="&amp;lt;">content</span>
+</body>
+</html>
\ No newline at end of file

Propchange: 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands.html
------------------------------------------------------------------------------
    svn:eol-style = native


Reply via email to