Author: lindner
Date: Sun May 10 06:11:59 2009
New Revision: 773308

URL: http://svn.apache.org/viewvc?rev=773308&view=rev
Log:
SHINDIG-987 | Applied patch from Vincent with suggested modifications from Adam 
 | NekoParser returns cryptic error messages when parsing bad html

Modified:
    
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
    
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParserTest.java

Modified: 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java?rev=773308&r1=773307&r2=773308&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
 Sun May 10 06:11:59 2009
@@ -17,6 +17,7 @@
  */
 package org.apache.shindig.gadgets.parse.nekohtml;
 
+import org.apache.commons.lang.StringUtils;
 import org.apache.shindig.gadgets.GadgetException;
 import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
 import org.apache.shindig.gadgets.parse.HtmlSerializer;
@@ -37,6 +38,7 @@
 import org.cyberneko.html.HTMLScanner;
 import org.cyberneko.html.HTMLTagBalancer;
 import org.cyberneko.html.filters.NamespaceBinder;
+import org.w3c.dom.DOMException;
 import org.w3c.dom.DOMImplementation;
 import org.w3c.dom.Document;
 import org.w3c.dom.DocumentFragment;
@@ -292,7 +294,34 @@
           element.setAttributeNS(xmlAttributes.getURI(i), xmlAttributes.getQName(i),
               xmlAttributes.getValue(i));
         } else {
-          element.setAttribute(xmlAttributes.getLocalName(i) , xmlAttributes.getValue(i));
+          try {
+            element.setAttribute(xmlAttributes.getLocalName(i), xmlAttributes
+                .getValue(i));
+          } catch (DOMException e) {
+            switch (e.code) {
+            case DOMException.INVALID_CHARACTER_ERR:
+              StringBuilder sb = new StringBuilder(e.getMessage());
+              sb.append("Around ...<");
+              if (qName.prefix != null) {
+                sb.append(qName.prefix);
+                sb.append(":");
+              }
+              sb.append(qName.localpart);
+              for (int j = 0; j < xmlAttributes.getLength(); j++) {
+                if (StringUtils.isNotBlank(xmlAttributes.getLocalName(j))
+                    && StringUtils.isNotBlank(xmlAttributes.getValue(j))) {
+                  sb.append(' ');
+                  sb.append(xmlAttributes.getLocalName(j));
+                  sb.append("=\"");
+                  sb.append(xmlAttributes.getValue(j)).append('\"');
+                }
+              }
+              sb.append("...");
+              throw new DOMException(DOMException.INVALID_CHARACTER_ERR, sb.toString());
+            default:
+              throw e;
+            }
+          }
         }
       }
       appendChild(element);

Modified: 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParserTest.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParserTest.java?rev=773308&r1=773307&r2=773308&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParserTest.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParserTest.java
 Sun May 10 06:11:59 2009
@@ -28,6 +28,7 @@
 import org.apache.shindig.gadgets.spec.PipelinedData;
 import org.junit.Before;
 import org.junit.Test;
+import org.w3c.dom.DOMException;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
@@ -115,6 +116,19 @@
     assertEquals("Some content", spanElements.item(0).getTextContent());
   }
 
+  @Test
+  public void testInvalid() throws Exception {
+    String content = "<html><div id=\"div_super\" class=\"div_super\" valign:\"middle\"></div></html>";
+    try {
+      parser.parseDom(content);
+      assertTrue("No exception caught", false);
+    } catch (DOMException e) {
+      assertTrue(e.getMessage().contains("INVALID_CHARACTER_ERR"));
+      assertTrue(e.getMessage().contains(
+          "Around ...<div id=\"div_super\" class=\"div_super\"..."));
+    }
+  }
+
   private List<Element> getScripts(final String type) {
     NodeIterator nodeIterator = ((DocumentTraversal) document)
     .createNodeIterator(document, NodeFilter.SHOW_ELEMENT,


Reply via email to