Author: vsiveton
Date: Thu Mar 5 12:07:28 2009
New Revision: 750433
URL: http://svn.apache.org/viewvc?rev=750433&view=rev
Log:
SHINDIG-915: Ampersands in attributes not handled properly by Neko HTML parser
code
o updated printAttributeValue() to take care of ampersand
o added test case
Added:
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands-expected.html
(with props)
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands.html
(with props)
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java?rev=750433&r1=750432&r2=750433&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
Thu Mar 5 12:07:28 2009
@@ -31,6 +31,7 @@
import java.io.IOException;
import java.io.StringWriter;
+import java.net.URL;
/**
* This parser does not try to escape entities in text content as it expects
the parser
@@ -58,7 +59,7 @@
public static void serialize(Node n, Appendable output) throws IOException {
serialize(n, output, false);
}
-
+
private static void serialize(Node n, Appendable output, boolean xmlMode)
throws IOException {
switch (n.getNodeType()) {
@@ -77,10 +78,10 @@
Element elem = (Element)n;
HTMLElements.Element htmlElement =
HTMLElements.getElement(elem.getNodeName());
-
+
NodeList children = elem.getChildNodes();
printStartElement(elem, output, xmlMode && htmlElement.isEmpty());
-
+
// Special HTML elements - <script> in particular - will typically
// only have CDATA. If they do have elements, that'd be data
pipelining
// or templating kicking in, and we should use XML-format output.
@@ -120,15 +121,15 @@
}
/**
- * Print the start of an HTML element.
+ * Print the start of an HTML element.
*/
public static void printStartElement(Element elem, Appendable output) throws
IOException {
printStartElement(elem, output, false);
}
-
+
/**
* Print the start of an HTML element. If withXmlClose==true, this is an
- * empty element that should have its content
+ * empty element that should have its content
*/
private static void printStartElement(Element elem, Appendable output,
boolean withXmlClose)
throws IOException {
@@ -148,11 +149,21 @@
}
public static void printAttributeValue(String text, Appendable output)
throws IOException {
+ boolean isUrl = false;
+ try {
+ new URL(text);
+ isUrl = true;
+ } catch (Exception e) {
+ // nop
+ }
+
int length = text.length();
for (int j = 0; j < length; j++) {
char c = text.charAt(j);
if (c == '"') {
output.append(""");
+ } else if (c == '&' && !isUrl) {
+ output.append("&");
} else {
output.append(c);
}
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java?rev=750433&r1=750432&r2=750433&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
Thu Mar 5 12:07:28 2009
@@ -19,6 +19,7 @@
import java.io.IOException;
import java.io.StringReader;
+import java.net.URL;
import java.util.Set;
import java.util.Stack;
@@ -70,11 +71,11 @@
public NekoSimplifiedHtmlParser(DOMImplementation documentFactory) {
this.documentFactory = documentFactory;
}
-
+
@Override
protected Document parseDomImpl(String source) {
DocumentHandler handler;
-
+
try {
handler = parseHtmlImpl(source);
} catch (IOException ioe) {
@@ -91,7 +92,7 @@
@Override
protected DocumentFragment parseFragmentImpl(String source) throws
GadgetException {
DocumentHandler handler;
-
+
try {
handler = parseHtmlImpl(source);
} catch (IOException ioe) {
@@ -118,7 +119,7 @@
return HTMLElements.getElement(name);
}
};
-
+
DocumentHandler handler = newDocumentHandler(source, htmlScanner);
if (config.getFeature("http://xml.org/sax/features/namespaces")) {
@@ -130,13 +131,13 @@
} else {
tagBalancer.setDocumentHandler(handler);
}
-
- tagBalancer.setDocumentSource(htmlScanner);
+
+ tagBalancer.setDocumentSource(htmlScanner);
htmlScanner.setDocumentHandler(tagBalancer);
tagBalancer.reset(config);
htmlScanner.reset(config);
-
+
XMLInputSource inputSource = new XMLInputSource(null, null, null);
inputSource.setEncoding("UTF-8");
inputSource.setCharacterStream(new StringReader(source));
@@ -144,7 +145,7 @@
htmlScanner.scanDocument(true);
return handler;
}
-
+
protected HTMLConfiguration newConfiguration() {
HTMLConfiguration config = new HTMLConfiguration();
// Maintain original case for elements and attributes
@@ -168,7 +169,7 @@
protected boolean isElementImportant(QName qName) {
return elements.contains(qName.rawname.toLowerCase());
}
-
+
/**
* Handler for XNI events from Neko
*/
@@ -273,7 +274,7 @@
elementStack.peek().appendChild(document.createTextNode(builder.toString()));
builder.setLength(0);
}
-
+
Element element;
// Preserve XML namespace if present
if (qName.uri != null) {
@@ -281,7 +282,7 @@
} else {
element = document.createElement(qName.rawname);
}
-
+
for (int i = 0; i < xmlAttributes.getLength(); i++) {
if (xmlAttributes.getURI(i) != null) {
element.setAttributeNS(xmlAttributes.getURI(i),
xmlAttributes.getQName(i),
@@ -295,10 +296,20 @@
}
private void appendAttributeValue(String text) {
+ boolean isUrl = false;
+ try {
+ new URL(text);
+ isUrl = true;
+ } catch (Exception e) {
+ // nop
+ }
+
for (int i = 0; i < text.length(); i++) {
char c = text.charAt(i);
if (c == '"') {
builder.append(""");
+ } else if (c == '&' && !isUrl) {
+ builder.append("&");
} else {
builder.append(c);
}
Modified:
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java?rev=750433&r1=750432&r2=750433&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java
Thu Mar 5 12:07:28 2009
@@ -79,6 +79,17 @@
parseAndCompareBalanced(content, expected, simple);
}
+ public void testAmpersand() throws Exception {
+ // Note that no doctype is injected for fragments
+ String content = IOUtils.toString(this.getClass().getClassLoader().
+
getResourceAsStream("org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands.html"));
+ String expected = IOUtils.toString(this.getClass().getClassLoader().
+ getResourceAsStream(
+
"org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands-expected.html"));
+ parseAndCompareBalanced(content, expected, full);
+ parseAndCompareBalanced(content, expected, simple);
+ }
+
private void parseAndCompareBalanced(String content, String expected,
GadgetHtmlParser parser)
throws Exception {
Document document = parser.parseDom(content);
Added:
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands-expected.html
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands-expected.html?rev=750433&view=auto
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands-expected.html
(added)
+++
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands-expected.html
Thu Mar 5 12:07:28 2009
@@ -0,0 +1,11 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head id="head">
+ <link href="http://www.example.org/css.css" rel="stylesheet" type="text/css">
+ <title>An example</title>
+</head>
+<body>
+ <!-- Some comment -->
+ <span title="&lt;">content</span>
+
+</body></html>
\ No newline at end of file
Propchange:
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands-expected.html
------------------------------------------------------------------------------
svn:eol-style = native
Added:
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands.html
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands.html?rev=750433&view=auto
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands.html
(added)
+++
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands.html
Thu Mar 5 12:07:28 2009
@@ -0,0 +1,11 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head id="head">
+ <link href="http://www.example.org/css.css" rel="stylesheet" type="text/css">
+ <title>An example</title>
+</head>
+<body>
+ <!-- Some comment -->
+ <span title="&lt;">content</span>
+</body>
+</html>
\ No newline at end of file
Propchange:
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands.html
------------------------------------------------------------------------------
svn:eol-style = native