Author: craigmcc
Date: Fri Apr 29 19:05:53 2005
New Revision: 165372
URL: http://svn.apache.org/viewcvs?rev=165372&view=rev
Log:
Check in "beefed up" parser and unit test case. Modified name of unit test
class (convention is "FooTestCase" to test a class "Foo", in the same package)
and tweaked build.xml so that "ant test" works.
Added:
struts/shale/trunk/clay-plugin/src/test/
struts/shale/trunk/clay-plugin/src/test/org/
struts/shale/trunk/clay-plugin/src/test/org/apache/
struts/shale/trunk/clay-plugin/src/test/org/apache/shale/
struts/shale/trunk/clay-plugin/src/test/org/apache/shale/clay/
struts/shale/trunk/clay-plugin/src/test/org/apache/shale/clay/parser/
struts/shale/trunk/clay-plugin/src/test/org/apache/shale/clay/parser/ParserTestCase.java
Modified:
struts/shale/trunk/clay-plugin/build.xml
struts/shale/trunk/clay-plugin/src/java/org/apache/shale/clay/parser/Parser.java
Modified: struts/shale/trunk/clay-plugin/build.xml
URL:
http://svn.apache.org/viewcvs/struts/shale/trunk/clay-plugin/build.xml?rev=165372&r1=165371&r2=165372&view=diff
==============================================================================
--- struts/shale/trunk/clay-plugin/build.xml (original)
+++ struts/shale/trunk/clay-plugin/build.xml Fri Apr 29 19:05:53 2005
@@ -304,7 +304,7 @@
usefile="false"/>
<batchtest fork="yes">
<fileset dir="${build.home}/test-classes"
- includes="org/apache/shale/clay/test/*/*TestCase.class"/>
+ includes="org/apache/shale/clay/**/*TestCase.class"/>
</batchtest>
</junit>
Modified:
struts/shale/trunk/clay-plugin/src/java/org/apache/shale/clay/parser/Parser.java
URL:
http://svn.apache.org/viewcvs/struts/shale/trunk/clay-plugin/src/java/org/apache/shale/clay/parser/Parser.java?rev=165372&r1=165371&r2=165372&view=diff
==============================================================================
--- struts/shale/trunk/clay-plugin/src/java/org/apache/shale/clay/parser/Parser.java (original)
+++ struts/shale/trunk/clay-plugin/src/java/org/apache/shale/clay/parser/Parser.java Fri Apr 29 19:05:53 2005
@@ -110,7 +110,7 @@
}
/**
- * <p>This method is overriden to convert the key into a neutral
+ * <p>This method is overridden to convert the key into a neutral
* case so that the <code>Map</code> access method will be case
* insensitive.</p>
*/
@@ -125,18 +125,78 @@
}
+
+ /**
+ * <p>This array of HTML tags can have optional ending tags.</p>
+ */
+ private static final String[] OPTIONAL_ENDING_TAG = {"TR", "TH", "TD", "LI", "DT", "DD", "LH", "OPTION"};
/**
- * <p>Parse a document fragment into graphs of {@link Node}. The
- * resulting type is a list because the fragment might not be
- * well-formed.
+ * <p>This array of parent tags is cross referenced by the <code>OPTIONAL_ENDING_TAG</code> array</p>
+ */
+ private static final String[][] TAG_PARENTS = {{"TABLE"}, {"TABLE"}, {"TR"}, {"OL", "UL"}, {"DL"}, {"DL"}, {"DL"}, {"SELECT"}};
+
+ /**
+ * <p>
+ * Determines if a HTML nodeName is a type of tag that can optionally have a
+ * ending tag
* </p>
- *
- * @param document input source
- * @return collection of {@link Node}
+ *
+ * @param nodeName the name of the html node
+ * @return <code>true</code> if the nodeName is in the
+ * <code>OPTIONAL-ENDING_TAG<code> array; otherwise, <code>false</code> is returned
+ */
+ protected boolean isOptionalEndingTag(String nodeName) {
+ if (nodeName != null) {
+ for (int i = 0; i < OPTIONAL_ENDING_TAG.length; i++) {
+ if (OPTIONAL_ENDING_TAG[i].equalsIgnoreCase(nodeName)) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+ }
+
+ /**
+ * <p>
+ * Checks to see if a optional ending tag has a valid parent. This is use to
+ * detect a implicit ending tag
+ * </p>
+ *
+ * @param nodeName of the optional ending tag
+ * @param parentNodeName name of the parent
+ * @return <code>true</code> if the parentNodeName is a valid parent for
+ * the nodeName; otherwise, a <code>false</code> value is returned
*/
+ protected boolean isValidOptionalEndingTagParent(String nodeName,
+ String parentNodeName) {
+ if (nodeName != null && parentNodeName != null) {
+ for (int i = 0; i < OPTIONAL_ENDING_TAG.length; i++) {
+ if (OPTIONAL_ENDING_TAG[i].equalsIgnoreCase(nodeName)) {
+ for (int j = 0; j < TAG_PARENTS[i].length; j++) {
+ if (TAG_PARENTS[i][j].equalsIgnoreCase(parentNodeName)) {
+ return true;
+ }
+ }
+ break;
+ }
+ }
+ }
+ return false;
+ }
+
+ /**
+ * <p>
+ * Parse a document fragment into graphs of {@link Node}. The resulting
+ * type is a list because the fragment might not be well-formed.
+ * </p>
+ *
+ * @param document
+ * input source
+ * @return collection of {@link Node}
+ */
public List parse(StringBuffer document) {
- Stack nodes = new Stack();
Node root = new Node(null);
Node current = root;
current.setName("namingContainer");
@@ -149,10 +209,10 @@
Node node = buildNode(token);
if (!node.isStart() && node.isEnd()) {
-
- while (true) {
+
+ pop: while (true) {
if (current == null)
- break;
+ break pop;
if (isNodeNameEqual(current, node)) {
@@ -163,9 +223,12 @@
// nodes are equal, make the parent of the
// begin tag the current node
current = current.getParent();
- break;
+ break pop;
}
-
+
+ if (isOptionalEndingTag(current.getName()))
+ current.setWellFormed(true);
+
current = (Node) current.getParent();
}
@@ -181,22 +244,52 @@
// </select>
//
- if ((current.getName() != null && current.getName()
- .equalsIgnoreCase("option"))
- && (current.isStart() && !current.isEnd())
- && (current.getParent() != null
- && current.getParent().getName() != null && current
- .getParent().getName().equalsIgnoreCase(
- "select"))) {
+
+ if (isOptionalEndingTag(current.getName())
+ && current.isStart() && !current.isEnd()
+ && current.getParent() != null
+ && isValidOptionalEndingTagParent(node.getName(), current.getParent().getName())) {
+ current.setWellFormed(true);
current.getParent().addChild(node);
current = node;
} else {
-
- // adding a new node to the current making it current
- current.addChild(node);
- current = node;
+ // the current node is a optional and the new node is it's parent
+ // simulate having ending nodes
+ if (this.isValidOptionalEndingTagParent(current.getName(), node.getName())) {
+
+ pop: while (true) {
+ if (current == null)
+ break pop;
+
+ if (isNodeNameEqual(current, node)) {
+
+ // isWellFormed indicates a beginning tag and ending tag
+ // was found
+ current.setWellFormed(true);
+
+ // nodes are equal, make the parent of the
+ // begin tag the current node
+ current = current.getParent();
+ current.addChild(node);
+ current = node;
+
+ break pop;
+ }
+
+ if (isOptionalEndingTag(current.getName()))
+ current.setWellFormed(true);
+
+ current = (Node) current.getParent();
+
+ }
+
+ } else {
+ // adding a new node to the current making it current
+ current.addChild(node);
+ current = node;
+ }
}
} else {
if (current != null)
@@ -227,12 +320,12 @@
if (node1 != null && node2 != null) {
if (node1.getName() != null && node2.getName() != null) {
- if (node1.getName().equals(node2.getName())) {
+ if (node1.getName().equalsIgnoreCase(node2.getName())) {
if (node1.getQname() == null && node2.getQname() == null)
f = true;
else if (node1.getQname() != null
&& node2.getQname() != null
- && node1.getQname().equals(node2.getQname()))
+ && node1.getQname().equalsIgnoreCase(node2.getQname()))
f = true;
}
}
@@ -247,6 +340,32 @@
return f;
}
+ private final static String[] SELF_TERMINATING = {"META", "LINK", "HR",
+ "BASEFONT","IMG", "PARAM", "BR", "AREA", "INPUT", "ISINDEX",
+ "BASE"};
+
+ /**
+ * <p>
+ * Checks to see if the nodeName is within the <code>SELF_TERMINATING</code>
+ * table of values
+ * </p>
+ *
+ * @param nodeName to check for self termination
+ * @return <code>true</code> if is self terminating otherwise
+ * <code>false</code>
+ */
+ protected boolean isSelfTerminating(String nodeName) {
+
+ if (nodeName != null) {
+ for (int i = 0; i < SELF_TERMINATING.length; i++) {
+ if (SELF_TERMINATING[i].equalsIgnoreCase(nodeName))
+ return true;
+ }
+ }
+
+ return false;
+ }
+
/**
* <p>This is a factory method that builds a {@link Node} from a
* {@link Token}.
@@ -255,6 +374,7 @@
protected Node buildNode(Token token) {
boolean isBeginTag = false;
boolean isEndTag = false;
+ boolean isComment = false;
String nodeName = null;
String qname = null;
@@ -272,43 +392,72 @@
isEndTag = true;
isBeginTag = true;
} else if (token.getDocument().charAt(token.getBeginOffset()) == '<'
+ && token.getDocument().charAt(token.getBeginOffset() + 1) == '!'
+ && token.getDocument().charAt(token.getEndOffset() - 2) == '-'
+ && token.getDocument().charAt(token.getEndOffset() - 1) == '>') {
+ // self contained comment tag found
+ isEndTag = true;
+ isBeginTag = true;
+ isComment = true;
+ } else if (token.getDocument().charAt(token.getBeginOffset()) == '<'
+ && token.getDocument().charAt(token.getBeginOffset() + 1) == '!'
+ && token.getDocument().charAt(token.getEndOffset() - 1) != '>') {
+ // begin comment tag found
+ isEndTag = false;
+ isBeginTag = true;
+ isComment = true;
+ } else if (token.getDocument().charAt(token.getBeginOffset()) != '<'
+ && token.getDocument().charAt(token.getEndOffset() - 2) == '-'
+ && token.getDocument().charAt(token.getEndOffset() - 1) == '>') {
+ // ending comment tag found
+ isEndTag = true;
+ isBeginTag = false;
+ isComment = true;
+ } else if (token.getDocument().charAt(token.getBeginOffset()) == '<'
&& (token.getDocument().charAt(token.getBeginOffset() + 1) != '/'
- && token.getDocument().charAt(
- token.getBeginOffset() + 1) != '?' && token
- .getDocument().charAt(token.getBeginOffset() + 1) != '%')
+ && token.getDocument().charAt(token.getBeginOffset() + 1) != '?'
+ && token.getDocument().charAt(token.getBeginOffset() + 1) != '%')
&& token.getDocument().charAt(token.getEndOffset() - 1) == '>') {
- // ending tag found
+ // beginning tag found
isEndTag = false;
isBeginTag = true;
}
// find the node name
if (isBeginTag || isEndTag) {
- // find the node name delimiter
- int e = token.getDocument()
- .indexOf(" ", token.getBeginOffset() + 2);
- // end of token is the delimiter
- if (e == -1 || e > token.getEndOffset())
- e = (isBeginTag && isEndTag) ? (token.getEndOffset() - 2)
- : (token.getEndOffset() - 1);
- // find the start of the node attribute body
- int s = (!isBeginTag && isEndTag) ? token.getBeginOffset() + 2
- : token.getBeginOffset() + 1;
-
- // return the full node name
- nodeName = token.getDocument().substring(s, e);
- // separate the namespace
- e = nodeName.indexOf(':');
- if (e > -1)
- qname = nodeName.substring(0, e);
- nodeName = nodeName.substring(e + 1);
+ // comments are treated special because and ending comment may will not
+ // have a node name <!-- <input > -->
+ if (isComment) {
+
+ nodeName = "--";
+
+ } else {
+ // find the node name delimiter
+ int e = token.getDocument().indexOf(" ", token.getBeginOffset() + 2);
+ // end of token is the delimiter
+ if (e == -1 || e > token.getEndOffset())
+ e = (isBeginTag && isEndTag) ? (token.getEndOffset() - 2)
+ : (token.getEndOffset() - 1);
+ // find the start of the node attribute body
+ int s = (!isBeginTag && isEndTag) ? token.getBeginOffset() + 2
+ : token.getBeginOffset() + 1;
+
+ // return the full node name
+ nodeName = token.getDocument().substring(s, e);
+ // separate the namespace
+ e = nodeName.indexOf(':');
+ if (e > -1)
+ qname = nodeName.substring(0, e);
+ nodeName = nodeName.substring(e + 1);
+
+ }
}
Attributes attributes = this.new Attributes();
// look for attribute in a beginning tag only
- if (isBeginTag) {
+ if (isBeginTag && !isComment) {
int s = token.getDocument()
.indexOf(" ", token.getBeginOffset() + 2);
@@ -330,9 +479,7 @@
// add some exception here. check for html elements that are assumed
// self terminating
- if (nodeName != null
- && (nodeName.equalsIgnoreCase("input") ||
- nodeName.equalsIgnoreCase("br"))) {
+ if (isBeginTag && isSelfTerminating(nodeName)) {
isEndTag = true;
}
Added:
struts/shale/trunk/clay-plugin/src/test/org/apache/shale/clay/parser/ParserTestCase.java
URL:
http://svn.apache.org/viewcvs/struts/shale/trunk/clay-plugin/src/test/org/apache/shale/clay/parser/ParserTestCase.java?rev=165372&view=auto
==============================================================================
--- struts/shale/trunk/clay-plugin/src/test/org/apache/shale/clay/parser/ParserTestCase.java (added)
+++ struts/shale/trunk/clay-plugin/src/test/org/apache/shale/clay/parser/ParserTestCase.java Fri Apr 29 19:05:53 2005
@@ -0,0 +1,267 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.shale.clay.parser;
+
+import java.util.Iterator;
+import java.util.List;
+
+import junit.framework.TestCase;
+
+public class ParserTestCase extends TestCase {
+
+
+ /**
+ * <p>Tests to see if we can parse a document fragment
+ * that has multiple root nodes</p>
+ */
+ public void testManyRootNodes() {
+ Parser p = new Parser();
+ StringBuffer doc1 = new StringBuffer();
+
+ doc1.append("<p>")
+ .append("<input type=text size=10 maxsize=10 id=username>")
+ .append("<input type=text size=10 maxsize=10 id=password>")
+ .append("</p>")
+ .append("<p>")
+ .append("This is a test. Just a test")
+ .append("</p>")
+ .append("<p></p>");
+
+ List nodes1 = p.parse(doc1);
+ assertTrue("Has 3 root nodes", nodes1.size() == 3);
+
+ }
+
+ /**
+ * <p>Test a couple comment block scenarios</p>
+ */
+ public void testCommentBlocks() {
+ Parser p = new Parser();
+ StringBuffer doc1 = new StringBuffer();
+
+
+ doc1.append("<p>")
+ .append("<!--<input type=text size=10 maxsize=10 id=username>")
+ .append("<input type=text size=10 maxsize=10 id=password>-->")
+ .append("</p>")
+ .append("<!--This is a test. Just a test-->")
+ .append("<!--<p>Testing <b>123</b></p>-->");
+
+ List nodes1 = p.parse(doc1);
+ assertTrue("Has 3 root nodes", nodes1.size() == 3);
+
+ Node node = (Node) nodes1.get(0);
+ assertTrue("first paragraph has 1 node", node.getChildren().size() == 1);
+
+ node = (Node) node.getChildren().get(0);
+ assertTrue("first comment block has 2 nodes", node.getChildren().size() == 2);
+
+ node = (Node) nodes1.get(1);
+ assertTrue("second root has 0 child nodes", node.getChildren().size() == 0);
+
+ node = (Node) nodes1.get(2);
+ assertTrue("third root has 1 child node", node.getChildren().size() == 1);
+
+
+ }
+
+
+ /**
+ * <p>
+ * Tests case insensitivity in parsing the document.
+ * </p>
+ */
+ public void testCaseInsensitivity() {
+ Parser p = new Parser();
+ StringBuffer doc1 = new StringBuffer();
+
+ doc1.append("<body>").append("<p>").append(
+ "<input type=text size=10 maxsize=10 id=username>").append(
+ "<input type=text size=10 maxsize=10 id=password>").append(
+ "<textarea id=memo>testing 123</textarea>").append("</p>")
+ .append("</body>");
+
+ List nodes1 = p.parse(doc1);
+ assertTrue("Lower case HTML document", nodes1.size() == 1);
+
+ StringBuffer doc2 = new StringBuffer();
+
+ doc2.append("<BODY>").append("<p>").append(
+ "<input TYPE=text size=10 maxSize=10 Id=username>").append(
+ "<input typE=text size=10 MAXSIZE=10 id=password>").append(
+ "<textarea id=memo>testing 123</TEXTAREA>").append("</P>")
+ .append("</body>");
+
+ List nodes2 = p.parse(doc2);
+ assertTrue("Mixed case HTML document", nodes2.size() == 1);
+
+ compareTrees(nodes1, nodes2);
+
+ }
+
+ /**
+ * <p>
+ * Tests the parsing to make sure that self terminated nodes are handled the
+ * same as well-formed self terminating nodes.
+ * </p>
+ */
+ public void testSelfTerminating() {
+ Parser p = new Parser();
+ StringBuffer doc1 = new StringBuffer();
+
+ doc1
+ .append("<head>")
+ .append("<title>Shale Rocks</title>")
+ .append(
+ "<style type=\"text/css\" media=\"all\"><!-- @import \"common.css\"; --></style>")
+ .append(
+ "<style type=\"text/css\" media=\"all\"><!-- @import \"content.css\"; --></style>")
+ .append(
+ "<script type=\"text/javascript\" src=\"common.js\"></script>")
+ .append(
+ "<meta http-equiv=\"content-type\" content=\"text/html; charset=iso-8859-1\">")
+ .append("<META HTTP-EQUIV=\"PRAGMA\" CONTENT=\"NO-CACHE\">")
+ .append("<META HTTP-EQUIV=\"Expires\" CONTENT=\"-1\">")
+ .append(
+ "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">")
+ .append("</head>");
+
+ List nodes1 = p.parse(doc1);
+ assertTrue("Lazy HTML has 1 node", nodes1.size() == 1);
+
+ StringBuffer doc2 = new StringBuffer();
+
+ doc2
+ .append("<head>")
+ .append("<title>Shale Rocks</title>")
+ .append(
+ "<style type=\"text/css\" media=\"all\"><!-- @import \"common.css\"; --></style>")
+ .append(
+ "<style type=\"text/css\" media=\"all\"><!-- @import \"content.css\"; --></style>")
+ .append(
+ "<script type=\"text/javascript\" src=\"common.js\"></script>")
+ .append(
+ "<meta http-equiv=\"content-type\" content=\"text/html; charset=iso-8859-1\"/>")
+ .append("<META HTTP-EQUIV=\"PRAGMA\" CONTENT=\"NO-CACHE\"/>")
+ .append("<META HTTP-EQUIV=\"Expires\" CONTENT=\"-1\"/>")
+ .append(
+ "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\"/>")
+ .append("</head>");
+
+ List nodes2 = p.parse(doc2);
+ assertTrue("Well-formed HTML has 1 node", nodes2.size() == 1);
+
+ compareTrees(nodes1, nodes2);
+
+ }
+
+ /**
+ * <p>
+ * Tests to make sure that the parser handles the HTML tags that can have
+ * optional ending tags the same that it would a document that was
+ * well-formed
+ * </p>
+ */
+ public void testOptionalEnding() {
+ Parser p = new Parser();
+
+ StringBuffer doc1 = new StringBuffer();
+
+ // lazy html
+ doc1
+ .append("<table>")
+ .append("<tr><td>")
+ .append("<ol><li>1<li>2<li>3</ol>")
+ .append("<tr><td>")
+ .append(
+ "<select><option value=1>1<option value=1>2<option value=1>3</select>")
+ .append("</table>");
+
+ List nodes1 = p.parse(doc1);
+ assertTrue("Lazy HTML has 1 node", nodes1.size() == 1);
+
+ // good html
+ StringBuffer doc2 = new StringBuffer();
+ doc2
+ .append("<table>")
+ .append("<tr><td>")
+ .append("<ol><li>1</li><li>2</li><li>3</li></ol>")
+ .append("</td></tr>")
+ .append("<tr><td>")
+ .append(
+ "<select><option value=1>1</option><option value=1>2</option><option value=1>3</option></select>")
+ .append("</td></tr>").append("</table>");
+
+ List nodes2 = p.parse(doc2);
+ assertTrue("Well-formed HTML has 1 node", nodes2.size() == 1);
+
+ compareTrees(nodes1, nodes2);
+
+ }
+
+ /**
+ * <p>
+ * Aserts that two trees of parsed HTML have the same number children and
+ * the same attributes. Verifies that the structure is the same
+ * </p>
+ *
+ * @param tree1
+ * @param tree2
+ */
+ protected void compareTrees(List tree1, List tree2) {
+
+ boolean isSame = (tree1 == null && tree2 == null)
+ || (tree1.size() == tree2.size());
+
+ assertTrue("Trees have same # children", isSame);
+ if (tree1 != null && tree2 != null) {
+ for (int i = 0; i < tree1.size(); i++) {
+ Node node1 = (Node) tree1.get(i);
+ Node node2 = (Node) tree2.get(i);
+
+ isSame = false;
+ if (node1 != null && node2 != null) {
+ if (node1.getName() == null && node2.getName() == null)
+ isSame = true;
+ else
+ isSame = (node1.getName() != null
+ && node2.getName() != null && node1.getName()
+ .equalsIgnoreCase(node2.getName()));
+
+ assertTrue("Nodes names are equal", isSame);
+
+ isSame = (node1.getAttributes().size() == node2
+ .getAttributes().size());
+ assertTrue("Nodes have same # attributes", isSame);
+ Iterator ki = node1.getAttributes().keySet().iterator();
+ while (ki.hasNext()) {
+ String key = (String) ki.next();
+ String value1 = (String) node1.getAttributes().get(key);
+ String value2 = (String) node2.getAttributes().get(key);
+ isSame = value1.equalsIgnoreCase(value2);
+ assertTrue("Nodes have same attribute value", isSame);
+
+ }
+ compareTrees(node1.getChildren(), node2.getChildren());
+
+ }
+ }
+
+ }
+ }
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]