woolfel 2003/10/16 12:10:38
Added: src/htmlparser/org/htmlparser/parserHelper
AttributeParser.java CompositeTagScannerHelper.java
ParserHelper.java StringParser.java TagParser.java
Log:
more classes to add
Revision Changes Path
1.1
jakarta-jmeter/src/htmlparser/org/htmlparser/parserHelper/AttributeParser.java
Index: AttributeParser.java
===================================================================
/*
* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001-2003 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache JMeter" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache",
* "Apache JMeter", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
*/
// The developers of JMeter and Apache are grateful to the developers
// of HTMLParser for giving the Apache Software Foundation a non-exclusive
// license. The performance benefits of HTMLParser are clear and the
// users of JMeter will benefit from the hard work of the HTMLParser
// team. For detailed information about HTMLParser, the project is
// hosted on SourceForge at http://htmlparser.sourceforge.net/.
//
// HTMLParser was originally created by Somik Raha in 2000. Since then
// a healthy community of users has formed and helped refine the
// design so that it is able to tackle the difficult task of parsing
// dirty HTML. Derrick Oswald is the current lead developer and was kind
// enough to assist JMeter.
package org.htmlparser.parserHelper;
import java.util.Hashtable;
import java.util.StringTokenizer;
import org.htmlparser.tags.Tag;
/**
* To change this generated comment edit the template variable "typecomment":
* Window>Preferences>Java>Templates.
* To enable and disable the creation of type comments go to
* Window>Preferences>Java>Code Generation.
* @author Somik Raha, Kaarle Kaila
* @version 7 AUG 2001
*/
public class AttributeParser
{
private final String delima = " \t\r\n\f=\"'>";
private final String delimb = " \t\r\n\f\"'>";
private final char doubleQuote = '\"';
private final char singleQuote = '\'';
private String delim;
/**
* Breaks the text of a tag into its name and attributes.
* <p>
* The tag name is stored in the returned Hashtable under the key
* <code>Tag.TAGNAME</code>. Every attribute is stored as a name/value pair;
* an attribute without a value (or with an empty quoted value such as
* <code>alt=""</code>) gets the value "". A "/" left pending at the end of
* the text is recorded under the key <code>Tag.EMPTYTAG</code>.
* As html is case insensitive but Hashtable is not, the tag name and all
* attribute names are converted to UPPERCASE before being stored.
* <p>
* E.g. extract the href values from A-tags and print them:
* <pre>
*
* Tag tag;
* Hashtable h;
* String tmp;
* try {
* NodeReader in = new NodeReader(new FileReader(path),2048);
* Parser p = new Parser(in);
* Enumeration en = p.elements();
* while (en.hasMoreElements()) {
* try {
* tag = (Tag)en.nextElement();
* h = tag.parseParameters();
* tmp = (String)h.get(tag.TAGNAME);
* if (tmp != null && tmp.equalsIgnoreCase("A")) {
* System.out.println("URL is :" + h.get("HREF"));
* }
* } catch (ClassCastException ce){}
* }
* }
* catch (IOException ie) {
* ie.printStackTrace();
* }
* </pre>
*
* @param tag the tag whose text (<code>tag.getText()</code>) is parsed.
* @return a Hashtable holding the tag name and the attribute name/value pairs.
*/
public Hashtable parseAttributes(Tag tag)
{
    Hashtable h = new Hashtable();
    String element = null; // the tag name, once seen
    String name = null; // attribute name still waiting for its value
    String nextPart;
    boolean waitingForEqual = false;
    delim = delima;
    StringTokenizer tokenizer =
        new StringTokenizer(tag.getText(), delim, true);
    while (true)
    {
        nextPart = getNextPart(tokenizer, delim);
        // '=' is only dropped from the delimiters for the part right after '='
        delim = delima;
        if (element == null && nextPart != null && !nextPart.equals("="))
        {
            // the first part is the tag name
            element = nextPart;
            putDataIntoTable(h, element, null, true);
        }
        else if (nextPart == null)
        {
            // no more tokens: flush a pending attribute name and stop
            if (name != null)
            {
                if (name.equals("/"))
                {
                    putDataIntoTable(h, Tag.EMPTYTAG, "", false);
                }
                else
                {
                    putDataIntoTable(h, name, "", false);
                }
            }
            break;
        }
        else if (nextPart.length() == 0)
        {
            // An empty quoted string ("" or ''). It used to be mistaken for
            // the end of the input, which silently dropped every attribute
            // that followed it.
            if (name != null && !waitingForEqual)
            {
                putDataIntoTable(h, name, "", false);
                name = null;
            }
            // otherwise it is a stray empty quote pair: ignore it
        }
        else if (name == null)
        {
            if (!nextPart.substring(0, 1).equals(" "))
            {
                name = nextPart;
                waitingForEqual = true;
            }
        }
        else
        {
            if (waitingForEqual)
            {
                if (nextPart.equals("="))
                {
                    waitingForEqual = false;
                    // the value may itself contain '='
                    delim = delimb;
                }
                else
                {
                    // previous name had no value; this part is a new name
                    putDataIntoTable(h, name, "", false);
                    name = nextPart;
                }
            }
            if (!waitingForEqual && !nextPart.equals("="))
            {
                putDataIntoTable(h, name, nextPart, false);
                name = null;
            }
        }
    }
    if (null == element) // handle no tag contents
        putDataIntoTable(h, "", null, true);
    return h;
}
/**
* Reads tokens until one complete part of the tag text is available.
* A part is a run of non-delimiter characters, a lone "=", or everything
* found between a pair of matching quotes (the quotes themselves are not
* included). Delimiter tokens other than "=" are skipped, although a
* trailing one is returned if the tokens run out right after it.
* @param tokenizer source of the tokens; delimiters are returned as tokens.
* @param deli the delimiter set to use when fetching tokens.
* @return the next part, or <code>null</code> if there were no tokens left.
*/
private String getNextPart(StringTokenizer tokenizer, String deli)
{
    String part = null;
    char openQuote = 0; // quote character we are inside of, 0 when outside
    while (tokenizer.hasMoreTokens())
    {
        String token = tokenizer.nextToken(deli);
        char first = token.charAt(0);
        if (openQuote != 0)
        {
            // inside "" or '': glue tokens together until the quote closes
            if (first == openQuote)
                return part;
            part += token;
        }
        else if (first == doubleQuote || first == singleQuote)
        {
            openQuote = first;
            part = "";
        }
        else
        {
            part = token;
            // plain delimiters are skipped, but '=' is meaningful to the caller
            if (delim.indexOf(token) < 0 || token.equals("="))
                return part;
        }
    }
    return part;
}
/**
* Stores either the tag name or one attribute in the table.
* Names are upper-cased with a fixed locale so that lookups such as
* <code>h.get("TITLE")</code> work whatever the default locale is (the
* default-locale conversion maps 'i' to a dotted capital I under Turkish).
* @param h the table to fill.
* @param name the tag name or the attribute name.
* @param value the attribute value; <code>null</code> is replaced by
* <code>Tag.TAGNAME</code> for a tag name and by "" for an attribute.
* @param isName <code>true</code> if <code>name</code> is the tag name.
*/
private void putDataIntoTable(
    Hashtable h,
    String name,
    String value,
    boolean isName)
{
    String upperName = name.toUpperCase(java.util.Locale.ENGLISH);
    if (isName && value == null)
        value = Tag.TAGNAME;
    else if (value == null)
        value = ""; // Hashtable does not accept nulls
    if (isName)
    {
        // store tagname as tag.TAGNAME,tag
        h.put(value, upperName);
    }
    else
    {
        // store tag parameters as NAME, value
        h.put(upperName, value);
    }
}
}
1.1
jakarta-jmeter/src/htmlparser/org/htmlparser/parserHelper/CompositeTagScannerHelper.java
Index: CompositeTagScannerHelper.java
===================================================================
/*
* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001-2003 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache JMeter" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache",
* "Apache JMeter", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
*/
// The developers of JMeter and Apache are grateful to the developers
// of HTMLParser for giving the Apache Software Foundation a non-exclusive
// license. The performance benefits of HTMLParser are clear and the
// users of JMeter will benefit from the hard work of the HTMLParser
// team. For detailed information about HTMLParser, the project is
// hosted on SourceForge at http://htmlparser.sourceforge.net/.
//
// HTMLParser was originally created by Somik Raha in 2000. Since then
// a healthy community of users has formed and helped refine the
// design so that it is able to tackle the difficult task of parsing
// dirty HTML. Derrick Oswald is the current lead developer and was kind
// enough to assist JMeter.
package org.htmlparser.parserHelper;
import org.htmlparser.Node;
import org.htmlparser.NodeReader;
import org.htmlparser.scanners.CompositeTagScanner;
import org.htmlparser.tags.CompositeTag;
import org.htmlparser.tags.EndTag;
import org.htmlparser.tags.Tag;
import org.htmlparser.tags.data.CompositeTagData;
import org.htmlparser.tags.data.TagData;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
public class CompositeTagScannerHelper
{
private CompositeTagScanner scanner;
private Tag tag;
private String url;
private NodeReader reader;
private String currLine;
private Tag endTag;
private NodeList nodeList;
private boolean endTagFound;
private int startingLineNumber;
private int endingLineNumber;
private boolean balance_quotes;
/**
* Creates a helper for scanning one composite tag.
* @param scanner the scanner that is consulted during the scan and that
* creates the resulting tag.
* @param tag the start tag being scanned.
* @param url the url passed on to the created tag's data.
* @param reader the reader the child nodes are read from.
* @param currLine the line text used when building tag data.
* @param balance_quotes passed to the reader when reading elements.
*/
public CompositeTagScannerHelper(
    CompositeTagScanner scanner,
    Tag tag,
    String url,
    NodeReader reader,
    String currLine,
    boolean balance_quotes)
{
    // collaborators and inputs
    this.reader = reader;
    this.scanner = scanner;
    this.balance_quotes = balance_quotes;
    this.url = url;
    this.currLine = currLine;
    this.tag = tag;
    // scan state: nothing collected and no end tag seen yet
    this.nodeList = new NodeList();
    this.endTagFound = false;
    this.endTag = null;
}
/**
* Scans from the start tag up to its end tag and builds the composite tag.
* Nodes are read from the reader until an end tag is found, a correction
* end tag is created, or the reader returns <code>null</code>. If no end
* tag exists by then, one is created just after the last read position.
* @return the tag built by {@link #createTag()}, or only a correction end
* tag when the scanner asks to create an end tag and exit.
* @exception ParserException passed on from reading elements or creating
* the tag.
*/
public Tag scan() throws ParserException
{
    startingLineNumber = reader.getLastLineNumber();
    if (shouldCreateEndTagAndExit())
        return createEndTagAndRepositionReader();
    scanner.beforeScanningStarts();
    // the start tag may already be complete on its own, e.g. <tag/>
    doEmptyXmlTagCheckOn(tag);
    while (!endTagFound)
    {
        Node node = reader.readElement(balance_quotes);
        if (node == null)
            break; // nothing left to read
        currLine = reader.getCurrentLine();
        if (node instanceof Tag)
            doForceCorrectionCheckOn((Tag) node);
        doEmptyXmlTagCheckOn(node);
        if (!endTagFound)
            doChildAndEndTagCheckOn(node);
    }
    if (endTag == null)
        createCorrectionEndTagBefore(reader.getLastReadPosition() + 1);
    endingLineNumber = reader.getLastLineNumber();
    return createTag();
}
/**
* Asks the scanner whether scanning should stop at once with a
* correction end tag.
* @return the scanner's answer.
*/
private boolean shouldCreateEndTagAndExit()
{
    final boolean exitEarly = scanner.shouldCreateEndTagAndExit();
    return exitEarly;
}
/**
* Creates a correction end tag at the position where the start tag
* begins, moves the reader back to that position and tells it not to
* read the next line.
* @return the correction end tag.
*/
private Tag createEndTagAndRepositionReader()
{
    final int startOfTag = tag.elementBegin();
    createCorrectionEndTagBefore(startOfTag);
    reader.setPosInLine(startOfTag);
    reader.setDontReadNextLine(true);
    return endTag;
}
/**
* Creates an artificial end tag, named after the start tag, beginning at
* the given position and stores it in <code>endTag</code>.
* @param pos the position at which the end tag begins.
*/
private void createCorrectionEndTagBefore(int pos)
{
    final String name = tag.getTagName();
    // NOTE(review): the + 2 presumably covers markup characters around the
    // name - confirm against how TagData positions are used.
    final TagData data =
        new TagData(pos, pos + name.length() + 2, name, currLine);
    endTag = new EndTag(data);
}
/**
* Creates an artificial end tag, named after the start tag, at the place
* where the given tag begins. The given tag is moved to begin just after
* the new end tag and handed back to the reader as the next parsed node.
* @param possibleEndTagCauser the tag whose appearance ends the composite.
*/
private void createCorrectionEndTagBefore(Tag possibleEndTagCauser)
{
    final String name = tag.getTagName();
    final int begin = possibleEndTagCauser.elementBegin();
    final int end = begin + name.length() + 2;
    // push the causing tag behind the end tag and let the reader replay it
    possibleEndTagCauser.setTagBegin(end + 1);
    reader.addNextParsedNode(possibleEndTagCauser);
    final TagData data = new TagData(begin, end, name, currLine);
    endTag = new EndTag(data);
}
/**
* Builds a copy of the current line with an end tag inserted.
* @param endTagName the name to put between "&lt;/" and "&gt;".
* @param endTagBegin the index in the current line where the end tag is
* inserted.
* @return the current line with the end tag spliced in.
*/
private StringBuffer createModifiedLine(String endTagName, int endTagBegin)
{
    final String head = currLine.substring(0, endTagBegin);
    final String tail = currLine.substring(endTagBegin);
    return new StringBuffer()
        .append(head)
        .append("</")
        .append(endTagName)
        .append(">")
        .append(tail);
}
/**
* Lets the scanner create the composite tag from the start tag, the end
* tag and the collected children, then makes the new tag the parent of
* each of its children.
* @return the newly created composite tag.
* @exception ParserException passed on from the scanner.
*/
private Tag createTag() throws ParserException
{
    final TagData tagData =
        new TagData(
            tag.elementBegin(),
            endTag.elementEnd(),
            startingLineNumber,
            endingLineNumber,
            tag.getText(),
            currLine,
            url,
            tag.isEmptyXmlTag());
    final CompositeTagData compositeData =
        new CompositeTagData(tag, endTag, nodeList);
    final CompositeTag composite =
        (CompositeTag) scanner.createTag(tagData, compositeData);
    int index = 0;
    while (index < composite.getChildCount())
    {
        composite.childAt(index).setParent(composite);
        index++;
    }
    return composite;
}
/**
* Decides whether the node is the end tag being waited for or a child.
* The expected end tag ends the scan; any other node is added to the
* child list and reported to the scanner.
* @param currentNode the node just read.
*/
private void doChildAndEndTagCheckOn(Node currentNode)
{
    if (currentNode instanceof EndTag
        && isExpectedEndTag((EndTag) currentNode))
    {
        endTag = (EndTag) currentNode;
        endTagFound = true;
        return;
    }
    nodeList.add(currentNode);
    scanner.childNodeEncountered(currentNode);
}
/**
* Tells whether an end tag carries the same name as the start tag.
* @param possibleEndTag the end tag to compare.
* @return <code>true</code> if the tag names are equal.
*/
private boolean isExpectedEndTag(EndTag possibleEndTag)
{
    final String candidateName = possibleEndTag.getTagName();
    final String expectedName = tag.getTagName();
    return candidateName.equals(expectedName);
}
/**
* If the start tag is an empty xml tag, uses the given node as the end
* tag and ends the scan. Nodes that are not tags are ignored.
* @param currentNode the node to use as end tag.
*/
private void doEmptyXmlTagCheckOn(Node currentNode)
{
    if (!(currentNode instanceof Tag))
        return;
    // NOTE(review): the test is made on the start tag (the tag field), not
    // on currentNode. That looks intentional - testing the child instead
    // would end the composite on any <child/> - but confirm.
    if (isXmlEndTag(tag))
    {
        endTag = (Tag) currentNode;
        endTagFound = true;
    }
}
/**
* Ends the scan with a correction end tag if the given tag shows that
* the real end tag is missing.
* @param possibleEndTagCauser the tag just read.
*/
private void doForceCorrectionCheckOn(Tag possibleEndTagCauser)
{
    if (!isEndTagMissing(possibleEndTagCauser))
        return;
    createCorrectionEndTagBefore(possibleEndTagCauser);
    endTagFound = true;
}
/**
* Tells whether the given tag implies that the end tag was left out:
* either the scanner says the composite is to be ended for it, or it is
* a nested copy of the start tag that is not allowed.
* @param possibleEndTag the tag just read.
* @return <code>true</code> if an end tag has to be supplied.
*/
private boolean isEndTagMissing(Tag possibleEndTag)
{
    if (scanner.isTagToBeEndedFor(possibleEndTag))
        return true;
    return isSelfChildTagRecievedIncorrectly(possibleEndTag);
}
/**
* Tells whether the given tag is a start tag with the same name as the
* tag being scanned although the scanner does not allow self children.
* @param possibleEndTag the tag just read.
* @return <code>true</code> for a disallowed nested tag of the same name.
*/
private boolean isSelfChildTagRecievedIncorrectly(Tag possibleEndTag)
{
    if (possibleEndTag instanceof EndTag)
        return false;
    if (scanner.isAllowSelfChildren())
        return false;
    return possibleEndTag.getTagName().equals(tag.getTagName());
}
/**
* Tells whether a tag closes itself, as in <code>&lt;tag/&gt;</code>.
* That is the case when its text ends with "/" or the tag reports itself
* as an empty xml tag, unless the text contains "://".
* Empty text is not taken to end with "/"; the former comparison of
* <code>lastIndexOf("/")</code> with <code>length() - 1</code> was true for
* it because both are -1.
* @param tag the tag to examine.
* @return <code>true</code> if the tag is an empty xml tag.
*/
public boolean isXmlEndTag(Tag tag)
{
    String tagText = tag.getText();
    boolean selfClosing = tagText.endsWith("/") || tag.isEmptyXmlTag();
    return selfClosing && tagText.indexOf("://") == -1;
}
}
1.1
jakarta-jmeter/src/htmlparser/org/htmlparser/parserHelper/ParserHelper.java
Index: ParserHelper.java
===================================================================
/*
* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001-2003 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache JMeter" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache",
* "Apache JMeter", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
*/
// The developers of JMeter and Apache are grateful to the developers
// of HTMLParser for giving the Apache Software Foundation a non-exclusive
// license. The performance benefits of HTMLParser are clear and the
// users of JMeter will benefit from the hard work of the HTMLParser
// team. For detailed information about HTMLParser, the project is
// hosted on SourceForge at http://htmlparser.sourceforge.net/.
//
// HTMLParser was originally created by Somik Raha in 2000. Since then
// a healthy community of users has formed and helped refine the
// design so that it is able to tackle the difficult task of parsing
// dirty HTML. Derrick Oswald is the current lead developer and was kind
// enough to assist JMeter.
package org.htmlparser.parserHelper;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import org.htmlparser.util.LinkProcessor;
import org.htmlparser.util.ParserException;
import org.htmlparser.util.ParserFeedback;
public class ParserHelper implements Serializable
{
public ParserHelper()
{
super();
}
/**
* Opens a connection using the given url.
* @param url The url to open.
* @param feedback The object to use for messages or <code>null</code>.
* @return The connection obtained from the url.
* @exception ParserException if an i/o exception occurs accessing the url.
*/
public static URLConnection openConnection(
    URL url,
    ParserFeedback feedback)
    throws ParserException
{
    try
    {
        return url.openConnection();
    }
    catch (IOException ioe)
    {
        // report through the feedback object (if any) and rethrow wrapped
        final String target = url.toExternalForm();
        final String msg =
            "HTMLParser.openConnection() : Error in opening a connection to "
                + target;
        final ParserException failure = new ParserException(msg, ioe);
        if (feedback != null)
            feedback.error(msg, failure);
        throw failure;
    }
}
/**
* Opens a connection based on a given string.
* The string is either a file, in which case <code>file://localhost</code>
* is prepended to a canonical path derived from the string, or a url that
* begins with one of the known protocol strings, i.e. <code>http://</code>.
* Embedded spaces are silently converted to %20 sequences.
* @param string The name of a file or a url.
* @param feedback The object to use for messages or <code>null</code> for
* no feedback.
* @return The connection to the url or file.
* @exception ParserException if the string is not a valid url or file.
*/
public static URLConnection openConnection(
    String string,
    ParserFeedback feedback)
    throws ParserException
{
    try
    {
        URL direct = new URL(LinkProcessor.fixSpaces(string));
        return ParserHelper.openConnection(direct, feedback);
    }
    catch (MalformedURLException notAUrl)
    {
        // not a url; fall through and try it as a file
    }
    try
    {
        String canonical = new File(string).getCanonicalPath();
        URL fileUrl =
            new URL(LinkProcessor.fixSpaces("file://localhost" + canonical));
        URLConnection connection =
            ParserHelper.openConnection(fileUrl, feedback);
        if (feedback != null)
            feedback.info(fileUrl.toExternalForm());
        return connection;
    }
    catch (IOException ioe)
    {
        // also covers MalformedURLException, which is an IOException
        final String msg =
            "HTMLParser.openConnection() : Error in opening a connection to "
                + string;
        final ParserException failure = new ParserException(msg, ioe);
        if (feedback != null)
            feedback.error(msg, failure);
        throw failure;
    }
}
/**
* Lookup a character set name.
* <em>Vacuous for JVM's without <code>java.nio.charset</code>.</em>
* This uses reflection so the code will still run under prior JDK's; if
* the reflective lookup itself is not possible the given name is returned
* unchanged.
* @param name The name to look up. One of the aliases for a character set.
* @param _default The name to return if the lookup fails.
* @return The name reported by the character set found for
* <code>name</code>, <code>_default</code> if the character set lookup
* throws, or <code>name</code> if reflection is unavailable.
*/
public static String findCharset(String name, String _default)
{
    // for reflection problems, assume the name is correct
    String result = name;
    try
    {
        Class charsetClass = Class.forName("java.nio.charset.Charset");
        java.lang.reflect.Method lookup =
            charsetClass.getMethod("forName", new Class[] { String.class });
        Object charset = lookup.invoke(null, new Object[] { name });
        java.lang.reflect.Method getName =
            charsetClass.getMethod("name", new Class[0]);
        result = (String) getName.invoke(charset, new Object[0]);
    }
    catch (ClassNotFoundException cnfe)
    {
        // no java.nio.charset in this JVM: keep the name
    }
    catch (NoSuchMethodException nsme)
    {
        // unexpected Charset api: keep the name
    }
    catch (IllegalAccessException ia)
    {
        // method not accessible: keep the name
    }
    catch (java.lang.reflect.InvocationTargetException ita)
    {
        // java.nio.charset.IllegalCharsetNameException
        // and java.nio.charset.UnsupportedCharsetException
        // return the default
        result = _default;
    }
    return result;
}
}
1.1
jakarta-jmeter/src/htmlparser/org/htmlparser/parserHelper/StringParser.java
Index: StringParser.java
===================================================================
/*
* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001-2003 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache JMeter" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache",
* "Apache JMeter", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
*/
// The developers of JMeter and Apache are grateful to the developers
// of HTMLParser for giving the Apache Software Foundation a non-exclusive
// license. The performance benefits of HTMLParser are clear and the
// users of JMeter will benefit from the hard work of the HTMLParser
// team. For detailed information about HTMLParser, the project is
// hosted on SourceForge at http://htmlparser.sourceforge.net/.
//
// HTMLParser was originally created by Somik Raha in 2000. Since then
// a healthy community of users has formed and helped refine the
// design so that it is able to tackle the difficult task of parsing
// dirty HTML. Derrick Oswald is the current lead developer and was kind
// enough to assist JMeter.
package org.htmlparser.parserHelper;
import org.htmlparser.Node;
import org.htmlparser.NodeReader;
import org.htmlparser.StringNode;
public class StringParser
{
private final static int BEFORE_PARSE_BEGINS_STATE = 0;
private final static int PARSE_HAS_BEGUN_STATE = 1;
private final static int PARSE_COMPLETED_STATE = 2;
private final static int PARSE_IGNORE_STATE = 3;
/**
* Returns true if the text at <code>pos</code> in <code>line</code> should
* be scanned as a tag.
* Basically an open angle followed by a known special character
* ('/', '%' or '!') or a letter.
* @param line The current line being parsed.
* @param pos The position in the line to examine.
* @return <code>true</code> if we think this is the start of a tag.
*/
private boolean beginTag(String line, int pos)
{
    // need room for the angle bracket and the character after it
    if (pos + 2 > line.length() || '<' != line.charAt(pos))
        return false;
    final char next = line.charAt(pos + 1);
    return '/' == next
        || '%' == next
        || Character.isLetter(next)
        || '!' == next;
}
/**
* Locate the StringNode within the input string, by parsing from the given
* position.
* Characters are collected until the start of a tag is recognised (see
* <code>beginTag</code>) or the reader has no more lines. When the end of
* the line is reached while collecting, the next line is fetched from the
* reader and parsing goes on there.
* @param reader HTML reader to be provided so as to allow reading of next line
* @param input Input String
* @param position Position to start parsing from
* @param balance_quotes If <code>true</code> enter ignoring state on
* encountering quotes.
* @return A StringNode built from the collected text and the begin and end
* positions.
*/
public Node find(
NodeReader reader,
String input,
int position,
boolean balance_quotes)
{
StringBuffer textBuffer = new StringBuffer();
int state = BEFORE_PARSE_BEGINS_STATE;
int textBegin = position;
int textEnd = position;
int inputLen = input.length();
char ch;
// the quote character that ends the current ignore state
char ignore_ender = '\"';
for (int i = position;
(i < inputLen && state != PARSE_COMPLETED_STATE);
i++)
{
ch = input.charAt(i);
// a tag start ends the text, unless we are inside quotes being ignored
if (ch == '<' && state != PARSE_IGNORE_STATE)
{
if (beginTag(input, i))
{
state = PARSE_COMPLETED_STATE;
textEnd = i - 1;
}
}
// with balance_quotes a quote opens the ignore state and only the same
// quote character closes it again
if (balance_quotes && (ch == '\'' || ch == '"'))
{
if (state == PARSE_IGNORE_STATE)
{
if (ch == ignore_ender)
state = PARSE_HAS_BEGUN_STATE;
}
else
{
ignore_ender = ch;
state = PARSE_IGNORE_STATE;
}
}
if (state == BEFORE_PARSE_BEGINS_STATE)
{
state = PARSE_HAS_BEGUN_STATE;
}
if (state == PARSE_HAS_BEGUN_STATE || state == PARSE_IGNORE_STATE)
{
textBuffer.append(input.charAt(i));
}
// Patch by Cedric Rosa
// NOTE(review): the state can no longer be BEFORE_PARSE_BEGINS_STATE here,
// it was advanced a few lines above, so this check looks redundant.
if (state == BEFORE_PARSE_BEGINS_STATE && i == inputLen - 1)
state = PARSE_HAS_BEGUN_STATE;
// end of line while collecting text: continue on the next line. This is
// not done while in the ignore state.
if (state == PARSE_HAS_BEGUN_STATE && i == inputLen - 1)
{
// skip empty lines, keeping one line separator for each of them
do
{
input = reader.getNextLine();
if (input != null && input.length() == 0)
textBuffer.append(Node.getLineSeparator());
}
while (input != null && input.length() == 0);
if (input == null)
{
// no more lines: the text ends here
textEnd = i;
state = PARSE_COMPLETED_STATE;
}
else
{
textBuffer.append(Node.getLineSeparator());
inputLen = input.length();
// restart at the first character of the new line (i++ makes it 0)
i = -1;
}
}
}
return new StringNode(textBuffer, textBegin, textEnd);
}
}
1.1
jakarta-jmeter/src/htmlparser/org/htmlparser/parserHelper/TagParser.java
Index: TagParser.java
===================================================================
/*
* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001-2003 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache JMeter" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache",
* "Apache JMeter", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
*/
// The developers of JMeter and Apache are grateful to the developers
// of HTMLParser for giving Apache Software Foundation a non-exclusive
// license. The performance benefits of HTMLParser are clear and the
// users of JMeter will benefit from the hard work of the HTMLParser
// team. For detailed information about HTMLParser, the project is
// hosted on sourceforge at http://htmlparser.sourceforge.net/.
//
// HTMLParser was originally created by Somik Raha in 2000. Since then
// a healthy community of users has formed and helped refine the
// design so that it is able to tackle the difficult task of parsing
// dirty HTML. Derrick Oswald is the current lead developer and was kind
// enough to assist JMeter.
package org.htmlparser.parserHelper;
import java.util.StringTokenizer;
import org.htmlparser.Node;
import org.htmlparser.NodeReader;
import org.htmlparser.tags.Tag;
import org.htmlparser.tags.data.TagData;
import org.htmlparser.util.ParserFeedback;
public class TagParser
{
public final static int TAG_BEFORE_PARSING_STATE = 1;
public final static int TAG_BEGIN_PARSING_STATE = 1 << 2;
public final static int TAG_FINISHED_PARSING_STATE = 1 << 3;
public final static int TAG_ILLEGAL_STATE = 1 << 4;
public final static int TAG_IGNORE_DATA_STATE = 1 << 5;
public final static int TAG_IGNORE_BEGIN_TAG_STATE = 1 << 6;
public final static int TAG_IGNORE_CHAR_SINGLE_QUOTE = 1 << 7;
public final static String ENCOUNTERED_QUERY_MESSAGE =
"TagParser : Encountered > after a query. Accepting without correction and
continuing parsing";
private ParserFeedback feedback;
public TagParser(ParserFeedback feedback)
{
this.feedback = feedback;
}
public Tag find(NodeReader reader, String input, int position)
{
int state = TAG_BEFORE_PARSING_STATE;
int i = position;
char ch;
char[] ignorechar = new char[1];
// holds the character we're looking for when in TAG_IGNORE_DATA_STATE
Tag tag =
new Tag(
new TagData(
position,
0,
reader.getLastLineNumber(),
0,
"",
input,
"",
false));
Bool encounteredQuery = new Bool(false);
while (i < tag.getTagLine().length()
&& state != TAG_FINISHED_PARSING_STATE
&& state != TAG_ILLEGAL_STATE)
{
ch = tag.getTagLine().charAt(i);
state =
automataInput(
encounteredQuery,
i,
state,
ch,
tag,
i,
ignorechar);
i = incrementCounter(i, reader, state, tag);
}
if (state == TAG_FINISHED_PARSING_STATE)
{
String tagLine = tag.getTagLine();
if (i > 1 && tagLine.charAt(i - 2) == '/')
{
tag.setEmptyXmlTag(true);
String tagContents = tag.getText();
tag.setText(tagContents.substring(0, tagContents.length() - 1));
}
return tag;
}
else
return null;
}
private int automataInput(
Bool encounteredQuery,
int i,
int state,
char ch,
Tag tag,
int pos,
char[] ignorechar)
{
state = checkIllegalState(i, state, ch, tag);
state = checkFinishedState(encounteredQuery, i, state, ch, tag, pos);
state = toggleIgnoringState(state, ch, ignorechar);
if (state == TAG_BEFORE_PARSING_STATE && ch != '<')
{
state = TAG_ILLEGAL_STATE;
}
if (state == TAG_IGNORE_DATA_STATE && ch == '<')
{
// If the next tag char is is close tag, then
// this is legal, we should continue
if (!isWellFormedTag(tag, pos))
state = TAG_IGNORE_BEGIN_TAG_STATE;
}
if (state == TAG_IGNORE_BEGIN_TAG_STATE && ch == '>')
{
state = TAG_IGNORE_DATA_STATE;
}
checkIfAppendable(encounteredQuery, state, ch, tag);
state = checkBeginParsingState(i, state, ch, tag);
return state;
}
private int checkBeginParsingState(int i, int state, char ch, Tag tag)
{
if (ch == '<'
&& (state == TAG_BEFORE_PARSING_STATE || state == TAG_ILLEGAL_STATE))
{
// Transition from State 0 to State 1 - Record data till > is encountered
tag.setTagBegin(i);
state = TAG_BEGIN_PARSING_STATE;
}
return state;
}
private boolean isWellFormedTag(Tag tag, int pos)
{
String inputLine = tag.getTagLine();
int closeTagPos = inputLine.indexOf('>', pos + 1);
int openTagPos = inputLine.indexOf('<', pos + 1);
return openTagPos > closeTagPos
|| (openTagPos == -1 && closeTagPos != -1);
}
private int checkFinishedState(
Bool encounteredQuery,
int i,
int state,
char ch,
Tag tag,
int pos)
{
if (ch == '>')
{
if (state == TAG_BEGIN_PARSING_STATE)
{
state = TAG_FINISHED_PARSING_STATE;
tag.setTagEnd(i);
}
else if (state == TAG_IGNORE_DATA_STATE)
{
if (encounteredQuery.getBoolean())
{
encounteredQuery.setBoolean(false);
feedback.info(ENCOUNTERED_QUERY_MESSAGE);
return state;
}
// Now, either this is a valid > input, and should be ignored,
// or it is a mistake in the html, in which case we need to correct
it *sigh*
if (isWellFormedTag(tag, pos))
return state;
state = TAG_FINISHED_PARSING_STATE;
tag.setTagEnd(i);
// Do Correction
// Correct the tag - assuming its grouped into name value pairs
// Remove all inverted commas.
correctTag(tag);
StringBuffer msg = new StringBuffer();
msg.append(
"HTMLTagParser : Encountered > inside inverted commas in line
\n");
msg.append(tag.getTagLine());
msg.append(", location ");
msg.append(i);
msg.append("\n");
for (int j = 0; j < i; j++)
msg.append(' ');
msg.append('^');
msg.append("\nAutomatically corrected.");
feedback.warning(msg.toString());
}
}
else if (
ch == '<'
&& state == TAG_BEGIN_PARSING_STATE
&& tag.getText().charAt(0) != '%')
{
state = TAG_FINISHED_PARSING_STATE;
tag.setTagEnd(i - 1);
i--;
}
return state;
}
private void checkIfAppendable(
Bool encounteredQuery,
int state,
char ch,
Tag tag)
{
if (state == TAG_IGNORE_DATA_STATE
|| state == TAG_BEGIN_PARSING_STATE
|| state == TAG_IGNORE_BEGIN_TAG_STATE)
{
if (ch == '?')
encounteredQuery.setBoolean(true);
tag.append(ch);
}
}
private int checkIllegalState(int i, int state, char ch, Tag tag)
{
if (ch == '/'
&& i > 0
&& tag.getTagLine().charAt(i - 1) == '<'
&& state != TAG_IGNORE_DATA_STATE
&& state != TAG_IGNORE_BEGIN_TAG_STATE)
{
state = TAG_ILLEGAL_STATE;
}
return state;
}
public void correctTag(Tag tag)
{
String tempText = tag.getText();
StringBuffer absorbedText = new StringBuffer();
char c;
for (int j = 0; j < tempText.length(); j++)
{
c = tempText.charAt(j);
if (c != '"')
absorbedText.append(c);
}
// Go into the next stage.
StringBuffer result = insertInvertedCommasCorrectly(absorbedText);
tag.setText(result.toString());
}
public StringBuffer insertInvertedCommasCorrectly(StringBuffer absorbedText)
{
StringBuffer result = new StringBuffer();
StringTokenizer tok =
new StringTokenizer(absorbedText.toString(), "=", false);
String token;
token = (String) tok.nextToken();
result.append(token + "=");
for (; tok.hasMoreTokens();)
{
token = (String) tok.nextToken();
token = pruneSpaces(token);
result.append('"');
int lastIndex = token.lastIndexOf(' ');
if (lastIndex != -1 && tok.hasMoreTokens())
{
result.append(token.substring(0, lastIndex));
result.append('"');
result.append(token.substring(lastIndex, token.length()));
}
else
result.append(token + '"');
if (tok.hasMoreTokens())
result.append("=");
}
return result;
}
public static String pruneSpaces(String token)
{
int firstSpace;
int lastSpace;
firstSpace = token.indexOf(' ');
while (firstSpace == 0)
{
token = token.substring(1, token.length());
firstSpace = token.indexOf(' ');
}
lastSpace = token.lastIndexOf(' ');
while (lastSpace == token.length() - 1)
{
token = token.substring(0, token.length() - 1);
lastSpace = token.lastIndexOf(' ');
}
return token;
}
/**
* Check for quote character (" or ') and switch to TAG_IGNORE_DATA_STATE
* or back out to TAG_BEGIN_PARSING_STATE.
* @param state The current state.
* @param ch The character to test.
* @param ignorechar The character that caused entry to TAG_IGNORE_DATA_STATE.
*/
private int toggleIgnoringState(int state, char ch, char[] ignorechar)
{
if (state == TAG_IGNORE_DATA_STATE)
{
if (ch == ignorechar[0])
state = TAG_BEGIN_PARSING_STATE;
}
else if (state == TAG_BEGIN_PARSING_STATE)
if (ch == '"' || ch == '\'')
{
state = TAG_IGNORE_DATA_STATE;
ignorechar[0] = ch;
}
return (state);
}
public int incrementCounter(int i, NodeReader reader, int state, Tag tag)
{
String nextLine = null;
if ((state == TAG_BEGIN_PARSING_STATE
|| state == TAG_IGNORE_DATA_STATE
|| state == TAG_IGNORE_BEGIN_TAG_STATE)
&& i == tag.getTagLine().length() - 1)
{
// The while loop below is a bug fix contributed by
// Annette Doyle - see testcase
HTMLImageScannerTest.testImageTagOnMultipleLines()
// Further modified by Somik Raha, to remove bug -
HTMLTagTest.testBrokenTag
int numLinesAdvanced = 0;
do
{
nextLine = reader.getNextLine();
numLinesAdvanced++;
}
while (nextLine != null && nextLine.length() == 0);
if (nextLine == null)
{
// This means we have a broken tag. Fill in an end tag symbol here.
nextLine = ">";
}
else
{
// This means this is just a new line, hence add the new line
character
tag.append(Node.getLineSeparator());
}
// Ensure blank lines are included in tag's 'tagLines'
while (--numLinesAdvanced > 0)
tag.setTagLine("");
// We need to continue parsing to the next line
tag.setTagLine(nextLine);
i = -1;
}
return ++i;
}
// Class provided for thread safety in TagParser
class Bool
{
private boolean boolValue;
Bool(boolean boolValue)
{
this.boolValue = boolValue;
}
public void setBoolean(boolean boolValue)
{
this.boolValue = boolValue;
}
public boolean getBoolean()
{
return boolValue;
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]