Resending this as I'm not sure if it went through. -------- Original Message -------- Subject: Patch to fix AnchorModifier Date: Fri, 26 Oct 2001 16:04:17 +0900 From: Ryan Shaw <[EMAIL PROTECTED]> To: [EMAIL PROTECTED]
Hello, Attached is a patch to fix the AnchorModifier so that it works as advertised in the documentation. It also patches HtmlParser to handle forms with no "action" attribute (common on pages using JavaScript). One problem remains: setting it up to spider pages as described in the documentation produces the following: Sampling http://www.startpage.com/ Sampling http://www.startpage.com/products.html Sampling http://www.startpage.com/ Sampling http://www.startpage.com/contact.html Sampling http://www.startpage.com/ etc... In other words, each time through the loop it hits the starting page again. Setting the starting page sample to be OnceOnly doesn't work either... Any ideas? Ryan
Index: src/org/apache/jmeter/protocol/http/modifier/AnchorModifier.java =================================================================== RCS file: /home/cvspublic/jakarta-jmeter/src/org/apache/jmeter/protocol/http/modifier/AnchorModifier.java,v retrieving revision 1.6 diff -u -r1.6 AnchorModifier.java --- src/org/apache/jmeter/protocol/http/modifier/AnchorModifier.java 2001/08/31 00:46:44 1.6 +++ src/org/apache/jmeter/protocol/http/modifier/AnchorModifier.java 2001/10/26 +06:46:04 @@ -1,284 +1,292 @@ -/* - * ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache JMeter" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact [EMAIL PROTECTED] - * - * 5. 
Products derived from this software may not be called "Apache", - * "Apache JMeter", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * <http://www.apache.org/>. 
- */ -package org.apache.jmeter.protocol.http.modifier; - -import org.apache.jmeter.config.ResponseBasedModifier; -import org.apache.jmeter.gui.JMeterComponentModel; -import org.apache.jmeter.config.ConfigElement; -import org.apache.jmeter.samplers.Entry; -import org.apache.jmeter.protocol.http.config.UrlConfig; -import org.apache.jmeter.protocol.http.parser.HtmlParser; -import org.apache.jmeter.protocol.http.sampler.HTTPSampler; -import org.apache.jmeter.samplers.SampleResult; -import org.apache.jmeter.samplers.Sampler; -import org.apache.jmeter.config.Argument; -import org.apache.jmeter.config.Arguments; -import org.apache.jmeter.gui.NamePanel; - -import java.util.*; -import java.io.*; -import org.w3c.dom.*; -import org.xml.sax.SAXException; -import java.net.*; - -/************************************************************ - * Title: Jakarta-JMeter Description: Copyright: Copyright (c) 2001 Company: - * Apache - * - *@author Michael Stover - *@created $Date: 2001/08/31 00:46:44 $ - *@version 1.0 - ***********************************************************/ - -public class AnchorModifier implements JMeterComponentModel, ResponseBasedModifier, - Serializable -{ - - private static Random rand = new Random(); - - /************************************************************ - * !ToDo (Constructor description) - ***********************************************************/ - public AnchorModifier() - { - } - - public void addJMeterComponent(JMeterComponentModel obj) - { - } - - public void uncompile() - { - } - - public Collection getAddList() - { - return null; - } - - public void setName(String name) - { - } - - public String getName() - { - return getClassLabel(); - } - - public Class getGuiClass() - { - return NamePanel.class; - } - - /************************************************************ - * !ToDoo (Method description) - * - *@return !ToDo (Return description) - ***********************************************************/ - public String getClassLabel() 
- { - return "HTML Link parser"; - } - - /************************************************************ - * !ToDo (Method description) - * - *@return !ToDo (Return description) - ***********************************************************/ - public Object clone() - { - return this; - } - - /************************************************************ - * Modifies an Entry object based on HTML response text. - * - *@param entry !ToDo (Parameter description) - *@param result !ToDo (Parameter description) - *@return !ToDo (Return description) - ***********************************************************/ - public boolean modifyEntry(Entry entry, SampleResult result) - { - List potentialLinks = new ArrayList(); - String responseText = (String)result.getValue(SampleResult.TEXT_RESPONSE); - UrlConfig config = (UrlConfig)entry.getConfigElement(UrlConfig.class); - Document html; - try - { - html = (Document)HtmlParser.getDOM(responseText); - } - catch (SAXException e) - { - return false; - } - addAnchorUrls(html, result, config, potentialLinks); - addFormUrls(html,result,config,potentialLinks); - if (potentialLinks.size() > 0) - { - UrlConfig url = (UrlConfig)potentialLinks.get(rand.nextInt(potentialLinks.size())); - config.setDomain(url.getDomain()); - config.setPath(url.getPath()); - if(url.getMethod().equals(UrlConfig.POST)) - { - Iterator iter = config.getArguments().iterator(); - while(iter.hasNext()) - { - Argument arg = (Argument)iter.next(); - modifyArgument(arg,url.getArguments()); - } - } - else - { - config.removeArguments(); - config.parseArguments(url.getQueryString()); - } - config.setProtocol(url.getProtocol()); - return true; - } - return false; - } - - private void modifyArgument(Argument arg,Arguments args) - { - List possibleReplacements = new ArrayList(); - Iterator iter = args.iterator(); - Argument replacementArg; - while (iter.hasNext()) - { - replacementArg = (Argument)iter.next(); - try - { - if(HtmlParser.isArgumentMatched(replacementArg,arg)) - { 
- possibleReplacements.add(replacementArg); - } - } - catch (Exception ex) { - ex.printStackTrace(); - } - } - if(possibleReplacements.size() > 0) - { - replacementArg = (Argument)possibleReplacements.get(rand.nextInt(possibleReplacements.size())); - arg.setName(replacementArg.getName()); - arg.setValue(replacementArg.getValue()); - } - } - - /************************************************************ - * !ToDo - * - *@param config !ToDo - ***********************************************************/ - public void addConfigElement(ConfigElement config) - { - } - - private void addFormUrls(Document html,SampleResult result,UrlConfig config, - List potentialLinks) - { - NodeList nodeList = html.getElementsByTagName("form"); - for(int x = 0;x < nodeList.getLength();x++) - { - Node tempNode = nodeList.item(x); - Node form = tempNode.cloneNode(true); - try - { - UrlConfig newUrl = HtmlParser.createURLFromForm(form, - (URL)result.getValue(HTTPSampler.URL)); - newUrl.setMethod(UrlConfig.POST); - if(HtmlParser.isAnchorMatched(newUrl,config)) - { - potentialLinks.add(newUrl); - } - } - catch (MalformedURLException e) - { - } - catch (org.apache.oro.text.regex.MalformedPatternException e) - { - System.out.println("Bad pattern"); - } - } - } - - private void addAnchorUrls(Document html, SampleResult result, UrlConfig config, List potentialLinks) - { - NodeList nodeList = html.getElementsByTagName("a"); - for (int i = 0; i < nodeList.getLength(); i++) - { - Node tempNode = nodeList.item(i); - NamedNodeMap nnm = tempNode.getAttributes(); - Node namedItem = nnm.getNamedItem("href"); - String hrefStr = namedItem.getNodeValue(); - try - { - UrlConfig newUrl = HtmlParser.createUrlFromAnchor(hrefStr, (URL)result.getValue(HTTPSampler.URL)); - newUrl.setMethod(UrlConfig.GET); - if (HtmlParser.isAnchorMatched(newUrl, config)) - { - potentialLinks.add(newUrl); - } - } - catch (MalformedURLException e) - { - } - catch (org.apache.oro.text.regex.MalformedPatternException e) - { - 
System.out.println("Bad pattern"); - } - } - } -} +/* + * ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache JMeter" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact [EMAIL PROTECTED] + * + * 5. Products derived from this software may not be called "Apache", + * "Apache JMeter", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ +package org.apache.jmeter.protocol.http.modifier; + +import org.apache.jmeter.config.ResponseBasedModifier; +import org.apache.jmeter.gui.JMeterComponentModel; +import org.apache.jmeter.config.ConfigElement; +import org.apache.jmeter.config.AbstractConfigElement; +import org.apache.jmeter.samplers.Entry; +import org.apache.jmeter.protocol.http.config.UrlConfig; +import org.apache.jmeter.protocol.http.parser.HtmlParser; +import org.apache.jmeter.protocol.http.sampler.HTTPSampler; +import org.apache.jmeter.samplers.SampleResult; +import org.apache.jmeter.samplers.Sampler; +import org.apache.jmeter.config.Argument; +import org.apache.jmeter.config.Arguments; +import org.apache.jmeter.gui.NamePanel; + +import java.util.*; +import java.io.*; +import org.w3c.dom.*; +import org.xml.sax.SAXException; +import java.net.*; + +/************************************************************ + * Title: Jakarta-JMeter Description: Copyright: Copyright (c) 2001 Company: + * Apache + * + *@author Michael Stover + *@created $Date: 2001/08/31 00:46:44 $ + *@version 1.0 + 
***********************************************************/ + +public class AnchorModifier extends AbstractConfigElement + implements JMeterComponentModel, ResponseBasedModifier +{ + + private static Random rand = new Random(); + + /************************************************************ + * !ToDo (Constructor description) + ***********************************************************/ + public AnchorModifier() + { + } + + public void addJMeterComponent(JMeterComponentModel obj) + { + } + + public void uncompile() + { + } + + public Collection getAddList() + { + return null; + } + + public void setName(String name) + { + } + + public String getName() + { + return getClassLabel(); + } + + public Class getGuiClass() + { + return NamePanel.class; + } + + /************************************************************ + * !ToDoo (Method description) + * + *@return !ToDo (Return description) + ***********************************************************/ + public String getClassLabel() + { + return "HTML Link parser"; + } + + /************************************************************ + * !ToDo (Method description) + * + *@return !ToDo (Return description) + ***********************************************************/ + public Object clone() + { + return this; + } + + public boolean expectsModification() + { + return false; + } + + /************************************************************ + * Modifies an Entry object based on HTML response text. 
+ * + *@param entry !ToDo (Parameter description) + *@param result !ToDo (Parameter description) + *@return !ToDo (Return description) + ***********************************************************/ + public boolean modifyEntry(Entry entry, SampleResult result) + { + List potentialLinks = new ArrayList(); + String responseText = +(String)result.getValue(SampleResult.TEXT_RESPONSE); + UrlConfig config = (UrlConfig)entry.getConfigElement(UrlConfig.class); + Document html; + try + { + html = (Document)HtmlParser.getDOM(responseText); + } + catch (SAXException e) + { + return false; + } + addAnchorUrls(html, result, config, potentialLinks); + addFormUrls(html,result,config,potentialLinks); + if (potentialLinks.size() > 0) + { + UrlConfig url = +(UrlConfig)potentialLinks.get(rand.nextInt(potentialLinks.size())); + config.setDomain(url.getDomain()); + config.setPath(url.getPath()); + if(url.getMethod().equals(UrlConfig.POST)) + { + Iterator iter = config.getArguments().iterator(); + while(iter.hasNext()) + { + Argument arg = (Argument)iter.next(); + modifyArgument(arg,url.getArguments()); + } + } + else + { + config.removeArguments(); + config.parseArguments(url.getQueryString()); + } + config.setProtocol(url.getProtocol()); + return true; + } + return false; + } + + private void modifyArgument(Argument arg,Arguments args) + { + List possibleReplacements = new ArrayList(); + Iterator iter = args.iterator(); + Argument replacementArg; + while (iter.hasNext()) + { + replacementArg = (Argument)iter.next(); + try + { + if(HtmlParser.isArgumentMatched(replacementArg,arg)) + { + possibleReplacements.add(replacementArg); + } + } + catch (Exception ex) { + ex.printStackTrace(); + } + } + if(possibleReplacements.size() > 0) + { + replacementArg = +(Argument)possibleReplacements.get(rand.nextInt(possibleReplacements.size())); + arg.setName(replacementArg.getName()); + arg.setValue(replacementArg.getValue()); + } + } + + /************************************************************ 
+ * !ToDo + * + *@param config !ToDo + ***********************************************************/ + public void addConfigElement(ConfigElement config) + { + } + + private void addFormUrls(Document html,SampleResult result,UrlConfig config, + List potentialLinks) + { + NodeList nodeList = html.getElementsByTagName("form"); + for(int x = 0;x < nodeList.getLength();x++) + { + Node tempNode = nodeList.item(x); + Node form = tempNode.cloneNode(true); + try + { + UrlConfig newUrl = HtmlParser.createURLFromForm(form, + +(URL)result.getValue(HTTPSampler.URL)); + if (newUrl != null) { + newUrl.setMethod(UrlConfig.POST); + if(HtmlParser.isAnchorMatched(newUrl,config)) + { + potentialLinks.add(newUrl); + } + } + } + catch (MalformedURLException e) + { + } + catch (org.apache.oro.text.regex.MalformedPatternException e) + { + System.out.println("Bad pattern"); + } + } + } + + private void addAnchorUrls(Document html, SampleResult result, UrlConfig +config, List potentialLinks) + { + NodeList nodeList = html.getElementsByTagName("a"); + for (int i = 0; i < nodeList.getLength(); i++) + { + Node tempNode = nodeList.item(i); + NamedNodeMap nnm = tempNode.getAttributes(); + Node namedItem = nnm.getNamedItem("href"); + String hrefStr = namedItem.getNodeValue(); + try + { + UrlConfig newUrl = +HtmlParser.createUrlFromAnchor(hrefStr, (URL)result.getValue(HTTPSampler.URL)); + newUrl.setMethod(UrlConfig.GET); + if (HtmlParser.isAnchorMatched(newUrl, config)) + { + potentialLinks.add(newUrl); + } + } + catch (MalformedURLException e) + { + } + catch (org.apache.oro.text.regex.MalformedPatternException e) + { + System.out.println("Bad pattern"); + } + } + } +} Index: src/org/apache/jmeter/protocol/http/parser/HtmlParser.java =================================================================== RCS file: /home/cvspublic/jakarta-jmeter/src/org/apache/jmeter/protocol/http/parser/HtmlParser.java,v retrieving revision 1.14 diff -u -r1.14 HtmlParser.java --- 
src/org/apache/jmeter/protocol/http/parser/HtmlParser.java 2001/09/28 05:57:15 1.14 +++ src/org/apache/jmeter/protocol/http/parser/HtmlParser.java 2001/10/26 06:46:05 @@ -84,412 +84,414 @@ public class HtmlParser implements Serializable { - private int compilerOptions = Perl5Compiler.CASE_INSENSITIVE_MASK | - Perl5Compiler.MULTILINE_MASK | Perl5Compiler.READ_ONLY_MASK; + private int compilerOptions = Perl5Compiler.CASE_INSENSITIVE_MASK | + Perl5Compiler.MULTILINE_MASK | Perl5Compiler.READ_ONLY_MASK; - protected static Category catClass = - Category.getInstance(HtmlParser.class.getName()); + protected static Category catClass = + Category.getInstance(HtmlParser.class.getName()); - protected static String utfEncodingName; + protected static String utfEncodingName; - private transient static Perl5Compiler compiler = new Perl5Compiler(); - - private transient static Perl5Matcher matcher = new Perl5Matcher(); - - /************************************************************ - * Constructor for the HtmlParser object - ***********************************************************/ - - public HtmlParser() - { - } - - /************************************************************ - * !ToDoo (Method description) - * - *@param newLink !ToDo (Parameter description) - *@param config !ToDo (Parameter description) - *@return !ToDo (Return description) - *@exception MalformedPatternException !ToDo (Exception description) - ***********************************************************/ - public static synchronized boolean isAnchorMatched(UrlConfig newLink, UrlConfig config) throws MalformedPatternException - { - boolean ok = true; - - Iterator iter = config.getArguments().iterator(); - String query = newLink.getQueryString(); - - if (query == null && config.getArguments().getArgumentCount() > 0) - { - return false; - } - - while (iter.hasNext()) - { - Argument item = (Argument)iter.next(); - - if (!(ok = ok && matcher.contains(query, compiler.compile(item.getName() + "=" + 
item.getValue())))) - { - return false; - } - } - - if (!(ok = ok && matcher.matches(newLink.getDomain(), - compiler.compile(config.getDomain())))) - { - return false; - } - - if (!(ok = ok && matcher.matches(newLink.getPath(), compiler.compile("[/]*"+config.getPath())))) - { - return false; - } - - if (!(ok = ok && matcher.matches(newLink.getProtocol(), compiler.compile(config.getProtocol())))) - { - return false; - } - - return ok; - } - - public static synchronized boolean isArgumentMatched(Argument arg,Argument patternArg) throws MalformedPatternException - { - return matcher.matches(arg.getName(),compiler.compile(patternArg.getName())) && - matcher.matches((String)arg.getValue(),compiler.compile((String)patternArg.getValue())); - } - - /************************************************************ - * Returns <code>tidy</code> as HTML parser - * - *@return a <code>tidy</code> HTML parser - ***********************************************************/ - public static Tidy getParser() - { - catClass.debug("Start : getParser1"); - Tidy tidy = new Tidy(); - tidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8); - tidy.setQuiet(true); - tidy.setShowWarnings(false); - - if (catClass.isDebugEnabled()) - { - catClass.debug("getParser1 : tidy parser created - " + tidy); - } - - catClass.debug("End : getParser1"); - - return tidy; - } - - /************************************************************ - * Returns a node representing a whole xml given an xml document - * - *@param text an xml document - *@return a node representing a whole xml - *@exception SAXException !ToDo (Exception description) - ***********************************************************/ - public static Node getDOM(String text) throws SAXException - { - catClass.debug("Start : getDOM1"); - - try - { - Node node = getParser().parseDOM(new - ByteArrayInputStream(text.getBytes(getUTFEncodingName())), null); - - if (catClass.isDebugEnabled()) - { - catClass.debug("node : " + node); - } - - 
catClass.debug("End : getDOM1"); - - return node; - } - catch (UnsupportedEncodingException e) - { - catClass.error("getDOM1 : Unsupported encoding exception - " + e); - catClass.debug("End : getDOM1"); - throw new RuntimeException("UTF-8 encoding failed"); - } - } - - /************************************************************ - * Returns the encoding type which is different for different jdks even though - * the mean the same thing i.e. UTF8 or UTF-8 - * - *@return either UTF8 or UTF-8 depending on the jdk version - ***********************************************************/ - public static String getUTFEncodingName() - { - catClass.debug("Start : getUTFEncodingName1"); - - if (utfEncodingName == null) - { - String versionNum = System.getProperty("java.version"); - - if (catClass.isDebugEnabled()) - { - catClass.debug("getUTFEncodingName1 : versionNum - " + versionNum); - } - - if (versionNum.startsWith("1.1")) - { - utfEncodingName = "UTF8"; - } - else - { - utfEncodingName = "UTF-8"; - } - } - - if (catClass.isDebugEnabled()) - { - catClass.debug("getUTFEncodingName1 : Returning utfEncodingName - " + - utfEncodingName); - } - - catClass.debug("End : getUTFEncodingName1"); - - return utfEncodingName; - } - - /************************************************************ - * !ToDo (Method description) - * - *@return !ToDo (Return description) - ***********************************************************/ - public static Document createEmptyDoc() - { - return new Tidy().createEmptyDocument(); - } - - /************************************************************ - * Create a new URL based on an HREF string plus a contextual URL object. - * Given that an HREF string might be of three possible forms, some processing - * is required. 
- * - *@param parsedUrlString !ToDo (Parameter description) - *@param context !ToDo (Parameter description) - *@return !ToDo (Return description) - *@exception MalformedURLException !ToDo (Exception description) - ***********************************************************/ - public static UrlConfig createUrlFromAnchor(String parsedUrlString, URL context) throws MalformedURLException - { - UrlConfig url = new UrlConfig(); - url.setDomain(context.getHost()); - url.setProtocol(context.getProtocol()); - url.setPort(context.getPort()); - - // In JDK1.3, we can get the path using getPath(). However, in JDK1.2, we have to parse - // the file to obtain the path. In the source for JDK1.3.1, they determine the path to - // be from the start of the file up to the LAST question mark (if any). - String contextPath = null; - String contextFile = context.getFile(); - int indexContextQuery = contextFile.lastIndexOf('?'); - if (indexContextQuery != -1) - { - contextPath = contextFile.substring(0, indexContextQuery); - } - else - { - contextPath = contextFile; - } - - int queryStarts = parsedUrlString.indexOf("?"); - - if (queryStarts == -1) - { - queryStarts = parsedUrlString.length(); - } - - if (parsedUrlString.startsWith("/")) - { - url.setPath(parsedUrlString.substring(0, queryStarts)); - } - else if (parsedUrlString.startsWith("..")) - { - url.setPath(contextPath.substring(0, contextPath.substring(0, - contextPath.lastIndexOf("/")).lastIndexOf("/")) + - parsedUrlString.substring(2, queryStarts)); - } - else if (!parsedUrlString.toLowerCase().startsWith("http")) - { - url.setPath(contextPath.substring(0, contextPath.lastIndexOf("/")) + - "/" + parsedUrlString.substring(0, queryStarts)); - } - else - { - URL u = new URL(parsedUrlString); - - // Determine the path. (See JDK1.2/1.3 comment above.) 
- String uPath = null; - String uFile = u.getFile(); - int indexUQuery = uFile.lastIndexOf('?'); - if (indexUQuery != -1) - { - uPath = uFile.substring(0, indexUQuery); - } - else - { - uPath = uFile; - } - - url.setPath(uPath); - url.setDomain(u.getHost()); - url.setProtocol(u.getProtocol()); - url.setPort(u.getPort()); - } - - if (queryStarts < parsedUrlString.length()) - { - url.parseArguments(parsedUrlString.substring(queryStarts + 1)); - } - - return url; - } - - /************************************************************ - * !ToDo (Method description) - * - *@param formNode !ToDo (Parameter description) - *@param context !ToDo (Parameter description) - *@return !ToDo (Return description) - *@exception MalformedURLException !ToDo (Exception description) - ***********************************************************/ - - public static UrlConfig createURLFromForm(Node formNode, URL context) throws MalformedURLException - { - String selectName = null; - NodeList childNodes = formNode.getChildNodes(); - NamedNodeMap atts = formNode.getAttributes(); - String action = atts.getNamedItem("action").getNodeValue(); - UrlConfig url = createUrlFromAnchor(action, context); - recurseForm(childNodes, url, selectName); - - return url; - } - - /************************************************************ - * !ToDo (Class description) - * - *@author $Author: khammond $ - *@created $Date: 2001/09/28 05:57:15 $ - *@version $Revision: 1.14 $ - ***********************************************************/ - public static class Test extends TestCase - { - private static Category catClass = - Category.getInstance(Test.class.getName()); - - /************************************************************ - * !ToDo (Constructor description) - * - *@param name !ToDo (Parameter description) - ***********************************************************/ - public Test(String name) - { - super(name); - } - - /************************************************************ - * !ToDo - 
***********************************************************/ - public void testGetUTFEncodingName() - { - catClass.debug("Start : testGetUTFEncodingName1"); - String javaVersion = System.getProperty("java.version"); - utfEncodingName = null; - System.setProperty("java.version", "1.1"); - assertEquals("UTF8", HtmlParser.getUTFEncodingName()); - // need to clear utfEncodingName variable first 'cos - // getUTFEncodingName checks to see if it's null - utfEncodingName = null; - System.setProperty("java.version", "1.2"); - assertEquals("UTF-8", HtmlParser.getUTFEncodingName()); - System.setProperty("java.version", javaVersion); - catClass.debug("End : testGetUTFEncodingName1"); - } - - /************************************************************ - * !ToDo - ***********************************************************/ - protected void setUp() - { - } - } - - private static void recurseForm(NodeList childNodes, UrlConfig url, String selectName) - { - for (int x = 0; x < childNodes.getLength(); x++) - { - Node tempNode = childNodes.item(x); - NamedNodeMap nodeAtts = tempNode.getAttributes(); - String tag = tempNode.getNodeName(); - try - { - if (tag.equalsIgnoreCase("input")) - { - url.addArgument(getAttributeValue(nodeAtts,"name"), - getAttributeValue(nodeAtts,"value")); - } - else if (tag.equalsIgnoreCase("textarea")) - { - url.addArgument(getAttributeValue(nodeAtts,"name"), - tempNode.getFirstChild().getNodeValue()); - } - else if (tag.equalsIgnoreCase("select")) - { - selectName = getAttributeValue(nodeAtts,"name"); - } - else if (tag.equalsIgnoreCase("option")) - { - String value = getAttributeValue(nodeAtts,"value"); - if (value == null || value.equals("")) - { - value = tempNode.getFirstChild().getNodeValue(); - } - - url.addArgument(selectName, value); - } - } - catch (Exception ex) { - System.out.println("Some bad HTML "+printNode(tempNode)); - } - - recurseForm(tempNode.getChildNodes(),url,selectName); - } - } - - private static String 
getAttributeValue(NamedNodeMap att,String attName) - { - try { - return att.getNamedItem(attName).getNodeValue(); - } - catch (Exception ex) - { - return ""; - } - } - - private static String printNode(Node node) - { - StringBuffer buf = new StringBuffer(); - buf.append("<"); - buf.append(node.getNodeName()); - NamedNodeMap atts = node.getAttributes(); - for(int x = 0;x < atts.getLength();x++) - { - buf.append(" "); - buf.append(atts.item(x).getNodeName()); - buf.append("=\""); - buf.append(atts.item(x).getNodeValue()); - buf.append("\""); - } + private transient static Perl5Compiler compiler = new Perl5Compiler(); + + private transient static Perl5Matcher matcher = new Perl5Matcher(); + + /************************************************************ + * Constructor for the HtmlParser object + ***********************************************************/ + + public HtmlParser() + { + } + + /************************************************************ + * !ToDoo (Method description) + * + *@param newLink !ToDo (Parameter description) + *@param config !ToDo (Parameter description) + *@return !ToDo (Return description) + *@exception MalformedPatternException !ToDo (Exception description) + ***********************************************************/ + public static synchronized boolean isAnchorMatched(UrlConfig newLink, +UrlConfig config) throws MalformedPatternException + { + boolean ok = true; + + Iterator iter = config.getArguments().iterator(); + String query = newLink.getQueryString(); + + if (query == null && config.getArguments().getArgumentCount() > 0) + { + return false; + } + + while (iter.hasNext()) + { + Argument item = (Argument)iter.next(); + + if (!(ok = ok && matcher.contains(query, +compiler.compile(item.getName() + "=" + item.getValue())))) + { + return false; + } + } + + if (!(ok = ok && matcher.matches(newLink.getDomain(), +compiler.compile(config.getDomain())))) + { + return false; + } + + if (!(ok = ok && matcher.matches(newLink.getPath(), 
+compiler.compile("[/]*"+config.getPath())))) + { + return false; + } + + if (!(ok = ok && matcher.matches(newLink.getProtocol(), +compiler.compile(config.getProtocol())))) + { + return false; + } + + return ok; + } + + public static synchronized boolean isArgumentMatched(Argument arg,Argument +patternArg) throws MalformedPatternException + { + return +matcher.matches(arg.getName(),compiler.compile(patternArg.getName())) && + +matcher.matches((String)arg.getValue(),compiler.compile((String)patternArg.getValue())); + } + + /************************************************************ + * Returns <code>tidy</code> as HTML parser + * + *@return a <code>tidy</code> HTML parser + ***********************************************************/ + public static Tidy getParser() + { + catClass.debug("Start : getParser1"); + Tidy tidy = new Tidy(); + tidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8); + tidy.setQuiet(true); + tidy.setShowWarnings(false); + + if (catClass.isDebugEnabled()) + { + catClass.debug("getParser1 : tidy parser created - " + tidy); + } + + catClass.debug("End : getParser1"); + + return tidy; + } + + /************************************************************ + * Returns a node representing a whole xml given an xml document + * + *@param text an xml document + *@return a node representing a whole xml + *@exception SAXException !ToDo (Exception description) + ***********************************************************/ + public static Node getDOM(String text) throws SAXException + { + catClass.debug("Start : getDOM1"); + + try + { + Node node = getParser().parseDOM(new + +ByteArrayInputStream(text.getBytes(getUTFEncodingName())), null); + + if (catClass.isDebugEnabled()) + { + catClass.debug("node : " + node); + } + + catClass.debug("End : getDOM1"); + + return node; + } + catch (UnsupportedEncodingException e) + { + catClass.error("getDOM1 : Unsupported encoding exception - " ++ e); + catClass.debug("End : getDOM1"); + throw new 
RuntimeException("UTF-8 encoding failed"); + } + } + + /************************************************************ + * Returns the encoding type which is different for different jdks even +though + * the mean the same thing i.e. UTF8 or UTF-8 + * + *@return either UTF8 or UTF-8 depending on the jdk version + ***********************************************************/ + public static String getUTFEncodingName() + { + catClass.debug("Start : getUTFEncodingName1"); + + if (utfEncodingName == null) + { + String versionNum = System.getProperty("java.version"); + + if (catClass.isDebugEnabled()) + { + catClass.debug("getUTFEncodingName1 : versionNum - " ++ versionNum); + } + + if (versionNum.startsWith("1.1")) + { + utfEncodingName = "UTF8"; + } + else + { + utfEncodingName = "UTF-8"; + } + } + + if (catClass.isDebugEnabled()) + { + catClass.debug("getUTFEncodingName1 : Returning +utfEncodingName - " + + utfEncodingName); + } + + catClass.debug("End : getUTFEncodingName1"); + + return utfEncodingName; + } + + /************************************************************ + * !ToDo (Method description) + * + *@return !ToDo (Return description) + ***********************************************************/ + public static Document createEmptyDoc() + { + return new Tidy().createEmptyDocument(); + } + + /************************************************************ + * Create a new URL based on an HREF string plus a contextual URL object. + * Given that an HREF string might be of three possible forms, some +processing + * is required. 
+ * + *@param parsedUrlString !ToDo (Parameter description) + *@param context !ToDo (Parameter description) + *@return !ToDo (Return description) + *@exception MalformedURLException !ToDo (Exception description) + ***********************************************************/ + public static UrlConfig createUrlFromAnchor(String parsedUrlString, URL +context) throws MalformedURLException + { + UrlConfig url = new UrlConfig(); + url.setDomain(context.getHost()); + url.setProtocol(context.getProtocol()); + url.setPort(context.getPort()); + + // In JDK1.3, we can get the path using getPath(). However, in +JDK1.2, we have to parse + // the file to obtain the path. In the source for JDK1.3.1, they +determine the path to + // be from the start of the file up to the LAST question mark (if +any). + String contextPath = null; + String contextFile = context.getFile(); + int indexContextQuery = contextFile.lastIndexOf('?'); + if (indexContextQuery != -1) + { + contextPath = contextFile.substring(0, indexContextQuery); + } + else + { + contextPath = contextFile; + } + + int queryStarts = parsedUrlString.indexOf("?"); + + if (queryStarts == -1) + { + queryStarts = parsedUrlString.length(); + } + + if (parsedUrlString.startsWith("/")) + { + url.setPath(parsedUrlString.substring(0, queryStarts)); + } + else if (parsedUrlString.startsWith("..")) + { + url.setPath(contextPath.substring(0, contextPath.substring(0, + +contextPath.lastIndexOf("/")).lastIndexOf("/")) + + parsedUrlString.substring(2, queryStarts)); + } + else if (!parsedUrlString.toLowerCase().startsWith("http")) + { + url.setPath(contextPath.substring(0, +contextPath.lastIndexOf("/")) + + "/" + parsedUrlString.substring(0, queryStarts)); + } + else + { + URL u = new URL(parsedUrlString); + + // Determine the path. (See JDK1.2/1.3 comment above.) 
+ String uPath = null; + String uFile = u.getFile(); + int indexUQuery = uFile.lastIndexOf('?'); + if (indexUQuery != -1) + { + uPath = uFile.substring(0, indexUQuery); + } + else + { + uPath = uFile; + } + + url.setPath(uPath); + url.setDomain(u.getHost()); + url.setProtocol(u.getProtocol()); + url.setPort(u.getPort()); + } + + if (queryStarts < parsedUrlString.length()) + { + url.parseArguments(parsedUrlString.substring(queryStarts + +1)); + } + + return url; + } + + /************************************************************ + * !ToDo (Method description) + * + *@param formNode !ToDo (Parameter description) + *@param context !ToDo (Parameter description) + *@return !ToDo (Return description) + *@exception MalformedURLException !ToDo (Exception description) + ***********************************************************/ + + public static UrlConfig createURLFromForm(Node formNode, URL context) throws +MalformedURLException + { + String selectName = null; + NodeList childNodes = formNode.getChildNodes(); + NamedNodeMap atts = formNode.getAttributes(); + UrlConfig url = null; + if (atts.getNamedItem("action") != null) + { + String action = atts.getNamedItem("action").getNodeValue(); + url = createUrlFromAnchor(action, context); + recurseForm(childNodes, url, selectName); + } + return url; + } + + /************************************************************ + * !ToDo (Class description) + * + *@author $Author: khammond $ + *@created $Date: 2001/09/28 05:57:15 $ + *@version $Revision: 1.14 $ + ***********************************************************/ + public static class Test extends TestCase + { + private static Category catClass = + Category.getInstance(Test.class.getName()); + + /************************************************************ + * !ToDo (Constructor description) + * + *@param name !ToDo (Parameter description) + ***********************************************************/ + public Test(String name) + { + super(name); + } + + 
/************************************************************ + * !ToDo + ***********************************************************/ + public void testGetUTFEncodingName() + { + catClass.debug("Start : testGetUTFEncodingName1"); + String javaVersion = System.getProperty("java.version"); + utfEncodingName = null; + System.setProperty("java.version", "1.1"); + assertEquals("UTF8", HtmlParser.getUTFEncodingName()); + // need to clear utfEncodingName variable first 'cos + // getUTFEncodingName checks to see if it's null + utfEncodingName = null; + System.setProperty("java.version", "1.2"); + assertEquals("UTF-8", HtmlParser.getUTFEncodingName()); + System.setProperty("java.version", javaVersion); + catClass.debug("End : testGetUTFEncodingName1"); + } + + /************************************************************ + * !ToDo + ***********************************************************/ + protected void setUp() + { + } + } + + private static void recurseForm(NodeList childNodes, UrlConfig url, String +selectName) + { + for (int x = 0; x < childNodes.getLength(); x++) + { + Node tempNode = childNodes.item(x); + NamedNodeMap nodeAtts = tempNode.getAttributes(); + String tag = tempNode.getNodeName(); + try + { + if (tag.equalsIgnoreCase("input")) + { + +url.addArgument(getAttributeValue(nodeAtts,"name"), + +getAttributeValue(nodeAtts,"value")); + } + else if (tag.equalsIgnoreCase("textarea")) + { + +url.addArgument(getAttributeValue(nodeAtts,"name"), + +tempNode.getFirstChild().getNodeValue()); + } + else if (tag.equalsIgnoreCase("select")) + { + selectName = +getAttributeValue(nodeAtts,"name"); + } + else if (tag.equalsIgnoreCase("option")) + { + String value = +getAttributeValue(nodeAtts,"value"); + if (value == null || value.equals("")) + { + value = +tempNode.getFirstChild().getNodeValue(); + } + + url.addArgument(selectName, value); + } + } + catch (Exception ex) { + System.out.println("Some bad HTML +"+printNode(tempNode)); + } + + 
recurseForm(tempNode.getChildNodes(),url,selectName); + } + } + + private static String getAttributeValue(NamedNodeMap att,String attName) + { + try { + return att.getNamedItem(attName).getNodeValue(); + } + catch (Exception ex) + { + return ""; + } + } + + private static String printNode(Node node) + { + StringBuffer buf = new StringBuffer(); + buf.append("<"); + buf.append(node.getNodeName()); + NamedNodeMap atts = node.getAttributes(); + for(int x = 0;x < atts.getLength();x++) + { + buf.append(" "); + buf.append(atts.item(x).getNodeName()); + buf.append("=\""); + buf.append(atts.item(x).getNodeValue()); + buf.append("\""); + } - buf.append(">"); + buf.append(">"); - return buf.toString(); - } + return buf.toString(); + } }
-- To unsubscribe, e-mail: <mailto:[EMAIL PROTECTED]> For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>
