parserapplications LinkExtractor.java MailRipper.java Robot.java StringExtractor.java package.html

woolfel Thu, 16 Oct 2003 15:20:07 -0700

woolfel     2003/10/16 12:11:59

  Added:       src/htmlparser/org/htmlparser/parserapplications
                        LinkExtractor.java MailRipper.java Robot.java
                        StringExtractor.java package.html
  Log:
  more htmlparser code
  thanks to the htmlparser developers and derrick
  
  Revision  Changes    Path
  1.1                  
jakarta-jmeter/src/htmlparser/org/htmlparser/parserapplications/LinkExtractor.java
  
  Index: LinkExtractor.java
  ===================================================================
  /*
   * ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2001-2003 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   * notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   * notice, this list of conditions and the following disclaimer in
   * the documentation and/or other materials provided with the
   * distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   * if any, must include the following acknowledgment:
   * "This product includes software developed by the
   * Apache Software Foundation (http://www.apache.org/)."
   * Alternately, this acknowledgment may appear in the software itself,
   * if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   * "Apache JMeter" must not be used to endorse or promote products
   * derived from this software without prior written permission. For
   * written permission, please contact [EMAIL PROTECTED]
   *
   * 5. Products derived from this software may not be called "Apache",
   * "Apache JMeter", nor may "Apache" appear in their name, without
   * prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   * 
   */
  
  // The developers of JMeter and Apache are grateful to the developers
  // of HTMLParser for giving Apache Software Foundation a non-exclusive
  // license. The performance benefits of HTMLParser are clear and the
  // users of JMeter will benefit from the hard work of the HTMLParser
  // team. For detailed information about HTMLParser, the project is
  // hosted on sourceforge at http://htmlparser.sourceforge.net/.
  //
  // HTMLParser was originally created by Somik Raha in 2000. Since then
  // a healthy community of users has formed and helped refine the
  // design so that it is able to tackle the difficult task of parsing
  // dirty HTML. Derrick Oswald is the current lead developer and was kind
  // enough to assist JMeter.
  
  
  package org.htmlparser.parserapplications;
  
  
  import org.htmlparser.Node;
  import org.htmlparser.Parser;
  import org.htmlparser.tags.LinkTag;
  import org.htmlparser.util.ParserException;
  
  /**
   * LinkExtractor extracts all the links from the given webpage
   * and prints them on standard output.
   */
  public class LinkExtractor
  {
      /** Location (URL or file) of the page to scan, as given to the constructor. */
      private String location;
      /** Parser for {@link #location}; remains null when it could not be created. */
      private Parser parser;
      /** Exception caught by the constructor; reported later by {@link #extractLinks()}. */
      private ParserException creationFailure;

      /**
       * Creates an extractor for the given page and registers the
       * parser's standard scanners.
       * A ParserException raised here is printed and remembered instead of
       * being thrown; {@link #extractLinks()} reports it to the caller.
       * @param location URL or file name of the page to parse
       */
      public LinkExtractor(String location)
      {
          this.location = location;
          try
          {
              this.parser = new Parser(location); // Create the parser object
              // Register standard scanners (Very Important)
              parser.registerScanners();
          }
          catch (ParserException e)
          {
              e.printStackTrace();
              creationFailure = e;
          }
      }

      /**
       * Prints the link of every LinkTag found in the page on standard
       * output, one link per line.
       * @throws ParserException if the parser could not be created by the
       * constructor, or if extracting the nodes fails
       */
      public void extractLinks() throws ParserException
      {
          // Without this guard a failed construction would surface as a
          // NullPointerException that hides the real cause.
          if (parser == null)
          {
              throw new ParserException(
                  "Could not create a parser for " + location,
                  creationFailure);
          }
          System.out.println("Parsing " + location + " for links...");
          Node[] links = parser.extractAllNodesThatAre(LinkTag.class);
          for (int i = 0; i < links.length; i++)
          {
              LinkTag linkTag = (LinkTag) links[i];
              // Print it (use linkTag.toString() for the full tag instead)
              System.out.println(linkTag.getLink());
              // To extract only mail addresses, guard the line above with
              // if (linkTag.isMailLink())
          }
      }

      /**
       * Command line entry point.
       * @param args args[0] is the location (URL or file) to parse
       */
      public static void main(String[] args)
      {
          // At least one argument is required; args[0] is read below.
          if (args.length < 1)
          {
              System.err.println(
                  "Syntax Error : Please provide the location(URL or file) to parse");
              System.exit(-1);
          }
          LinkExtractor linkExtractor = new LinkExtractor(args[0]);
          try
          {
              linkExtractor.extractLinks();
          }
          catch (ParserException e)
          {
              e.printStackTrace();
          }
      }
  }
  
  
  1.1                  
jakarta-jmeter/src/htmlparser/org/htmlparser/parserapplications/MailRipper.java
  
  Index: MailRipper.java
  ===================================================================
  /*
   * ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2001-2003 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   * notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   * notice, this list of conditions and the following disclaimer in
   * the documentation and/or other materials provided with the
   * distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   * if any, must include the following acknowledgment:
   * "This product includes software developed by the
   * Apache Software Foundation (http://www.apache.org/)."
   * Alternately, this acknowledgment may appear in the software itself,
   * if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   * "Apache JMeter" must not be used to endorse or promote products
   * derived from this software without prior written permission. For
   * written permission, please contact [EMAIL PROTECTED]
   *
   * 5. Products derived from this software may not be called "Apache",
   * "Apache JMeter", nor may "Apache" appear in their name, without
   * prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   * 
   */
  
  // The developers of JMeter and Apache are grateful to the developers
  // of HTMLParser for giving Apache Software Foundation a non-exclusive
  // license. The performance benefits of HTMLParser are clear and the
  // users of JMeter will benefit from the hard work of the HTMLParser
  // team. For detailed information about HTMLParser, the project is
  // hosted on sourceforge at http://htmlparser.sourceforge.net/.
  //
  // HTMLParser was originally created by Somik Raha in 2000. Since then
  // a healthy community of users has formed and helped refine the
  // design so that it is able to tackle the difficult task of parsing
  // dirty HTML. Derrick Oswald is the current lead developer and was kind
  // enough to assist JMeter.
  
  
  package org.htmlparser.parserapplications;
  import java.util.Enumeration;
  import java.util.Vector;
  
  import org.htmlparser.Node;
  import org.htmlparser.Parser;
  import org.htmlparser.tags.LinkTag;
  import org.htmlparser.util.DefaultParserFeedback;
  import org.htmlparser.util.NodeIterator;
  import org.htmlparser.util.ParserException;
  
  
  /**
   * MailRipper will rip out all the mail addresses from a given web page
   * Pass a web site (or html file on your local disk) as an argument.
   */
  public class MailRipper
  {
      private org.htmlparser.Parser parser;
      /**
       * MailRipper c'tor takes the url to be ripped
       * @param resourceLocation url to be ripped
       */
      public MailRipper(String resourceLocation)
      {
          try
          {
              parser = new Parser(resourceLocation, new DefaultParserFeedback());
              parser.registerScanners();
          }
          catch (ParserException e)
          {
              System.err.println("Could not create parser object");
              e.printStackTrace();
          }
      }
      public static void main(String[] args)
      {
          System.out.println("Mail Ripper v" + Parser.getVersion());
          if (args.length < 1 || args[0].equals("-help"))
          {
              System.out.println();
              System.out.println(
                  "Syntax : java -classpath htmlparser.jar 
org.htmlparser.parserapplications.MailRipper <resourceLocn/website>");
              System.out.println();
              System.out.println(
                  "   <resourceLocn> the name of the file to be parsed (with complete 
path ");
              System.out.println(
                  "                  if not in current directory)");
              System.out.println("   -help This screen");
              System.out.println();
              System.out.println(
                  "HTML Parser home page : http://htmlparser.sourceforge.net";);
              System.out.println();
              System.out.println(
                  "Example : java -classpath htmlparser.jar 
com.kizna.parserapplications.MailRipper http://htmlparser.sourceforge.net";);
              System.out.println();
              System.out.println(
                  "If you have any doubts, please join the HTMLParser mailing list 
(user/developer) from the HTML Parser home page instead of mailing any of the 
contributors directly. You will be surprised with the quality of open source support. 
");
              System.exit(-1);
          }
          String resourceLocation = "http://htmlparser.sourceforge.net";;
          if (args.length != 0)
              resourceLocation = args[0];
  
          MailRipper ripper = new MailRipper(resourceLocation);
          System.out.println("Ripping Site " + resourceLocation);
          try
          {
              for (Enumeration e = ripper.rip(); e.hasMoreElements();)
              {
                  LinkTag tag = (LinkTag) e.nextElement();
                  System.out.println("Ripped mail address : " + tag.getLink());
              }
          }
          catch (ParserException e)
          {
              e.printStackTrace();
          }
      }
      /**
       * Rip all mail addresses from the given url, and return an enumeration of such 
mail addresses.
       * @return Enumeration of mail addresses (a vector of LinkTag)
       */
      public Enumeration rip() throws ParserException
      {
          Node node;
          Vector mailAddresses = new Vector();
          for (NodeIterator e = parser.elements(); e.hasMoreNodes();)
          {
              node = e.nextNode();
              if (node instanceof LinkTag)
              {
                  LinkTag linkTag = (LinkTag) node;
                  if (linkTag.isMailLink())
                      mailAddresses.addElement(linkTag);
              }
          }
          return mailAddresses.elements();
      }
  }
  
  
  
  1.1                  
jakarta-jmeter/src/htmlparser/org/htmlparser/parserapplications/Robot.java
  
  Index: Robot.java
  ===================================================================
  /*
   * ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2001-2003 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   * notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   * notice, this list of conditions and the following disclaimer in
   * the documentation and/or other materials provided with the
   * distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   * if any, must include the following acknowledgment:
   * "This product includes software developed by the
   * Apache Software Foundation (http://www.apache.org/)."
   * Alternately, this acknowledgment may appear in the software itself,
   * if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   * "Apache JMeter" must not be used to endorse or promote products
   * derived from this software without prior written permission. For
   * written permission, please contact [EMAIL PROTECTED]
   *
   * 5. Products derived from this software may not be called "Apache",
   * "Apache JMeter", nor may "Apache" appear in their name, without
   * prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   * 
   */
  
  // The developers of JMeter and Apache are grateful to the developers
  // of HTMLParser for giving Apache Software Foundation a non-exclusive
  // license. The performance benefits of HTMLParser are clear and the
  // users of JMeter will benefit from the hard work of the HTMLParser
  // team. For detailed information about HTMLParser, the project is
  // hosted on sourceforge at http://htmlparser.sourceforge.net/.
  //
  // HTMLParser was originally created by Somik Raha in 2000. Since then
  // a healthy community of users has formed and helped refine the
  // design so that it is able to tackle the difficult task of parsing
  // dirty HTML. Derrick Oswald is the current lead developer and was kind
  // enough to assist JMeter.
  
  package org.htmlparser.parserapplications;
  import org.htmlparser.Node;
  import org.htmlparser.Parser;
  import org.htmlparser.tags.LinkTag;
  import org.htmlparser.util.DefaultParserFeedback;
  import org.htmlparser.util.NodeIterator;
  import org.htmlparser.util.ParserException;
  /**
   * The Robot Crawler application will crawl through urls recursively,
   * based on a depth value.
   */
  public class Robot
  {
      private org.htmlparser.Parser parser;
      /**
       * Robot crawler - Provide the starting url 
       */
      public Robot(String resourceLocation)
      {
          try
          {
              parser = new Parser(resourceLocation, new DefaultParserFeedback());
              parser.registerScanners();
          }
          catch (ParserException e)
          {
              System.err.println("Error, could not create parser object");
              e.printStackTrace();
          }
      }
      /**
       * Crawl using a given crawl depth.
       * @param crawlDepth Depth of crawling
       */
      public void crawl(int crawlDepth) throws ParserException
      {
          try
          {
              crawl(parser, crawlDepth);
          }
          catch (ParserException e)
          {
              throw new ParserException(
                  "HTMLParserException at crawl(" + crawlDepth + ")",
                  e);
          }
      }
      /**
       * Crawl using a given parser object, and a given crawl depth.
       * @param parser Parser object
       * @param crawlDepth Depth of crawling
       */
      public void crawl(Parser parser, int crawlDepth) throws ParserException
      {
          System.out.println(" crawlDepth = " + crawlDepth);
          for (NodeIterator e = parser.elements(); e.hasMoreNodes();)
          {
              Node node = e.nextNode();
              if (node instanceof LinkTag)
              {
                  LinkTag linkTag = (LinkTag) node;
                  {
                      if (!linkTag.isMailLink())
                      {
                          if (linkTag.getLink().toUpperCase().indexOf("HTM")
                              != -1
                              || linkTag.getLink().toUpperCase().indexOf("COM")
                                  != -1
                              || linkTag.getLink().toUpperCase().indexOf("ORG")
                                  != -1)
                          {
                              if (crawlDepth > 0)
                              {
                                  Parser newParser =
                                      new Parser(
                                          linkTag.getLink(),
                                          new DefaultParserFeedback());
                                  newParser.registerScanners();
                                  System.out.print(
                                      "Crawling to " + linkTag.getLink());
                                  crawl(newParser, crawlDepth - 1);
                              }
                              else
                                  System.out.println(linkTag.getLink());
                          }
                      }
                  }
              }
          }
      }
  
      public static void main(String[] args)
      {
          System.out.println("Robot Crawler v" + Parser.getVersion());
          if (args.length < 2 || args[0].equals("-help"))
          {
              System.out.println();
              System.out.println(
                  "Syntax : java -classpath htmlparser.jar 
org.htmlparser.parserapplications.Robot <resourceLocn/website> <depth>");
              System.out.println();
              System.out.println(
                  "   <resourceLocn> the name of the file to be parsed (with complete 
path ");
              System.out.println(
                  "                  if not in current directory)");
              System.out.println(
                  "   <depth> No of links to be followed from each link");
              System.out.println("   -help This screen");
              System.out.println();
              System.out.println(
                  "HTML Parser home page : http://htmlparser.sourceforge.net";);
              System.out.println();
              System.out.println(
                  "Example : java -classpath htmlparser.jar 
com.kizna.parserapplications.Robot http://www.google.com 3");
              System.out.println();
              System.out.println(
                  "If you have any doubts, please join the HTMLParser mailing list 
(user/developer) from the HTML Parser home page instead of mailing any of the 
contributors directly. You will be surprised with the quality of open source support. 
");
              System.exit(-1);
          }
          String resourceLocation = "";
          int crawlDepth = 1;
          if (args.length != 0)
              resourceLocation = args[0];
          if (args.length == 2)
              crawlDepth = Integer.valueOf(args[1]).intValue();
  
          Robot robot = new Robot(resourceLocation);
          System.out.println("Crawling Site " + resourceLocation);
          try
          {
              robot.crawl(crawlDepth);
          }
          catch (ParserException e)
          {
              e.printStackTrace();
          }
      }
  }
  
  
  
  1.1                  
jakarta-jmeter/src/htmlparser/org/htmlparser/parserapplications/StringExtractor.java
  
  Index: StringExtractor.java
  ===================================================================
  /*
   * ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2001-2003 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   * notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   * notice, this list of conditions and the following disclaimer in
   * the documentation and/or other materials provided with the
   * distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   * if any, must include the following acknowledgment:
   * "This product includes software developed by the
   * Apache Software Foundation (http://www.apache.org/)."
   * Alternately, this acknowledgment may appear in the software itself,
   * if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   * "Apache JMeter" must not be used to endorse or promote products
   * derived from this software without prior written permission. For
   * written permission, please contact [EMAIL PROTECTED]
   *
   * 5. Products derived from this software may not be called "Apache",
   * "Apache JMeter", nor may "Apache" appear in their name, without
   * prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   * 
   */
  
  // The developers of JMeter and Apache are grateful to the developers
  // of HTMLParser for giving Apache Software Foundation a non-exclusive
  // license. The performance benefits of HTMLParser are clear and the
  // users of JMeter will benefit from the hard work of the HTMLParser
  // team. For detailed information about HTMLParser, the project is
  // hosted on sourceforge at http://htmlparser.sourceforge.net/.
  //
  // HTMLParser was originally created by Somik Raha in 2000. Since then
  // a healthy community of users has formed and helped refine the
  // design so that it is able to tackle the difficult task of parsing
  // dirty HTML. Derrick Oswald is the current lead developer and was kind
  // enough to assist JMeter.
  
  package org.htmlparser.parserapplications;
  
  import org.htmlparser.beans.StringBean;
  import org.htmlparser.util.ParserException;
  
  public class StringExtractor
  {
      private String resource;
  
      /**
       * Construct a StringExtractor to read from the given resource.
       * @param resource Either a URL or a file name.
       */
      public StringExtractor(String resource)
      {
          this.resource = resource;
      }
  
      /**
       * Extract the text from a page.
       * @param links if <code>true</code> include hyperlinks in output.
       * @return The textual contents of the page.
       */
      public String extractStrings(boolean links) throws ParserException
      {
          StringBean sb;
  
          sb = new StringBean();
          sb.setLinks(links);
          sb.setURL(resource);
  
          return (sb.getStrings());
      }
  
      /**
       * Mainline.
       * @param args The command line arguments.
       */
      public static void main(String[] args)
      {
          boolean links;
          String url;
          StringExtractor se;
  
          links = false;
          url = null;
          for (int i = 0; i < args.length; i++)
              if (args[i].equalsIgnoreCase("-links"))
                  links = true;
              else
                  url = args[i];
          if (null != url)
          {
              se = new StringExtractor(url);
              try
              {
                  System.out.println(se.extractStrings(links));
              }
              catch (ParserException e)
              {
                  e.printStackTrace();
              }
          }
          else
              System.out.println(
                  "Usage: java -classpath htmlparser.jar 
org.htmlparser.parserapplications.StringExtractor [-links] url");
      }
  }
  
  
  
  1.1                  
jakarta-jmeter/src/htmlparser/org/htmlparser/parserapplications/package.html
  
  Index: package.html
  ===================================================================
  <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
  <html>
  <head>
  <!--
    @(#)package.html    1.60 98/01/27
  
  /*
   * ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2001-2003 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   * notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   * notice, this list of conditions and the following disclaimer in
   * the documentation and/or other materials provided with the
   * distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   * if any, must include the following acknowledgment:
   * "This product includes software developed by the
   * Apache Software Foundation (http://www.apache.org/)."
   * Alternately, this acknowledgment may appear in the software itself,
   * if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   * "Apache JMeter" must not be used to endorse or promote products
   * derived from this software without prior written permission. For
   * written permission, please contact [EMAIL PROTECTED]
   *
   * 5. Products derived from this software may not be called "Apache",
   * "Apache JMeter", nor may "Apache" appear in their name, without
   * prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   * 
   */
  
  // The developers of JMeter and Apache are grateful to the developers
  // of HTMLParser for giving Apache Software Foundation a non-exclusive
  // license. The performance benefits of HTMLParser are clear and the
  // users of JMeter will benefit from the hard work of the HTMLParser
  // team. For detailed information about HTMLParser, the project is
  // hosted on sourceforge at http://htmlparser.sourceforge.net/.
  //
  // HTMLParser was originally created by Somik Raha in 2000. Since then
  // a healthy community of users has formed and helped refine the
  // design so that it is able to tackle the difficult task of parsing
  // dirty HTML. Derrick is the current lead developer and was kind
  // enough to assist JMeter.
  -->
  </head>
  <body bgcolor="white">
  Developers and users alike should try out the applications in this package. The code 
of these applications will give
  a good idea about the capabilities of the HTML Parser, and its intended usage. The 
binary releases of html parser would 
  typically contain these applications in runnable form.
  
  <h2>Related Documentation</h2>
  
  For overviews, tutorials, examples, guides, and tool documentation, please see:
  <ul>
    <li><a href="http://htmlparser.sourceforge.net">HTML Parser Home Page</a>
  </ul>
  
  <!-- Put @see and @since tags down here. -->
  
  </body>
  </html>


---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

cvs commit: jakarta-jmeter/src/htmlparser/org/htmlparser/parserapplications LinkExtractor.java MailRipper.java Robot.java StringExtractor.java package.html

Reply via email to