generation LinkStatusGenerator.java

vgritsenko Fri, 16 Aug 2002 20:02:50 -0700

vgritsenko    2002/08/16 20:25:39

  Modified:    src/java/org/apache/cocoon/generation
                        LinkStatusGenerator.java
  Log:
  Formatting changes
  
  Revision  Changes    Path
  1.6       +88 -88    xml-cocoon2/src/java/org/apache/cocoon/generation/LinkStatusGenerator.java
  
  Index: LinkStatusGenerator.java
  ===================================================================
  RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/generation/LinkStatusGenerator.java,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- LinkStatusGenerator.java  4 Aug 2002 18:33:51 -0000       1.5
  +++ LinkStatusGenerator.java  17 Aug 2002 03:25:39 -0000      1.6
  @@ -1,36 +1,36 @@
   /*
  - 
  +
    ============================================================================
                      The Apache Software License, Version 1.1
    ============================================================================
  - 
  +
    Copyright (C) 1999-2002 The Apache Software Foundation. All rights reserved.
  - 
  +
    Redistribution and use in source and binary forms, with or without modifica-
    tion, are permitted provided that the following conditions are met:
  - 
  +
    1. Redistributions of  source code must  retain the above copyright  notice,
       this list of conditions and the following disclaimer.
  - 
  +
    2. Redistributions in binary form must reproduce the above copyright notice,
       this list of conditions and the following disclaimer in the documentation
       and/or other materials provided with the distribution.
  - 
  +
    3. The end-user documentation included with the redistribution, if any, must
       include  the following  acknowledgment:  "This product includes  software
       developed  by the  Apache Software Foundation  (http://www.apache.org/)."
       Alternately, this  acknowledgment may  appear in the software itself,  if
       and wherever such third-party acknowledgments normally appear.
  - 
  +
    4. The names "Apache Cocoon" and  "Apache Software Foundation" must  not  be
       used to  endorse or promote  products derived from  this software without
       prior written permission. For written permission, please contact
       [EMAIL PROTECTED]
  - 
  +
    5. Products  derived from this software may not  be called "Apache", nor may
       "Apache" appear  in their name,  without prior written permission  of the
       Apache Software Foundation.
  - 
  +
    THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    FITNESS  FOR A PARTICULAR  PURPOSE ARE  DISCLAIMED.  IN NO  EVENT SHALL  THE
  @@ -41,12 +41,12 @@
    ANY  THEORY OF LIABILITY,  WHETHER  IN CONTRACT,  STRICT LIABILITY,  OR TORT
    (INCLUDING  NEGLIGENCE OR  OTHERWISE) ARISING IN  ANY WAY OUT OF THE  USE OF
    THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  - 
  +
    This software  consists of voluntary contributions made  by many individuals
    on  behalf of the Apache Software  Foundation and was  originally created by
    Stefano Mazzocchi  <[EMAIL PROTECTED]>. For more  information on the Apache
    Software Foundation, please see <http://www.apache.org/>.
  - 
  +
    */
   package org.apache.cocoon.generation;
   
  @@ -91,22 +91,22 @@
       /** The URI of the namespace of this generator. */
       protected static final String URI =
       "http://apache.org/cocoon/linkstatus/2.0";
  -    
  +
       /** The namespace prefix for this namespace. */
       protected static final String PREFIX = "linkstatus";
  -    
  +
       /* Node and attribute names */
       protected static final String TOP_NODE_NAME         = "linkstatus";
       protected static final String LINK_NODE_NAME         = "link";
  -    
  +
       protected static final String HREF_ATTR_NAME    = "href";
       protected static final String REFERRER_ATTR_NAME    = "referrer";
       protected static final String CONTENT_ATTR_NAME    = "content";
       protected static final String STATUS_ATTR_NAME    = "status";
       protected static final String MESSAGE_ATTR_NAME    = "message";
  -    
  +
       protected AttributesImpl attributes = new AttributesImpl();
  -    
  +
       /**
        * Config element name specifying expected link content-typ.
        * <p>
  @@ -116,7 +116,7 @@
        * @since
        */
       public final static String LINK_CONTENT_TYPE_CONFIG = "link-content-type";
  -    
  +
       /**
        * Default value of <code>link-content-type</code> configuration value.
        * <p>
  @@ -126,7 +126,7 @@
        * @since
        */
       public final String LINK_CONTENT_TYPE_DEFAULT = "application/x-cocoon-links";
  -    
  +
       /**
        * Config element name specifying query-string appendend for requesting links
        * of an URL.
  @@ -146,7 +146,7 @@
        * @since
        */
       public final static String LINK_VIEW_QUERY_DEFAULT = "cocoon-view=links";
  -    
  +
       /**
        * Config element name specifying excluding regular expression pattern.
        * <p>
  @@ -156,7 +156,7 @@
        * @since
        */
       public final static String EXCLUDE_CONFIG = "exclude";
  -    
  +
       /**
        * Config element name specifying including regular expression pattern.
        * <p>
  @@ -166,7 +166,7 @@
        * @since
        */
       public final static String INCLUDE_CONFIG = "include";
  -    
  +
       /**
        * Config element name specifying http header value for user-Agent.
        * <p>
  @@ -183,7 +183,7 @@
        * @since
        */
       public final static String USER_AGENT_DEFAULT = Constants.COMPLETE_NAME;
  -    
  +
       /**
        * Config element name specifying http header value for accept.
        * <p>
  @@ -202,42 +202,42 @@
        * @since
        */
       public final static String ACCEPT_DEFAULT = "*/*";
  -    
  +
       private String linkViewQuery = LINK_VIEW_QUERY_DEFAULT;
       private String linkContentType = LINK_CONTENT_TYPE_DEFAULT;
       private HashSet excludeCrawlingURL;
       private HashSet includeCrawlingURL;
       private String userAgent = USER_AGENT_DEFAULT;
       private String accept = ACCEPT_DEFAULT;
  -    
  +
       private HashSet crawled;
       private HashSet linksToProcess;
  -    
  +
       /**
        * Stores links to process and the referrer links
        */
       private class Link {
           private URL url;
           private String referrer;
  -        
  +
           public Link( URL url, String referrer ) {
               this.url = url;
               this.referrer = referrer;
           }
  -        
  +
           public URL getURL() {
               return url;
           }
  -        
  +
           public String getReferrer() {
               return referrer;
           }
  -        
  +
           public boolean equals( Link l ) {
               return url.equals( l.getURL());
           }
       }
  -    
  +
       /**
        * Configure the crawler component.
        * <p>
  @@ -264,7 +264,7 @@
        */
       public void configure(Configuration configuration)
       throws ConfigurationException {
  -        
  +
           Configuration[] children;
           children = configuration.getChildren(INCLUDE_CONFIG);
           if (children != null && children.length > 0) {
  @@ -283,7 +283,7 @@
                   }
               }
           }
  -        
  +
           children = configuration.getChildren(EXCLUDE_CONFIG);
           if (children != null && children.length > 0) {
               excludeCrawlingURL = new HashSet();
  @@ -304,7 +304,7 @@
               excludeCrawlingURL = new HashSet();
               setDefaultExcludeFromCrawling();
           }
  -        
  +
           Configuration child;
           String value;
           child = configuration.getChild(LINK_CONTENT_TYPE_CONFIG, false);
  @@ -321,7 +321,7 @@
                   this.linkViewQuery = value.trim();
               }
           }
  -        
  +
           child = configuration.getChild(USER_AGENT_CONFIG, false);
           if (child != null) {
               value = child.getValue();
  @@ -329,7 +329,7 @@
                   this.userAgent = value;
               }
           }
  -        
  +
           child = configuration.getChild(ACCEPT_CONFIG, false);
           if (child != null) {
               value = child.getValue();
  @@ -338,20 +338,20 @@
               }
           }
       }
  -    
  +
       public void setup(SourceResolver resolver, Map objectModel, String src, Parameters par)
       throws ProcessingException, SAXException, IOException {
  -        
  +
           super.setup(resolver, objectModel, src, par);
  -        
  +
           /* Create a reusable attributes for creating nodes */
           this.attributes = new AttributesImpl();
  -        
  +
           // already done in configure...
           //excludeCrawlingURL = new HashSet();
           //this.setDefaultExcludeFromCrawling();
       }
  -    
  +
       /**
        * Generate XML data.
        *
  @@ -363,40 +363,40 @@
       public void generate()
       throws SAXException, ProcessingException {
           try {
  -            
  +
               crawled = new HashSet();
               linksToProcess = new HashSet();
  -            
  +
               URL root = new URL(source);
               linksToProcess.add(new Link( root, ""));
  -            
  -            
  +
  +
               if (getLogger().isDebugEnabled()) {
                   getLogger().debug("crawl URL " + root);
               }
  -            
  +
               this.contentHandler.startDocument();
               this.contentHandler.startPrefixMapping(PREFIX,URI);
  -            
  +
               attributes.clear();
               super.contentHandler.startElement(URI, TOP_NODE_NAME, URI+':'+TOP_NODE_NAME, attributes);
  -            
  +
               while (linksToProcess.size() > 0) {
                   Iterator i = linksToProcess.iterator();
  -                
  +
                   if (i.hasNext()) {
                       // fetch a URL
                       Link link = (Link) i.next();
                       URL url = link.getURL();
  -                    
  +
                       // remove it from the to-do list
                       linksToProcess.remove(link);
  -                    
  +
                       String new_url_link = processURL(url, link.getReferrer());
  -                    
  +
                       // calc all links from this url
                       if (new_url_link != null) {
  -                        
  +
                           List url_links = getLinksFromConnection(new_url_link, url);
                           if (url_links != null) {
                               // add links of this url to the to-do list
  @@ -405,7 +405,7 @@
                       }
                   }
               }
  -            
  +
               super.contentHandler.endElement(URI, TOP_NODE_NAME, URI+':'+TOP_NODE_NAME);
               this.contentHandler.endPrefixMapping(PREFIX);
               this.contentHandler.endDocument();
  @@ -414,7 +414,7 @@
               throw new ResourceNotFoundException("Could not read source ", ioe);
           }
       }
  -    
  +
       /**
        * Default exclude patterns.
        * <p>
  @@ -438,7 +438,7 @@
               ".*\\.js(\\?.*)?$",
               ".*\\.css(\\?.*)?$"
           };
  -        
  +
           for (int i = 0; i < EXCLUDE_FROM_CRAWLING_DEFAULT.length; i++) {
               String pattern = EXCLUDE_FROM_CRAWLING_DEFAULT[i];
               try {
  @@ -449,8 +449,8 @@
               }
           }
       }
  -    
  -    
  +
  +
       /**
        * Retrieve a list of links of a url
        *
  @@ -469,28 +469,28 @@
               URL url_link = new URL( url_link_string );
               URLConnection conn = url_link.openConnection();
               String content_type = conn.getContentType();
  -            
  +
               if (content_type == null) {
                   getLogger().warn( "No content type available for " + String.valueOf( url_link_string ) );
                   // caller checks if null
                   return url_links;
               }
  -            
  +
               if (getLogger().isDebugEnabled()) {
                   getLogger().debug("Content-type: " + content_type);
               }
  -            
  +
               if (content_type.equals(linkContentType)) {
                   url_links = new ArrayList();
  -                
  +
                   InputStream is = conn.getInputStream();
                   br = new BufferedReader(new InputStreamReader(is));
  -                
  +
                   // content is supposed to be a list of links,
                   // relative to current URL
                   String line;
                   String referrer = url_of_referrer.toString();
  -                
  +
                   while ((line = br.readLine()) != null) {
                       URL new_url = new URL(url_link, line);
                       boolean add_url = true;
  @@ -498,22 +498,22 @@
                       if (add_url) {
                           add_url &= !url_links.contains(new_url);
                       }
  -                    
  +
                       // don't add new_url if it has been crawled already
                       if (add_url) {
                           add_url &= !crawled.contains(new_url.toString());
                       }
  -                    
  +
                       Link new_link = new Link( new_url, referrer );
                       if (add_url) {
                           add_url &= !linksToProcess.contains(new_link);
                       }
  -                    
  +
                       // don't add if is not matched by existing include definition
                       if (add_url) {
                           add_url &= isIncludedURL(new_url.toString());
                       }
  -                    
  +
                       if (add_url) {
                           if (getLogger().isDebugEnabled()) {
                               getLogger().debug("Add URL: " + new_url.toString());
  @@ -536,7 +536,7 @@
           }
           return url_links;
       }
  -    
  +
       /**
        * Generate xml attributes of a url, calculate url for retrieving links
        *
  @@ -546,43 +546,43 @@
        *   and not an included-url.
        */
       protected String processURL(URL url, String referrer) throws SAXException {
  -        
  +
           if (getLogger().isDebugEnabled()) {
               getLogger().debug("getLinks URL " + url);
           }
  -        
  +
           String result = null;
  -        
  +
           // don't try to investigate a url which has been crawled already
           if (crawled.contains(url.toString())) {
               return null;
           }
  -        
  +
           // mark it as crawled
           crawled.add(url.toString());
  -        
  +
           attributes.clear();
           attributes.addAttribute("", HREF_ATTR_NAME,
           HREF_ATTR_NAME, "CDATA", url.toString());
           attributes.addAttribute("", REFERRER_ATTR_NAME,
           REFERRER_ATTR_NAME, "CDATA", referrer);
  -        
  +
           // Output url, referrer, content-type, status, message for traversable url's
           HttpURLConnection h = null;
           try {
  -            
  +
               URLConnection links_url_connection = url.openConnection();
               h = (HttpURLConnection)links_url_connection;
               String content_type = links_url_connection.getContentType();
  -            
  +
               attributes.addAttribute("", CONTENT_ATTR_NAME,
               CONTENT_ATTR_NAME, "CDATA",
               content_type);
  -            
  +
               attributes.addAttribute("", MESSAGE_ATTR_NAME,
               MESSAGE_ATTR_NAME, "CDATA",
               h.getResponseMessage());
  -            
  +
               attributes.addAttribute("", STATUS_ATTR_NAME,
               STATUS_ATTR_NAME, "CDATA",
               String.valueOf(h.getResponseCode()));
  @@ -595,7 +595,7 @@
                   h.disconnect();
               }
           }
  -        
  +
           // don't try to get links of a url which is excluded from crawling
           // try to get links of a url which is included for crawling
           if (!isExcludedURL(url.toString()) && isIncludedURL( url.toString() )) {
  @@ -604,13 +604,13 @@
               + ((url.toExternalForm().indexOf("?") == -1) ? "?" : "&")
               + linkViewQuery;
           }
  -        
  +
           super.contentHandler.startElement(URI, LINK_NODE_NAME, URI+':'+LINK_NODE_NAME, attributes);
           super.contentHandler.endElement(URI, LINK_NODE_NAME, URI+':'+LINK_NODE_NAME);
  -        
  +
           return result;
       }
  -    
  +
       /**
        * check if URL is a candidate for indexing
        *
  @@ -626,7 +626,7 @@
               }
               return false;
           }
  -        
  +
           final String s = url.toString();
           Iterator i = excludeCrawlingURL.iterator();
           while (i.hasNext()) {
  @@ -643,8 +643,8 @@
           }
           return false;
       }
  -    
  -    
  +
  +
       /**
        * check if URL is a candidate for indexing
        *
  @@ -660,7 +660,7 @@
               }
               return true;
           }
  -        
  +
           final String s = url.toString();
           Iterator i = includeCrawlingURL.iterator();
           while (i.hasNext()) {
  @@ -677,10 +677,10 @@
           }
           return false;
       }
  -    
  +
       public void recycle() {
           super.recycle();
  -        
  +
           this.attributes = null;
           //this.excludeCrawlingURL = null;
       }


----------------------------------------------------------------------
In case of troubles, e-mail:     [EMAIL PROTECTED]
To unsubscribe, e-mail:          [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

cvs commit: xml-cocoon2/src/java/org/apache/cocoon/generation LinkStatusGenerator.java

Reply via email to