vgritsenko 02/02/28 06:11:42 Modified: src/java/org/apache/cocoon/components/crawler SimpleCocoonCrawlerImpl.java src/java/org/apache/cocoon/components/search SimpleLuceneXMLIndexerImpl.java Log: Don't use jdk1.3 methods, use jdk1.2 Revision Changes Path 1.9 +15 -19 xml-cocoon2/src/java/org/apache/cocoon/components/crawler/SimpleCocoonCrawlerImpl.java Index: SimpleCocoonCrawlerImpl.java =================================================================== RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/crawler/SimpleCocoonCrawlerImpl.java,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- SimpleCocoonCrawlerImpl.java 27 Feb 2002 05:28:38 -0000 1.8 +++ SimpleCocoonCrawlerImpl.java 28 Feb 2002 14:11:42 -0000 1.9 @@ -82,7 +82,7 @@ * A simple cocoon crawler. * * @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a> - * @version CVS $Id: SimpleCocoonCrawlerImpl.java,v 1.8 2002/02/27 05:28:38 vgritsenko Exp $ + * @version CVS $Id: SimpleCocoonCrawlerImpl.java,v 1.9 2002/02/28 14:11:42 vgritsenko Exp $ */ public class SimpleCocoonCrawlerImpl extends AbstractLoggable implements CocoonCrawler, Configurable, Disposable, Recyclable @@ -118,6 +118,7 @@ * @since */ public final static String LINK_VIEW_QUERY_CONFIG = "link-view-query"; + /** * Default value of <code>link-view-query</code> configuration value. * <p> @@ -157,6 +158,7 @@ * @since */ public final static String USER_AGENT_CONFIG = "user-agent"; + /** * Default value of <code>user-agent</code> configuration value. * <p> @@ -176,6 +178,7 @@ * @since */ public final static String ACCEPT_CONFIG = "accept"; + /** * Default value of <code>accept</code> configuration value. 
* <p> @@ -445,37 +448,30 @@ */ private List getLinks(URL url) { ArrayList url_links = null; + String sURL = url.toString(); - if (getLogger().isDebugEnabled()) { - getLogger().debug("getLinks URL " + url); - } - - if (!isIncludedURL(url.toString())) { - return null; - } - // don't try to get links for url which is excluded - if (isExcludedURL(url.toString())) { + if (!isIncludedURL(sURL) || isExcludedURL(sURL)) { return null; } // don't try to get links for url which has been crawled already - if (crawled.contains(url.toString())) { + if (crawled.contains(sURL)) { return null; } // mark it as crawled - crawled.add(url.toString()); + crawled.add(sURL); + // get links of url if (getLogger().isDebugEnabled()) { - getLogger().debug("Get links of URL: " + url.toString()); + getLogger().debug("Getting links of URL " + sURL); } - - // get links of url try { - URL links_url = new URL(url, url.getPath() - + ((url.getPath().indexOf("?") == -1) ? "?" : "&") + sURL = url.getFile(); + URL links = new URL(url, sURL + + ((sURL.indexOf("?") == -1) ? "?" 
: "&") + linkViewQuery); - URLConnection links_url_connection = links_url.openConnection(); + URLConnection links_url_connection = links.openConnection(); InputStream is = links_url_connection.getInputStream(); BufferedReader br = new BufferedReader(new InputStreamReader(is)); @@ -602,7 +598,7 @@ * </p> * * @author <a href="mailto:[EMAIL PROTECTED]>Bernhard Huber</a> - * @version $Id: SimpleCocoonCrawlerImpl.java,v 1.8 2002/02/27 05:28:38 vgritsenko Exp $ + * @version $Id: SimpleCocoonCrawlerImpl.java,v 1.9 2002/02/28 14:11:42 vgritsenko Exp $ */ public static class CocoonCrawlerIterator implements Iterator { 1.9 +3 -3 xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneXMLIndexerImpl.java Index: SimpleLuceneXMLIndexerImpl.java =================================================================== RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneXMLIndexerImpl.java,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- SimpleLuceneXMLIndexerImpl.java 27 Feb 2002 05:28:38 -0000 1.8 +++ SimpleLuceneXMLIndexerImpl.java 28 Feb 2002 14:11:42 -0000 1.9 @@ -97,7 +97,7 @@ * A simple class building lucene documents from xml content. * * @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a> - * @version CVS $Id: SimpleLuceneXMLIndexerImpl.java,v 1.8 2002/02/27 05:28:38 vgritsenko Exp $ + * @version CVS $Id: SimpleLuceneXMLIndexerImpl.java,v 1.9 2002/02/28 14:11:42 vgritsenko Exp $ */ public class SimpleLuceneXMLIndexerImpl extends AbstractLoggable implements LuceneXMLIndexer, Configurable, Composable, ThreadSafe @@ -170,8 +170,8 @@ throws ProcessingException { try { - URL contentURL = new URL(url, url.getPath() - + ((url.getPath().indexOf("?") == -1) ? "?" : "&") + URL contentURL = new URL(url, url.getFile() + + ((url.getFile().indexOf("?") == -1) ? "?" 
: "&") + CONTENT_QUERY); URLConnection contentURLConnection = contentURL.openConnection(); String contentType = contentURLConnection.getContentType();
---------------------------------------------------------------------- In case of trouble, e-mail: [EMAIL PROTECTED] To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]