Attached are three changes for the indexing capability provided by Cocoon and Lucene. Details below:
1) The current code breaks any URL that already has parameters, because it always appends "?cocoon-view=" to the URL, which invalidates any existing parameters. The patch checks for a "?" in the URL and only adds one if the URL does not already contain one. This applies to SimpleCocoonCrawlerImpl.java and, in a similar way, to SimpleLuceneXMLIndexerImpl.java. 2) Changed a System.out call to a logger entry in SimpleLuceneCocoonIndexerImpl.java. 3) Added a logger entry when Lucene optimizes the index, in SimpleLuceneCocoonIndexerImpl.java. rgds CB Index: crawler/SimpleCocoonCrawlerImpl.java =================================================================== RCS file: /home/cvspublic/xml-cocoon2/src/java/org/apache/cocoon/components/crawler/Si mpleCocoonCrawlerImpl.java,v retrieving revision 1.1 diff -u -r1.1 SimpleCocoonCrawlerImpl.java --- crawler/SimpleCocoonCrawlerImpl.java 3 Jan 2002 12:31:09 -0000 1.1 +++ crawler/SimpleCocoonCrawlerImpl.java 20 Jan 2002 00:49:41 -0000 @@ -58,7 +58,7 @@ * @since */ public final String LINK_CONTENT_TYPE_DEFAULT = "application/x-cocoon-links"; - + /** * Config element name specifying query-string appendend for requesting links * of an URL. @@ -77,7 +77,7 @@ * * @since */ - public final static String LINK_VIEW_QUERY_DEFAULT = "?cocoon-view=links"; + public final static String LINK_VIEW_QUERY_DEFAULT = "&cocoon-view=links"; /** * Config element name specifying excluding regular expression pattern. 
@@ -199,7 +199,7 @@ this.includeCrawlingURL.add(new RE(tokenized_pattern)); } } catch (RESyntaxException rese) { - getLogger().error("Cannot create includeing regular-expression for " + + getLogger().error("Cannot create includeing regular-expression for " + pattern, rese); } } @@ -217,7 +217,7 @@ this.excludeCrawlingURL.add(new RE(tokenized_pattern)); } } catch (RESyntaxException rese) { - getLogger().error("Cannot create excluding regular-expression for " + + getLogger().error("Cannot create excluding regular-expression for " + pattern, rese); } } @@ -416,6 +416,9 @@ // get links of url try { + if (url.toString().indexOf("?")==-1){ + linkViewQuery = "?" + linkViewQuery; + } URL links_url = new URL(url, url.getPath() + linkViewQuery); URLConnection links_url_connection = links_url.openConnection(); InputStream is = links_url_connection.getInputStream(); Index: search/SimpleLuceneCocoonIndexerImpl.java =================================================================== RCS file: /home/cvspublic/xml-cocoon2/src/java/org/apache/cocoon/components/search/Sim pleLuceneCocoonIndexerImpl.java,v retrieving revision 1.1 diff -u -r1.1 SimpleLuceneCocoonIndexerImpl.java --- search/SimpleLuceneCocoonIndexerImpl.java 3 Jan 2002 12:31:13 -0000 1.1 +++ search/SimpleLuceneCocoonIndexerImpl.java 20 Jan 2002 00:49:42 -0000 @@ -198,8 +198,10 @@ // skip urls using different host, or port than host, // or port of base url - System.out.println("Skipping carwling URL " + crawl_url.toString() + + if (getLogger().isDebugEnabled()) { + getLogger().debug("Skipping crawling URL " + crawl_url.toString() + " as base_url is " + base_url.toString()); + } continue; } @@ -212,9 +214,15 @@ Document document = (Document) i.next(); writer.addDocument(document); } - } - // optimize it + + } + // optimize it writer.optimize(); + if (getLogger().isDebugEnabled()) { + getLogger().debug("Optimizing index" ); + } + + } catch (IOException ioe) { throw new ProcessingException("IOException in index()", ioe); } 
catch (ComponentException ce) { Index: search/SimpleLuceneXMLIndexerImpl.java =================================================================== RCS file: /home/cvspublic/xml-cocoon2/src/java/org/apache/cocoon/components/search/Sim pleLuceneXMLIndexerImpl.java,v retrieving revision 1.1 diff -u -r1.1 SimpleLuceneXMLIndexerImpl.java --- search/SimpleLuceneXMLIndexerImpl.java 3 Jan 2002 12:31:13 -0000 1.1 +++ search/SimpleLuceneXMLIndexerImpl.java 20 Jan 2002 00:49:42 -0000 @@ -29,7 +29,6 @@ import org.apache.avalon.framework.configuration.ConfigurationException; import org.apache.avalon.framework.logger.AbstractLoggable; -import org.apache.avalon.framework.logger.AbstractLoggable; import org.apache.avalon.framework.parameters.Parameters; import org.apache.avalon.framework.thread.ThreadSafe; import org.apache.cocoon.ProcessingException; @@ -80,7 +79,7 @@ * * @since */ - final String CONTENT_QUERY = "?cocoon-view=content"; + final String CONTENT_QUERY = "&cocoon-view=content"; /** * set of allowed content types @@ -163,7 +162,14 @@ throws ProcessingException { try { - URL contentURL = new URL(url, url.getPath() + CONTENT_QUERY); + + String contentQuery = CONTENT_QUERY; + + if (url.toString().indexOf("?")==-1){ + contentQuery = "?" + contentQuery; + } + + URL contentURL = new URL(url, url.getPath() + contentQuery); URLConnection contentURLConnection = contentURL.openConnection(); String contentType = contentURLConnection.getContentType(); if (contentType != null && --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, email: [EMAIL PROTECTED]