vgritsenko 02/01/23 11:06:39 Modified: src/java/org/apache/cocoon/components/crawler CocoonCrawler.java SimpleCocoonCrawlerImpl.java src/java/org/apache/cocoon/components/search LuceneCocoonHelper.java LuceneCocoonIndexer.java LuceneCocoonPager.java LuceneIndexContentHandler.java LuceneXMLIndexer.java SimpleLuceneCocoonIndexerImpl.java SimpleLuceneCocoonSearcherImpl.java SimpleLuceneXMLIndexerImpl.java Log: - Add getCountOfHits() to the pager to fill in the gap; - Organize imports - Use '&' instead of '?' when requesting an URI with parameters (patch idea by Colin Britton [[EMAIL PROTECTED]]) Revision Changes Path 1.2 +4 -3 xml-cocoon2/src/java/org/apache/cocoon/components/crawler/CocoonCrawler.java Index: CocoonCrawler.java =================================================================== RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/crawler/CocoonCrawler.java,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- CocoonCrawler.java 3 Jan 2002 12:31:09 -0000 1.1 +++ CocoonCrawler.java 23 Jan 2002 19:06:38 -0000 1.2 @@ -6,17 +6,18 @@ * the LICENSE file. * */ package org.apache.cocoon.components.crawler; -import java.net.*; -import java.util.*; import org.apache.avalon.framework.component.Component; +import java.net.URL; +import java.util.Iterator; + /** * The avalon behavioural component interface of crawling. 
* * @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a> - * @version CVS $Id: CocoonCrawler.java,v 1.1 2002/01/03 12:31:09 giacomo Exp $ + * @version CVS $Id: CocoonCrawler.java,v 1.2 2002/01/23 19:06:38 vgritsenko Exp $ */ public interface CocoonCrawler extends Component { 1.2 +19 -10 xml-cocoon2/src/java/org/apache/cocoon/components/crawler/SimpleCocoonCrawlerImpl.java Index: SimpleCocoonCrawlerImpl.java =================================================================== RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/crawler/SimpleCocoonCrawlerImpl.java,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- SimpleCocoonCrawlerImpl.java 3 Jan 2002 12:31:09 -0000 1.1 +++ SimpleCocoonCrawlerImpl.java 23 Jan 2002 19:06:38 -0000 1.2 @@ -7,13 +7,7 @@ */ package org.apache.cocoon.components.crawler; -import java.io.*; -import java.net.*; -import java.util.*; - -import org.apache.avalon.excalibur.pool.Recyclable; import org.apache.avalon.framework.activity.Disposable; - import org.apache.avalon.framework.configuration.Configurable; import org.apache.avalon.framework.configuration.Configuration; import org.apache.avalon.framework.configuration.ConfigurationException; @@ -21,6 +15,8 @@ import org.apache.avalon.framework.parameters.Parameters; import org.apache.avalon.framework.thread.ThreadSafe; +import org.apache.avalon.excalibur.pool.Recyclable; + import org.apache.cocoon.Constants; import org.apache.cocoon.util.Tokenizer; @@ -29,11 +25,22 @@ import org.apache.regexp.RE; import org.apache.regexp.RESyntaxException; +import java.io.InputStream; +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.io.IOException; +import java.net.URL; +import java.net.URLConnection; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.ArrayList; + /** * A simple cocoon crawler. 
* * @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a> - * @version CVS $Id: SimpleCocoonCrawlerImpl.java,v 1.1 2002/01/03 12:31:09 giacomo Exp $ + * @version CVS $Id: SimpleCocoonCrawlerImpl.java,v 1.2 2002/01/23 19:06:38 vgritsenko Exp $ */ public class SimpleCocoonCrawlerImpl extends AbstractLoggable implements CocoonCrawler, Configurable, Disposable, Recyclable @@ -77,7 +84,7 @@ * * @since */ - public final static String LINK_VIEW_QUERY_DEFAULT = "?cocoon-view=links"; + public final static String LINK_VIEW_QUERY_DEFAULT = "cocoon-view=links"; /** * Config element name specifying excluding regular expression pattern. @@ -416,7 +423,9 @@ // get links of url try { - URL links_url = new URL(url, url.getPath() + linkViewQuery); + URL links_url = new URL(url, url.getPath() + + ((url.getPath().indexOf("?") == -1) ? "?" : "&") + + linkViewQuery); URLConnection links_url_connection = links_url.openConnection(); InputStream is = links_url_connection.getInputStream(); BufferedReader br = new BufferedReader(new InputStreamReader(is)); @@ -546,7 +555,7 @@ * </p> * * @author <a href="mailto:[EMAIL PROTECTED]>Bernhard Huber</a> - * @version $Id: SimpleCocoonCrawlerImpl.java,v 1.1 2002/01/03 12:31:09 giacomo Exp $ + * @version $Id: SimpleCocoonCrawlerImpl.java,v 1.2 2002/01/23 19:06:38 vgritsenko Exp $ */ public static class CocoonCrawlerIterator implements Iterator { 1.2 +6 -9 xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneCocoonHelper.java Index: LuceneCocoonHelper.java =================================================================== RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneCocoonHelper.java,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- LuceneCocoonHelper.java 3 Jan 2002 12:31:13 -0000 1.1 +++ LuceneCocoonHelper.java 23 Jan 2002 19:06:38 -0000 1.2 @@ -9,20 +9,21 @@ import java.io.File; import java.io.IOException; -import org.apache.lucene.analysis.Analyzer; -import 
org.apache.lucene.index.*; -import org.apache.lucene.store.*; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.Directory; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; /** * This class encapsulates some helper methods. * * @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a> - * @version CVS $Id: LuceneCocoonHelper.java,v 1.1 2002/01/03 12:31:13 giacomo Exp $ + * @version CVS $Id: LuceneCocoonHelper.java,v 1.2 2002/01/23 19:06:38 vgritsenko Exp $ */ public class LuceneCocoonHelper { - /** *Gets the directory attribute of the LuceneCocoonHelper class * @@ -37,7 +38,6 @@ return fsDirectory; } - /** *Gets the analyzer attribute of the LuceneCocoonHelper class * @@ -55,7 +55,6 @@ return analyzer; } - /** *Gets the indexReader attribute of the LuceneCocoonHelper class * @@ -69,7 +68,6 @@ return reader; } - /** *Gets the indexWriter attribute of the LuceneCocoonHelper class * @@ -84,6 +82,5 @@ IndexWriter writer = new IndexWriter(index, analyzer, create); return writer; } - } 1.2 +1 -3 xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneCocoonIndexer.java Index: LuceneCocoonIndexer.java =================================================================== RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneCocoonIndexer.java,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- LuceneCocoonIndexer.java 3 Jan 2002 12:31:13 -0000 1.1 +++ LuceneCocoonIndexer.java 23 Jan 2002 19:06:38 -0000 1.2 @@ -19,11 +19,10 @@ * The avalon behavioural component interface of an indexer. 
* * @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a> - * @version CVS $Id: LuceneCocoonIndexer.java,v 1.1 2002/01/03 12:31:13 giacomo Exp $ + * @version CVS $Id: LuceneCocoonIndexer.java,v 1.2 2002/01/23 19:06:38 vgritsenko Exp $ */ public interface LuceneCocoonIndexer extends Component { - /** *Description of the Field * @@ -53,4 +52,3 @@ void index(Directory index, boolean create, URL base_url) throws ProcessingException; } - 1.2 +15 -8 xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneCocoonPager.java Index: LuceneCocoonPager.java =================================================================== RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneCocoonPager.java,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- LuceneCocoonPager.java 3 Jan 2002 12:31:13 -0000 1.1 +++ LuceneCocoonPager.java 23 Jan 2002 19:06:38 -0000 1.2 @@ -9,21 +9,19 @@ import java.io.File; import java.io.IOException; +import java.util.ListIterator; +import java.util.ArrayList; +import java.util.NoSuchElementException; -import java.util.*; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.index.*; import org.apache.lucene.search.Hits; -import org.apache.lucene.store.*; - -// implementtion of ListIterator /** * This class should help you to manage paging of hits. 
* * @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a> - * @version CVS $Id: LuceneCocoonPager.java,v 1.1 2002/01/03 12:31:13 giacomo Exp $ + * @version CVS $Id: LuceneCocoonPager.java,v 1.2 2002/01/23 19:06:38 vgritsenko Exp $ */ public class LuceneCocoonPager implements ListIterator { @@ -127,6 +125,16 @@ /** + * Get count of hits + * + * @return The count of hits + * @since + */ + public int getCountOfHits() { + return hits.length(); + } + + /** * Get count of hits displayed per single page * * @return The countOfHitsPerPage value @@ -136,7 +144,6 @@ return this.countOfHitsPerPage; } - /** * Caluclate count of pages for displaying all hits * @@ -303,7 +310,7 @@ * A helper class encapsulating found document, and its score * * @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a> - * @version CVS $Id: LuceneCocoonPager.java,v 1.1 2002/01/03 12:31:13 giacomo Exp $ + * @version CVS $Id: LuceneCocoonPager.java,v 1.2 2002/01/23 19:06:38 vgritsenko Exp $ */ public static class HitWrapper { 1.2 +4 -6 xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneIndexContentHandler.java Index: LuceneIndexContentHandler.java =================================================================== RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneIndexContentHandler.java,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- LuceneIndexContentHandler.java 3 Jan 2002 12:31:13 -0000 1.1 +++ LuceneIndexContentHandler.java 23 Jan 2002 19:06:38 -0000 1.2 @@ -6,17 +6,17 @@ * the LICENSE file. 
* */ package org.apache.cocoon.components.search; + import java.util.ArrayList; import java.util.Iterator; import java.util.List; - import java.util.Stack; -import org.apache.lucene.document.DateField; +import org.apache.lucene.document.DateField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.xml.sax.Attributes; +import org.xml.sax.Attributes; import org.xml.sax.ContentHandler; import org.xml.sax.InputSource; import org.xml.sax.Locator; @@ -27,7 +27,7 @@ * Parse XML and generate lucene document(s) * * @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a> - * @version CVS $Id: LuceneIndexContentHandler.java,v 1.1 2002/01/03 12:31:13 giacomo Exp $ + * @version CVS $Id: LuceneIndexContentHandler.java,v 1.2 2002/01/23 19:06:38 vgritsenko Exp $ */ public class LuceneIndexContentHandler implements ContentHandler { @@ -222,6 +222,4 @@ * @since */ public void startPrefixMapping(String prefix, String uri) { } - } - 1.2 +3 -2 xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneXMLIndexer.java Index: LuceneXMLIndexer.java =================================================================== RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneXMLIndexer.java,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- LuceneXMLIndexer.java 3 Jan 2002 12:31:13 -0000 1.1 +++ LuceneXMLIndexer.java 23 Jan 2002 19:06:38 -0000 1.2 @@ -6,11 +6,13 @@ * the LICENSE file. 
* */ package org.apache.cocoon.components.search; + import java.net.URL; import java.util.Iterator; import java.util.List; import org.apache.avalon.framework.component.Component; + import org.apache.cocoon.ProcessingException; /** @@ -31,7 +33,7 @@ * </p> * * @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a> - * @version CVS $Id: LuceneXMLIndexer.java,v 1.1 2002/01/03 12:31:13 giacomo Exp $ + * @version CVS $Id: LuceneXMLIndexer.java,v 1.2 2002/01/23 19:06:38 vgritsenko Exp $ */ public interface LuceneXMLIndexer extends Component { @@ -129,4 +131,3 @@ */ void build(URL url) throws ProcessingException; } - 1.2 +4 -2 xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneCocoonIndexerImpl.java Index: SimpleLuceneCocoonIndexerImpl.java =================================================================== RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneCocoonIndexerImpl.java,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- SimpleLuceneCocoonIndexerImpl.java 3 Jan 2002 12:31:13 -0000 1.1 +++ SimpleLuceneCocoonIndexerImpl.java 23 Jan 2002 19:06:38 -0000 1.2 @@ -46,7 +46,7 @@ * </p> * * @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a> - * @version CVS $Revision: 1.1 $ $Date: 2002/01/03 12:31:13 $ + * @version CVS $Revision: 1.2 $ $Date: 2002/01/23 19:06:38 $ */ public class SimpleLuceneCocoonIndexerImpl extends AbstractLoggable implements LuceneCocoonIndexer, Configurable, Composable, Disposable @@ -198,8 +198,10 @@ // skip urls using different host, or port than host, // or port of base url - System.out.println("Skipping carwling URL " + crawl_url.toString() + + if (getLogger().isDebugEnabled()) { + getLogger().debug("Skipping crawling URL " + crawl_url.toString() + " as base_url is " + base_url.toString()); + } continue; } 1.2 +3 -6 xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneCocoonSearcherImpl.java Index: SimpleLuceneCocoonSearcherImpl.java 
=================================================================== RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneCocoonSearcherImpl.java,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- SimpleLuceneCocoonSearcherImpl.java 3 Jan 2002 12:31:13 -0000 1.1 +++ SimpleLuceneCocoonSearcherImpl.java 23 Jan 2002 19:06:38 -0000 1.2 @@ -14,22 +14,19 @@ import org.apache.avalon.excalibur.pool.Recyclable; import org.apache.avalon.framework.activity.Disposable; - import org.apache.avalon.framework.component.ComponentException; import org.apache.avalon.framework.component.ComponentManager; import org.apache.avalon.framework.component.Composable; - import org.apache.avalon.framework.configuration.Configurable; import org.apache.avalon.framework.configuration.Configuration; import org.apache.avalon.framework.configuration.ConfigurationException; - import org.apache.avalon.framework.logger.AbstractLoggable; import org.apache.cocoon.ProcessingException; import org.apache.cocoon.util.ClassUtils; + import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.DateField; - import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.*; @@ -57,7 +54,7 @@ * </p> * * @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a> - * @version CVS $Revision: 1.1 $ $Date: 2002/01/03 12:31:13 $ + * @version CVS $Revision: 1.2 $ $Date: 2002/01/23 19:06:38 $ */ public class SimpleLuceneCocoonSearcherImpl extends AbstractLoggable implements LuceneCocoonSearcher, Configurable, Composable, Disposable, Recyclable @@ -218,8 +215,8 @@ this.directory = directory; if (indexReaderCache != null) { indexReaderCache.close(); + indexReaderCache = null; } - indexReaderCache = null; } 1.2 +5 -3 xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneXMLIndexerImpl.java Index: SimpleLuceneXMLIndexerImpl.java 
=================================================================== RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneXMLIndexerImpl.java,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- SimpleLuceneXMLIndexerImpl.java 3 Jan 2002 12:31:13 -0000 1.1 +++ SimpleLuceneXMLIndexerImpl.java 23 Jan 2002 19:06:38 -0000 1.2 @@ -54,7 +54,7 @@ * A simple class building lucene documents from xml content. * * @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a> - * @version CVS $Revision: 1.1 $ $Date: 2002/01/03 12:31:13 $ + * @version CVS $Revision: 1.2 $ $Date: 2002/01/23 19:06:38 $ */ public class SimpleLuceneXMLIndexerImpl extends AbstractLoggable implements LuceneXMLIndexer, Configurable, Composable @@ -80,7 +80,7 @@ * * @since */ - final String CONTENT_QUERY = "?cocoon-view=content"; + final String CONTENT_QUERY = "cocoon-view=content"; /** * set of allowed content types @@ -163,7 +163,9 @@ throws ProcessingException { try { - URL contentURL = new URL(url, url.getPath() + CONTENT_QUERY); + URL contentURL = new URL(url, url.getPath() + + ((url.getPath().indexOf("?") == -1) ? "?" : "&") + + CONTENT_QUERY); URLConnection contentURLConnection = contentURL.openConnection(); String contentType = contentURLConnection.getContentType(); if (contentType != null &&
---------------------------------------------------------------------- In case of trouble, e-mail: [EMAIL PROTECTED] To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]