jeremy 2002/11/23 06:12:46 Modified: src/java/org/apache/cocoon/components/search SimpleLuceneXMLIndexerImpl.java Log: added configuration parameters for 'content-view-query' and 'store-fields'. Revision Changes Path 1.14 +82 -4 xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneXMLIndexerImpl.java Index: SimpleLuceneXMLIndexerImpl.java =================================================================== RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneXMLIndexerImpl.java,v retrieving revision 1.13 retrieving revision 1.14 diff -u -r1.13 -r1.14 --- SimpleLuceneXMLIndexerImpl.java 3 Aug 2002 02:33:35 -0000 1.13 +++ SimpleLuceneXMLIndexerImpl.java 23 Nov 2002 14:12:46 -0000 1.14 @@ -91,10 +91,32 @@ import org.xml.sax.SAXParseException; import org.xml.sax.XMLReader; +import org.apache.cocoon.util.Tokenizer; + + /** * A simple class building lucene documents from xml content. * + * <p> + * It has two parameters that affect the way it works: + * </p><p> + * <tt><store-fields/></tt> + * Sets which tags in your content are stored in Lucene as fields, + * during the indexing process. Allows them to be output with search hits. + * </p><p> + * <tt><content-view-query/></tt> + * Sets the view the indexer will request for indexing content. + * </p><p> + * Example configuration (goes in cocoon.xconf) + * <pre><tt> + * <lucene-xml-indexer logger="core.search.lucene"> + * <store-fields>title, summary</store-fields> + * <content-view-query>cocoon-view=search</content-view-query> + * </lucene-xml-indexer> + * </tt></pre></p> + * * @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a> + * @author <a href="mailto:[EMAIL PROTECTED]">Jeremy Quinn</a> * @version CVS $Id$ */ public class SimpleLuceneXMLIndexerImpl extends AbstractLogEnabled @@ -108,12 +130,34 @@ protected ComponentManager manager = null; /** + * Config element name specifying query-string appended for requesting the content view + * of a URL. 
+ * <p> + * Its value is <code>content-view-query</code>. + * </p> + * + * @since + */ + public final static String CONTENT_VIEW_QUERY_CONFIG = "content-view-query"; + + /** * append this string to the url in order to get the * content view of the url * * @since */ - final String CONTENT_QUERY = "cocoon-view=content"; + + final String CONTENT_VIEW_QUERY_DEFAULT = "cocoon-view=content"; + + /** + * Config element name specifying the tags to be added as Stored, Untokenised, Unindexed Fields. + * <p> + * Its value is <code>store-fields</code>. + * </p> + * + * @since + */ + public final static String FIELDTAGS_CONFIG = "store-fields"; /** * set of allowed content types @@ -130,7 +174,12 @@ allowedContentType = new HashSet(); allowedContentType.add("text/xml"); allowedContentType.add("text/xhtml"); + fieldTags = new HashSet(); } + + + private String contentViewQuery = CONTENT_VIEW_QUERY_DEFAULT; + private HashSet fieldTags; /** @@ -140,7 +189,35 @@ * @exception ConfigurationException Description of Exception * @since */ - public void configure(Configuration conf) throws ConfigurationException { } + public void configure(Configuration configuration) throws ConfigurationException { + + Configuration[] children; + children = configuration.getChildren(FIELDTAGS_CONFIG); + if (children != null && children.length > 0) { + fieldTags = new HashSet(); + for (int i = 0; i < children.length; i++) { + String pattern = children[i].getValue(); + Tokenizer t = new Tokenizer(pattern, ", "); + while (t.hasMoreTokens()) { + String tokenized_pattern = t.nextToken(); + if (!tokenized_pattern.equals("")) { + this.fieldTags.add(tokenized_pattern); + if (getLogger().isDebugEnabled()) { + getLogger().debug("add field: " + tokenized_pattern); + } + } + } + } + } else { + if (getLogger().isDebugEnabled()) { + getLogger().debug("Do not add any fields"); + } + } + this.contentViewQuery = configuration.getChild(CONTENT_VIEW_QUERY_CONFIG, true).getValue(CONTENT_VIEW_QUERY_DEFAULT); + if 
(getLogger().isDebugEnabled()) { + getLogger().debug("content view: " + this.contentViewQuery); + } + } /** @@ -169,7 +246,7 @@ try { URL contentURL = new URL(url, url.getFile() + ((url.getFile().indexOf("?") == -1) ? "?" : "&") - + CONTENT_QUERY); + + contentViewQuery); URLConnection contentURLConnection = contentURL.openConnection(); if (contentURLConnection == null) { throw new ProcessingException("Can not open connection to URL " @@ -196,6 +273,7 @@ } LuceneIndexContentHandler luceneIndexContentHandler = new LuceneIndexContentHandler(); + luceneIndexContentHandler.setFieldTags(fieldTags); indexDocument(contentURLConnection, luceneIndexContentHandler); // // document is parsed
---------------------------------------------------------------------- In case of troubles, e-mail: [EMAIL PROTECTED] To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]