Dear All,

Here is a simple Lucene indexer. Some optimizations are still possible; I will try to make them as soon as possible. It is based on the new event model made by Daniel. Not many changes are required in domain.xml: you just need to define the index directory. I am also sending a new version of IndexTrigger, which fixes a small bug: the revision number can sometimes be null, so I added a null check.

Christophe

Here is the config in the domain.xml


<listener classname="org.apache.slide.search.IndexTrigger">
<configuration>
<indexer classname="org.apache.slide.search.LuceneIndexer" synchronous="true" uri="/files/articles" >
<configuration>
<directory>index</directory>
</configuration>
</indexer>
</configuration>
</listener>
/*
 * Copyright 2000-2004 The Apache Software Foundation.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.slide.search;

// Java imports
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

// Lucene imports
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.slide.common.Domain;
import org.apache.slide.common.Uri;
import org.apache.slide.content.NodeRevisionContent;
import org.apache.slide.content.NodeRevisionDescriptor;
import org.apache.slide.content.NodeRevisionNumber;
import org.apache.slide.util.logger.Logger;

/**
 * Lucene utility class. 
 * 
 * This utility class is mainly used by the Slide indexer to add, remove and
 * update documents in the Lucene index.
 * The search method can be used for a store-specific implementation of the
 * search service.
 *
 * @author <a href="mailto:[EMAIL PROTECTED]">Christophe Lombart</a>
 * 
 */
public class LuceneUtil
{

    protected static final String LOG_CHANNEL = LuceneUtil.class.getName();
    private static File rootDir = null;

    public static final String URI_FIELD = "uri";
    public static final String REVISION_FIELD = "revision";
    public static final String CONTENT_TEXT = "content";
    
    /**
     * Initialise Lucene 
     * 
     * @param indexRoot Directory used to create the Lucene index
     * @throws Exception - Impossible to create the lucene index
     */
    public static void init(String indexRoot) throws Exception
    {

        try
        {
            rootDir = new File(indexRoot);

            if (!rootDir.exists())
            {
                rootDir.mkdir();
            }

            // Check if the IndexSearcher can be created on this indexRoot
            Searcher searcher = null;
            searcher = new IndexSearcher(rootDir.getPath());
            searcher.close();
        }
        catch (Exception e)
        {
            // Impossible to create an IndexSearcher, so try to initialize the index 
Writer
            IndexWriter indexWriter = new IndexWriter(rootDir, new StandardAnalyzer(), 
true);
            indexWriter.close();
            indexWriter = null;
        }

        Domain.log("Lucene is correctly initialized", LOG_CHANNEL, Logger.INFO);

    }
    
    /**
     * 
     * Create a new document into the Lucene index
     * 
     * @param uri
     * @param revisionDescriptor
     * @param revisionContent
     * @return true if the document is correctly inserted
     * 
     */
    public static boolean createIndex(Uri uri, NodeRevisionDescriptor 
revisionDescriptor, NodeRevisionContent revisionContent)
    {
        boolean result = false;

        IndexWriter indexWriter = null;
        try
        {
            indexWriter = new IndexWriter(rootDir, new StandardAnalyzer(), false);

            // Create document
            Document doc = new Document();

            // Populate Lucene document from the Slide descriptor and content revision
            // TODO: add other Slide properties 

            doc.add(Field.Keyword(URI_FIELD, uri.toString()));
            doc.add(Field.Keyword(REVISION_FIELD, uri.toString() + "-" + 
revisionDescriptor.getRevisionNumber().toString()));

            if (revisionContent != null)
            {
                doc.add(Field.Text(CONTENT_TEXT, revisionContent.readContent()));
            }

            indexWriter.addDocument(doc);

            Domain.log(
                "Added '" + uri.toString() + " - " + 
revisionDescriptor.getRevisionNumber().toString() + "' to index",
                LOG_CHANNEL,
                Logger.INFO);
            indexWriter.optimize();
            result = true;
        }
        catch (IOException e)
        {
            Domain.log("Error while adding document into the Lucene index " + 
e.getMessage(), LOG_CHANNEL, Logger.ERROR);
        }
        finally
        {
            try
            {
                if (indexWriter != null)
                {
                    indexWriter.close();
                }
            }
            catch (IOException e)
            {
                Domain.log("Error while closing index writer.", LOG_CHANNEL, 
Logger.ERROR);
            }
        }

        return result;
    }

    
    /**
     * Drop a document from the Lucene index
     * 
     * @param uri
     * @param number
     * @return true if the document is correctly dropped
     * 
     */
    public static boolean dropIndex(Uri uri, NodeRevisionNumber number)
    {
        boolean result = false;

        try
        {
            IndexReader indexReader = IndexReader.open(rootDir);
            Term term = null;
            
            // Revision number can be null (eg. when the Slide document is deleted via 
the MacroHelper with specifying 
            // a revision number            
            if (number == null)
            {
                term = new Term(URI_FIELD, uri.toString());
            }
            else
            {
                term = new Term(REVISION_FIELD, uri.toString() + "-" + 
number.toString());
            }

            indexReader.delete(term);
            indexReader.close();

            IndexWriter indexWriter = new IndexWriter(rootDir, new StandardAnalyzer(), 
false);
            indexWriter.optimize();
            indexWriter.close();

        }
        catch (Exception e)
        {
            Domain.log("Impossible to delete " + uri.toString() + " - " + 
number.toString() + " from the Lucene index");
            result = false;
        }

        return result;
    }
   
    /**
     * Update a document into the Lucene indexer
     * 
     * @param uri
     * @param revisionDescriptor
     * @param revisionContent
     * 
     * @return true if the document is correctly updated
     *  
     */
    public static boolean updateIndex(Uri uri, NodeRevisionDescriptor 
revisionDescriptor, NodeRevisionContent revisionContent)
    {
        boolean result = false;

        try
        {
            // Delete entries from index
            result = dropIndex(uri, revisionDescriptor.getRevisionNumber());
            if (result)
            {
                createIndex(uri, revisionDescriptor, revisionContent);
                result = true;
            }
        }
        catch (Throwable e)
        {
            Domain.log(
                "Exception when updating the index with " + uri.toString() + " - " + 
revisionDescriptor.getRevisionNumber(),
                LOG_CHANNEL,
                Logger.ERROR);
        }

        return result;
    }

    /**
     * 
     * Search documents based on a simple full text search.
     * 
     * @param fulltext search query based on the Lucene query syntax (can contains 
field/property references if needed)
     *   
     * @return a set of uri (String) which match to the search result
     *
     */
    public static Set search(String fullText)
    {
        Searcher searcher = null;
        Hits hits = null;
        HashSet set = new HashSet();
        try
        {
            searcher = new IndexSearcher(rootDir.getPath());

            Analyzer analyzer = new StandardAnalyzer();
            QueryParser parser = new QueryParser(CONTENT_TEXT, analyzer);
            Query query = parser.parse(fullText);
            hits = searcher.search(query);
            int len = hits.length();
            for (int i = 0; i < len; i++)
            {
                Document document = hits.doc(i);
                String uri = document.get(LuceneUtil.URI_FIELD);
                set.add(uri);
            }
            return set;
        }
        catch (Exception e)
        {
            Domain.log("Failed to search with " + fullText, LOG_CHANNEL, Logger.ERROR);
            return null;
        }
        finally
        {
            if (searcher != null)
            {
                try
                {
                    searcher.close();
                }
                catch (IOException ioe)
                {
                    Domain.log("Impossible to close the seacher ", LOG_CHANNEL, 
Logger.ERROR);
                }
            }

        }
    }
}
/*
 * $Header: 
/home/cvspublic/jakarta-slide/src/share/org/apache/slide/search/IndexTrigger.java,v 
1.2 2004/02/25 15:43:51 dflorey Exp $
 * $Revision: 1.2 $
 * $Date: 2004/02/25 15:43:51 $
 *
 * ====================================================================
 *
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package org.apache.slide.search;

import org.apache.slide.common.Domain;
import org.apache.slide.common.Uri;
import org.apache.slide.content.NodeRevisionDescriptor;
import org.apache.slide.content.NodeRevisionDescriptors;
import org.apache.slide.content.NodeRevisionNumber;
import org.apache.slide.event.*;
import org.apache.slide.util.conf.Configurable;
import org.apache.slide.util.conf.Configuration;
import org.apache.slide.util.conf.ConfigurationException;
import org.apache.slide.util.logger.Logger;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;

/**
 * @author <a href="mailto:[EMAIL PROTECTED]">Daniel Florey</a>
 * 
 * @version $Revision: 1.2 $
 */

public class IndexTrigger implements EventCollectionListener, Configurable {
    protected static final String LOG_CHANNEL = IndexTrigger.class.getName();

    protected List indexers = new ArrayList();

    public void configure(Configuration configuration) throws ConfigurationException {
        Enumeration contentIndexers = configuration.getConfigurations("indexer");
        while (contentIndexers.hasMoreElements()) {
            Configuration listenerConfig = 
(Configuration)contentIndexers.nextElement();
            String classname = listenerConfig.getAttribute("classname");
            String uri = listenerConfig.getAttribute("uri", null);
            String contentType = listenerConfig.getAttribute("content-type", null);
            boolean synchronous = listenerConfig.getAttributeAsBoolean("synchronous", 
false);
             try {
                Class listenerClass = Class.forName(classname);
                Indexer indexer = null;
                try {
                    Method getInstanceMethod = listenerClass.getMethod("getInstance", 
new Class[0]);
                    indexer = (Indexer)getInstanceMethod.invoke(null, null);
                } catch ( NoSuchMethodException e) {
                    indexer = (Indexer)listenerClass.newInstance();
                }
                if ( indexer instanceof Configurable ) {
                    
((Configurable)indexer).configure(listenerConfig.getConfiguration("configuration"));
                }
                addIndexer(indexer, contentType, uri, synchronous);
            } catch (ClassCastException e) {
                throw new ConfigurationException("Indexer '"+classname+"' is not of 
type Indexer", configuration);
            } catch (Exception e) {
                throw new ConfigurationException("Indexer '"+classname+"' could not be 
loaded", configuration);
            }
        }
    }

    public void vetoableCollected(EventCollection collection) throws VetoException {
        try {
            triggerIndexers(collection, true);
        } catch ( IndexException e ) {
            throw new VetoException(e.getMessage());
        }
    }

    public void collected(EventCollection collection) {
        try {
            triggerIndexers(collection, false);
        } catch ( IndexException e ) {
            Domain.log("Index might be out of sync! Reason: "+e.getMessage(), 
LOG_CHANNEL, Logger.CRITICAL);
        }
    }

    private synchronized void triggerIndexers(EventCollection collection, boolean 
synchronous) throws IndexException {
        ContentEvent[] update = EventCollectionFilter.getChangedContents(collection);
        for ( int i = 0; i < update.length; i++ ) {
            Indexer[] indexers = getIndexers(update[i].getRevisionDescriptors(), 
update[i].getRevisionDescriptor(), synchronous);
            for ( int j = 0; j < indexers.length; j++ ) {
                indexers[j].updateIndex(new Uri(update[i].getNamespace(), 
update[i].getUri()), update[i].getRevisionDescriptor(), 
update[i].getRevisionContent());
            }
        }
        ContentEvent[] insert = EventCollectionFilter.getCreatedContents(collection);
        for ( int i = 0; i < insert.length; i++ ) {
            Indexer[] indexers = getIndexers(insert[i].getRevisionDescriptors(), 
insert[i].getRevisionDescriptor(), synchronous);
            for ( int j = 0; j < indexers.length; j++ ) {
                indexers[j].createIndex(new Uri(insert[i].getNamespace(), 
insert[i].getUri()), insert[i].getRevisionDescriptor(), 
insert[i].getRevisionContent());
            }
        }
        ContentEvent[] delete = EventCollectionFilter.getRemovedContents(collection);
        for ( int i = 0; i < delete.length; i++ ) {
            Indexer[] indexers = getIndexers(delete[i].getRevisionDescriptors(), 
delete[i].getRevisionDescriptor(), synchronous);
            for ( int j = 0; j < indexers.length; j++ ) {
                
                // Revision number can be null.
                //(eg. the document is deleted via the MacroHelp without specifying 
the  // revision number
                NodeRevisionNumber revisionNumber = null;
                if (delete[i].getRevisionDescriptor() != null) {
                    revisionNumber = 
delete[i].getRevisionDescriptor().getRevisionNumber();
                }                    
                indexers[j].dropIndex(new Uri(delete[i].getNamespace(), 
delete[i].getUri()), revisionNumber );
            }
        }
    }

    private void addIndexer(Indexer indexer, String contentType, String uri, boolean 
synchronous) {
        indexers.add(new IndexerMapping(indexer, contentType, uri, synchronous));
    }

    private Indexer []getIndexers(NodeRevisionDescriptors descriptors, 
NodeRevisionDescriptor descriptor, boolean synchronous) {
        List matchingIndexers = new ArrayList();
        for ( Iterator i = indexers.iterator(); i.hasNext(); ) {
            IndexerMapping mapping = (IndexerMapping)i.next();
            if ( mapping.isSynchronous() == synchronous && 
mapping.matches(descriptors, descriptor)) {
                matchingIndexers.add(mapping.getIndexer());
            }
        }
        Indexer[] indexers = new Indexer[matchingIndexers.size()];
        return (Indexer [])matchingIndexers.toArray(indexers);
    }

    class IndexerMapping {
        Indexer indexer;
        String uri, contentType;
        boolean synchronous;

        public IndexerMapping(Indexer indexer, String contentType, String uri, boolean 
synchronous) {
            this.indexer = indexer;
            this.uri = uri;
            this.contentType = contentType;
            this.synchronous = synchronous;
        }

        public Indexer getIndexer() {
            return indexer;
        }

        public boolean isSynchronous() {
            return synchronous;
        }

        public boolean matches(NodeRevisionDescriptors descriptors, 
NodeRevisionDescriptor descriptor) {
            boolean matching = true;
            if ( descriptor != null && contentType != null && 
!descriptor.getContentType().equals(contentType) ) {
                matching = false;
            }
            if ( descriptors != null && uri != null && 
!descriptors.getUri().startsWith(uri) ) {
                matching = false;
            }
            return matching;
        }
    }
}
/*
 * $Header: 
/home/cvspublic/jakarta-slide/src/share/org/apache/slide/search/LoggingIndexer.java,v 
1.1 2004/02/25 12:49:46 dflorey Exp $
 * $Revision: 1.1 $
 * $Date: 2004/02/25 12:49:46 $
 *
 * ====================================================================
 *
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package org.apache.slide.search;

import org.apache.slide.common.Domain;
import org.apache.slide.common.Uri;
import org.apache.slide.util.conf.Configurable;
import org.apache.slide.util.conf.Configuration;
import org.apache.slide.util.conf.ConfigurationException;
import org.apache.slide.util.logger.Logger;
import org.apache.slide.content.NodeRevisionDescriptor;
import org.apache.slide.content.NodeRevisionContent;
import org.apache.slide.content.NodeRevisionNumber;


/**
 * @author <a href="mailto:[EMAIL PROTECTED]">Christophe Lombart</a>
 * @version $Revision: 1.1 $
 */

public class LuceneIndexer implements Indexer, Configurable {
    protected static final String LOG_CHANNEL = LuceneIndexer.class.getName();
    String directory = null;
    
    public void dropIndex(Uri uri, NodeRevisionNumber number) throws IndexException {  
      
       LuceneUtil.dropIndex(uri, number);        
    }

    public void createIndex(Uri uri, NodeRevisionDescriptor revisionDescriptor, 
NodeRevisionContent revisionContent) throws IndexException {
         LuceneUtil.createIndex(uri, revisionDescriptor, revisionContent);
    }

    public void updateIndex(Uri uri, NodeRevisionDescriptor revisionDescriptor, 
NodeRevisionContent revisionContent) throws IndexException {
        LuceneUtil.updateIndex(uri, revisionDescriptor, revisionContent);
    }
    
    /**
     * @see 
org.apache.slide.util.conf.Configurable#configure(org.apache.slide.util.conf.Configuration)
     */
    public void configure(Configuration configuration) throws ConfigurationException
    {
        try
        {
            // Get from the domain.xml file the index directory
            Configuration conf = configuration.getConfiguration("directory");
            directory = conf.getValue();
            Domain.log("Use index directory : " + directory, LOG_CHANNEL, Logger.INFO);
            
            // Initialise the Lucene Util 
            LuceneUtil.init(directory);
        }

        catch (Exception e)
        {            
            throw new ConfigurationException("Impossible to initialise the Lucene 
indexer", configuration);
        }
        
    }

}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to