wam         2004/02/25 02:23:52

  Added:       src/stores/org/apache/slide/index
                        BasicExpressionFactoryTxtContainsSample.java
                        BasicExpressionTxtContainsSample.java
                        LuceneIndexer.java SampleTxtContainsIndexer.java
  Log:
  an example for indexing with Lucene
  
  Revision  Changes    Path
  1.1                  
jakarta-slide/src/stores/org/apache/slide/index/BasicExpressionFactoryTxtContainsSample.java
  
  Index: BasicExpressionFactoryTxtContainsSample.java
  ===================================================================
  /*
   * $Header: 
/home/cvs/jakarta-slide/src/stores/org/apache/slide/index/BasicExpressionFactoryTxtContainsSample.java,v
 1.1 2004/02/25 10:23:52 wam Exp $
   * $Revision: 1.1 $
   * $Date: 2004/02/25 10:23:52 $
   *
   * ====================================================================
   *
   * Copyright 1999-2004 The Apache Software Foundation
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   *
   *     http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   *
   */
  
  package org.apache.slide.index;
  
  import java.util.Collection;
  
  import org.apache.slide.content.NodeProperty.NamespaceCache;
  import org.apache.slide.search.BadQueryException;
  import org.apache.slide.search.PropertyProvider;
  import org.apache.slide.search.basic.IBasicExpression;
  import org.apache.slide.search.basic.IBasicExpressionFactory;
  import org.apache.slide.search.basic.IBasicQuery;
  import org.jdom.Element;
  
  /**
   * Sample factory that creates executable expressions for a store specific
   * text search. The only element it handles is <code>contains</code> in the
   * default (DAV:) namespace, for which it creates a
   * {@link BasicExpressionTxtContainsSample}. An instance is created for
   * each SEARCH request.
   */
  public class BasicExpressionFactoryTxtContainsSample
      implements IBasicExpressionFactory
  {
      /** the query this factory works for, set by {@link #init} */
      private IBasicQuery query;

      /** the PropertyProvider handed in by {@link #init} */
      protected PropertyProvider propertyProvider;

      /** path that is passed on unchanged to every created expression */
      private String rootPath;

      /**
       * Constructor
       *
       * @param    rootPath           path handed to each
       *                              BasicExpressionTxtContainsSample created
       *                              by this factory
       *
       */
      public BasicExpressionFactoryTxtContainsSample (String rootPath)
      {
          this.rootPath = rootPath;
      }

      /**
       * Called for merge expressions (or, and). Not implemented in this
       * sample: the arguments are ignored and <code>null</code> is returned.
       *
       * @param    mergeOperator       and, or
       * @param    namespace           the namespace of this expression
       * @param    expressionsToMerge  all expressions, that shall be merged
       *
       * @return   always <code>null</code>
       *
       * @throws   BadQueryException   declared by the interface, never thrown
       *                               here
       *
       */
      public IBasicExpression createMergeExpression (String mergeOperator,
                                                     String namespace,
                                                     Collection expressionsToMerge)
          throws BadQueryException
      {
          return null;
      }

      /**
       * Called by the expression compiler for each leaf expression.
       *
       * @param    element             an Element describing the expression
       *
       * @return   the expression for <code>element</code> with this factory
       *           set as its factory, or <code>null</code> if the element is
       *           not one this factory handles (anything but DAV:contains)
       *
       * @throws   BadQueryException   if <code>element</code> is
       *                               <code>null</code>
       *
       */
      public IBasicExpression createExpression (Element element)
          throws BadQueryException
      {
          if (element == null)
          {
              throw new BadQueryException ("expected a where criteria");
          }

          BasicExpressionTxtContainsSample result = null;
          String namespace = element.getNamespace().getURI();
          if (namespace.equals (NamespaceCache.DEFAULT_URI))
          {
              result = createDAVExpression (element);
          }
          // allow store specific extensions
          //  else if (namespace.equals (MyNamespace))
          //      result = createMyExpression (element);

          // result stays null for every element this factory does not know;
          // only wire up the back pointer when something was really created
          if (result != null)
          {
              result.setFactory (this);
          }
          return result;
      }


      /**
       * Called, when the expression is in the default (DAV:) namespace.
       *
       * @param    e                   an Element
       *
       * @return   a new expression searching for the trimmed text of
       *           <code>e</code> if <code>e</code> is named
       *           <code>contains</code>, otherwise <code>null</code>
       *
       */
      private BasicExpressionTxtContainsSample createDAVExpression (Element e)
      {
          if ("contains".equals (e.getName()))
          {
              String searchedText = e.getTextTrim();
              return new BasicExpressionTxtContainsSample (searchedText, rootPath);
          }
          return null;
      }

      /**
       * Called by BasicExpressionCompiler after construction.
       *
       * @param    query               the associated BasicQuery
       * @param    propertyProvider    the PropertyProvider for this expression.
       *
       * @throws   BadQueryException   declared by the interface, never thrown
       *                               here
       *
       */
      public void init(IBasicQuery query, PropertyProvider propertyProvider)
          throws BadQueryException
      {
          this.query = query;
          this.propertyProvider = propertyProvider;
      }

      /**
       * Method getPropertyProvider
       *
       * @return   the PropertyProvider passed to {@link #init}, or
       *           <code>null</code> if init was not called yet
       *
       */
      public PropertyProvider getPropertyProvider()
      {
          return propertyProvider;
      }

      /**
       * Method getQuery
       *
       * @return   the IBasicQuery passed to {@link #init}, or
       *           <code>null</code> if init was not called yet
       *
       */
      public IBasicQuery getQuery()
      {
          return query;
      }


      /**
       * Returns the name of the first child element of the <code>prop</code>
       * child of <code>e</code>; <code>prop</code> is looked up in the
       * namespace of <code>e</code>. Not called anywhere in this class.
       * Fails with a runtime exception if there is no such <code>prop</code>
       * child or if it has no children.
       *
       * @param    e                   an Element containing a prop child
       *
       * @return   the name of the first element below prop
       */
      private String propName (Element e)
      {
          Element propElem = e.getChild ("prop", e.getNamespace());
          Element firstProperty = (Element) propElem.getChildren().get (0);
          return firstProperty.getName();
      }
  }
  
  
  
  
  1.1                  
jakarta-slide/src/stores/org/apache/slide/index/BasicExpressionTxtContainsSample.java
  
  Index: BasicExpressionTxtContainsSample.java
  ===================================================================
  /*
   * $Header: 
/home/cvs/jakarta-slide/src/stores/org/apache/slide/index/BasicExpressionTxtContainsSample.java,v
 1.1 2004/02/25 10:23:52 wam Exp $
   * $Revision: 1.1 $
   * $Date: 2004/02/25 10:23:52 $
   *
   * ====================================================================
   *
   * Copyright 1999-2004 The Apache Software Foundation
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   *
   *     http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   *
   */
  
  
  
  package org.apache.slide.index;
  
  import org.apache.slide.search.basic.*;
  
  import java.io.IOException;
  import java.util.Collection;
  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.queryParser.QueryParser;
  import org.apache.lucene.search.Hits;
  import org.apache.lucene.search.IndexSearcher;
  import org.apache.lucene.search.Query;
  import org.apache.lucene.search.Searcher;
  import org.apache.slide.common.SlideException;
  import org.apache.slide.search.BadQueryException;
  import org.apache.slide.search.RequestedResource;
  import org.apache.slide.search.SearchException;
  import org.apache.slide.structure.ObjectNode;
  import org.apache.slide.structure.SubjectNode;
  
  /**
   * A very basic sample for a store specific Expression. Depending on the
   * complexity of the concrete store specific implementation, iut might be
   * a good idea to have an Expression class for each DAV: expression
   * (SQLEqExpression, SQLOrExpression, ...)
   *
   * @author <a href="mailto:[EMAIL PROTECTED]">Martin Wallmer</a>
   * @version $Revision: 1.1 $
   */
  public class BasicExpressionTxtContainsSample implements IBasicExpression
  {
      /** an example for an executable command */
      String searchedText;
      
      String indexPath;
      
      /** backptr to the factory */
      IBasicExpressionFactory factory;
      
      /**
       * constructor for a compare expression like gt, eq, ...
       * For your concrete implementation you are free, which parameters have to
       * be passed, let the factory give you everything you need.
       */
      BasicExpressionTxtContainsSample (String searchedText, String rootPath)
      {
          this.searchedText = searchedText;
          this.indexPath = rootPath;
      }
      
      /**
       * constructor for a merge expression
       */
      BasicExpressionTxtContainsSample (String mergeOperator,
                                        Collection children,
                                        IBasicExpressionFactory factory)
          throws BadQueryException
      {
          //        this.factory = factory;
          //        Iterator it = children.iterator();
          //        BasicExpressionTxtContainsSample firstChild = 
(BasicExpressionTxtContainsSample)it.next();
          //
          //        if (firstChild == null)
          //            throw new BadQueryException (mergeOperator + " needs at least 
one nested element");
          //
          //        theExecutableCommand = firstChild.theExecutableCommand;
          //
          //        // create the executable command
          //        while (it.hasNext()) {
          //            BasicExpressionTxtContainsSample exp = 
(BasicExpressionTxtContainsSample)it.next();
          //            theExecutableCommand += " " + mergeOperator + " " + 
exp.theExecutableCommand;
          //        }
      }
      
      /**
       * fake executer. The executable command is printed and a fake result is created.
       *
       * @return   an IBasicResultSet
       *
       * @throws   SearchException
       *
       */
      public IBasicResultSet execute() throws SearchException
      {
          IBasicResultSet result = new BasicResultSetImpl (false);
          
          try
          {
              Searcher searcher = new IndexSearcher(indexPath);
              Analyzer analyzer = new StandardAnalyzer();
              
              Query query = QueryParser.parse(searchedText, "contents", analyzer);
              Hits hits = searcher.search (query);
              int noOfHits = hits.length();
              
              for (int i = 0; i < noOfHits; i++)
              {
                  Document doc = hits.doc(i);
                  String uri = doc.get("documentId");
                  System.out.println(uri);
                  RequestedResource resource = createResource(uri);
                  result.add (resource);
              }
          }
          catch (Exception e)
          {
              throw new SearchException (e);
          }
          
          return  result;
      }
      
      private RequestedResource createResource(String uri) throws SearchException
      {
          ObjectNode node = new SubjectNode(uri); // this will return the root folder
          RequestedResource resource = null;
          IBasicQuery query = factory.getQuery();
          
          try
          {
              resource = new ComparableResourceImpl
                  (node, query.getSearchToken(), query.getScope(),
                   factory.getPropertyProvider());
          }
          catch (SlideException e)
          {
              throw new SearchException (e);
          }
          return resource;
      }
      
      public void setFactory (IBasicExpressionFactory factory)
      {
          this.factory = factory;
      }
      
      public IBasicExpressionFactory getFactory()
      {
          return this.factory;
      }
  }
  
  
  
  
  1.1                  
jakarta-slide/src/stores/org/apache/slide/index/LuceneIndexer.java
  
  Index: LuceneIndexer.java
  ===================================================================
  /*
   * $Header: 
/home/cvs/jakarta-slide/src/stores/org/apache/slide/index/LuceneIndexer.java,v 1.1 
2004/02/25 10:23:52 wam Exp $
   * $Revision: 1.1 $
   * $Date: 2004/02/25 10:23:52 $
   *
   * ====================================================================
   *
   * Copyright 1999-2004 The Apache Software Foundation
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   *
   *     http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   *
   */
  
  
  package org.apache.slide.index;
  
  import java.io.File;
  import java.io.FileReader;
  import java.io.IOException;
  import java.io.Reader;
  import java.util.Date;
  import java.util.HashSet;
  import java.util.Iterator;
  import java.util.Set;
  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.Term;
  import org.apache.lucene.queryParser.QueryParser;
  import org.apache.lucene.search.Hits;
  import org.apache.lucene.search.IndexSearcher;
  import org.apache.lucene.search.Query;
  import org.apache.lucene.search.Searcher;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.store.FSDirectory;
  
  /**
   * Minimal helper around a file based Lucene index: adds the text of a
   * document under a document id, removes all entries of a document id and
   * (for the test in main) searches the text.
   * Not intended for production.
   *
   * The document id is indexed untokenized, because removeIndex deletes by
   * the exact id. An id that was split or lower-cased by the analyzer can
   * never match that term.
   * NOTE(review): this is presumably what caused the formerly noted problem
   * "in slide context no index is deleted when deleting resources" - confirm.
   */
  public class LuceneIndexer
  {
      /** name of the field holding the document id */
      private static final String DOC_ID = "documentId";

      /** name of the field holding the indexed text */
      private static final String CONTENTS = "contents";

      /** index location used by main if none is given on the command line */
      private static final String DEFAULT_INDEX_DB = "D:\\projects\\tmp\\index";


      /** location of the index in the file system */
      private String indexDb;


      /**
       * Constructor
       *
       * @param    indexDb             location of the index; the index itself
       *                               is neither opened nor created here
       */
      public LuceneIndexer (String indexDb)
      {
          this.indexDb = indexDb;
      }


      /**
       * Removes all index entries for a docId.
       *
       * @param    docId               a  String
       *
       * @throws   IOException         if the index cannot be opened, changed
       *                               or closed
       *
       */
      public void removeIndex (String docId) throws IOException
      {
          Directory directory = FSDirectory.getDirectory (indexDb, false);
          try
          {
              IndexReader reader = IndexReader.open (directory);
              try
              {
                  reader.delete (new Term (DOC_ID, docId));
              }
              finally
              {
                  // closing the reader also makes the deletion permanent
                  reader.close();
              }
          }
          finally
          {
              directory.close();
          }
      }

      /**
       * Adds a document to the existing index and optimizes the index.
       *
       * @param    docId               the id under which the document can be
       *                               found and removed again
       * @param    reader              supplies the text to be indexed
       *
       * @throws   Exception           if the index does not exist or cannot
       *                               be written
       *
       */
      public void index (String docId, Reader reader)
          throws Exception
      {
          IndexWriter writer =
              new IndexWriter (indexDb, new StandardAnalyzer(), false);
          try
          {
              Document doc = new Document();
              doc.add (Field.Text (CONTENTS, reader));
              // stored and indexed, but NOT tokenized: see class comment
              doc.add (new Field (DOC_ID, docId, true, true, false));
              writer.addDocument (doc);
              writer.optimize();
          }
          finally
          {
              // always close, otherwise the index stays locked for writing
              writer.close();
          }
      }

      /**
       * Opens a file for reading.
       *
       * @param    file                name of the file
       *
       * @return   a Reader on that file
       *
       * @throws   IOException         if the file cannot be opened
       */
      private static Reader getReader (String file) throws IOException
      {
          return new FileReader (file);
      }

      /**
       * Searches the indexed text.
       *
       * @param    stringToFind        a Lucene query string
       *
       * @return   the ids (String) of all documents matching the query,
       *           empty if there is none
       *
       * @throws   Exception           if the query cannot be parsed or the
       *                               index cannot be read
       */
      private Set contains(String stringToFind) throws Exception
      {
          Set resultSet = new HashSet();
          Searcher searcher = new IndexSearcher (indexDb);
          try
          {
              Analyzer analyzer = new StandardAnalyzer();
              Query query = QueryParser.parse (stringToFind, CONTENTS, analyzer);

              Hits hits = searcher.search (query);
              int noOfHits = hits.length();
              for (int i = 0; i < noOfHits; i++)
              {
                  resultSet.add (hits.doc (i).get (DOC_ID));
              }
          }
          finally
          {
              searcher.close();
          }
          return resultSet;
      }


      /**
       * Test. To run, put two text files in current directory,
       * otto.txt and fritz.txt containing the strings
       * "hallo otto" and "hallo fritz". The index is created from scratch
       * at the location given as first argument, or at a built-in default
       * location if no argument is given.
       *
       * @param    args                optional: args[0] is the index location
       *
       * @throws   Exception
       *
       */
      public static void main(String[] args) throws Exception
      {
          String indexDb = (args != null && args.length > 0)
              ? args[0]
              : DEFAULT_INDEX_DB;

          LuceneIndexer indexer = new LuceneIndexer (indexDb);

          // create = true: start with a new, empty index
          IndexWriter writer =
              new IndexWriter (indexDb, new StandardAnalyzer(), true);
          writer.close();

          indexFile (indexer, "otto.txt");

          System.out.println("expect one element otto.txt");
          displayResult (indexer.contains ("otto"));

          indexFile (indexer, "fritz.txt");
          System.out.println("expect fritz.txt");
          displayResult (indexer.contains ("fritz"));

          System.out.println("expect fritz.txt and otto.txt");
          displayResult (indexer.contains ("Hallo"));

          indexer.removeIndex ("otto.txt");

          System.out.println("expect fritz.txt");
          displayResult (indexer.contains ("Hallo"));

          System.out.println("expect null");
          displayResult (indexer.contains ("otto"));

          indexer.removeIndex ("fritz.txt");

          System.out.println("expect null");
          displayResult (indexer.contains ("fritz"));
      }

      /**
       * Indexes a file using its name as document id and closes the file
       * afterwards.
       *
       * @param    indexer             the indexer to use
       * @param    file                name of the file
       *
       * @throws   Exception           if the file cannot be read or indexed
       */
      private static void indexFile (LuceneIndexer indexer, String file)
          throws Exception
      {
          Reader reader = getReader (file);
          try
          {
              indexer.index (file, reader);
          }
          finally
          {
              reader.close();
          }
      }

      /**
       * Prints every element of the result on a line of its own; prints
       * nothing for an empty result.
       *
       * @param    result              a Set of String
       */
      private static void displayResult(Set result)
      {
          Iterator iter = result.iterator();
          while (iter.hasNext())
          {
              System.out.println ((String) iter.next());
          }
      }
  }
  
  
  
  1.1                  
jakarta-slide/src/stores/org/apache/slide/index/SampleTxtContainsIndexer.java
  
  Index: SampleTxtContainsIndexer.java
  ===================================================================
  /*
   * $Header: 
/home/cvs/jakarta-slide/src/stores/org/apache/slide/index/SampleTxtContainsIndexer.java,v
 1.1 2004/02/25 10:23:52 wam Exp $
   * $Revision: 1.1 $
   * $Date: 2004/02/25 10:23:52 $
   *
   * ====================================================================
   *
   * Copyright 1999-2004 The Apache Software Foundation
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   *
   *     http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   *
   */
  package org.apache.slide.index;
  import java.io.CharArrayReader;
  import java.io.IOException;
  import java.util.Hashtable;
  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.slide.common.AbstractService;
  import org.apache.slide.common.NamespaceAccessToken;
  import org.apache.slide.common.ServiceAccessException;
  import org.apache.slide.common.ServiceConnectionFailedException;
  import org.apache.slide.common.ServiceDisconnectionFailedException;
  import org.apache.slide.common.ServiceInitializationFailedException;
  import org.apache.slide.common.ServiceParameterErrorException;
  import org.apache.slide.common.ServiceParameterMissingException;
  import org.apache.slide.common.ServiceResetFailedException;
  import org.apache.slide.common.Uri;
  import org.apache.slide.content.NodeRevisionContent;
  import org.apache.slide.content.NodeRevisionDescriptor;
  import org.apache.slide.content.NodeRevisionNumber;
  import org.apache.slide.search.IndexException;
  import org.apache.slide.search.basic.IBasicExpressionFactory;
  import org.apache.slide.store.IndexStore;
  
  
  /**
   * Take this as a starting point for your own Indexer implementation.
   * This sample keeps the content of resources in a Lucene index located at
   * the path given by the store parameter "indexpath".
   *
   * @version $Revision: 1.1 $
   */
  public class SampleTxtContainsIndexer extends AbstractService implements IndexStore
  {
      /** name of the parameter holding the location of the index */
      private static final String INDEX_PATH = "indexpath";
      private static final String DOC_ID = "documentId";

      /** location of the index, set by setParameters */
      private String indexpath;

      /** true between connect() and disconnect() */
      private boolean started = false;

      /**
       * Create Index, if not yet done. The location is the one received by
       * setParameters; the token is not used.
       *
       * @param    token               a  NamespaceAccessToken
       *
       * @throws   ServiceInitializationFailedException if the index can
       *           neither be opened nor created, or cannot be closed again
       *
       */
      public void initialize(NamespaceAccessToken token)
          throws ServiceInitializationFailedException
      {
          IndexWriter indexWriter = null;
          try
          {
              indexWriter = new IndexWriter (indexpath, new StandardAnalyzer(), false);
          }
          // will fail, if not yet exists
          catch (IOException e)
          {
              // NOTE(review): every IOException is taken as "index missing";
              // creating with 'true' starts a new index at that location.
              // Confirm this is acceptable for an existing but unreadable
              // index.
              try
              {
                  // create index
                  indexWriter =
                      new IndexWriter (indexpath, new StandardAnalyzer(), true);
              }
              catch (IOException ex)
              {
                  throw new ServiceInitializationFailedException (this, ex);
              }
          }

          // the writer was only needed to make sure the index exists
          try
          {
              indexWriter.close();
          }
          catch (IOException e)
          {
              throw new ServiceInitializationFailedException (this, e);
          }
      }

      /**
       * Method getBasicExpressionFactory
       *
       * @return   a new BasicExpressionFactoryTxtContainsSample working on
       *           the index of this store
       *
       */
      public IBasicExpressionFactory getBasicExpressionFactory()
      {
          return new BasicExpressionFactoryTxtContainsSample (indexpath);
      }


      /**
       * Index an object content. The string form of the uri is used as
       * document id; the revision descriptor is not used.
       *
       * @param    uri                 an Uri
       * @param    revisionDescriptor  a  NodeRevisionDescriptor
       * @param    revisionContent     a  NodeRevisionContent
       *
       * @throws   IndexException      wrapping any exception raised while
       *                               indexing
       */
      public synchronized void createIndex (Uri uri,
                                            NodeRevisionDescriptor revisionDescriptor,
                                            NodeRevisionContent revisionContent)
          throws IndexException
      {
          try
          {
              LuceneIndexer indexer = new LuceneIndexer (indexpath);
              indexer.index (uri.toString(),
                             new CharArrayReader (revisionContent.getContent()));
          }
          catch (Exception e)
          {
              throw new IndexException (e);
          }
      }

      /**
       * Method updateIndex. Removes the entries for the uri from the index
       * and indexes the given content anew; the revision descriptor is not
       * used.
       *
       * @param    uri                 an Uri
       * @param    revisionDescriptor  a  NodeRevisionDescriptor
       * @param    revisionContent     a  NodeRevisionContent
       *
       * @throws   IndexException      wrapping any exception raised while
       *                               removing or indexing
       *
       */
      public synchronized void updateIndex(Uri uri,
                                           NodeRevisionDescriptor revisionDescriptor,
                                           NodeRevisionContent revisionContent)
          throws IndexException
      {
          try
          {
              String docId = uri.toString();
              LuceneIndexer indexer = new LuceneIndexer (indexpath);
              indexer.removeIndex (docId);
              indexer.index (docId,
                             new CharArrayReader (revisionContent.getContent()));
          }
          catch (Exception e)
          {
              throw new IndexException (e);
          }
      }

      /**
       * Drop an object from the index. The revision number is not used: the
       * entries are removed by uri only.
       *
       * @param    uri                 an Uri
       * @param    number              a  NodeRevisionNumber
       *
       * @throws   IndexException      wrapping any exception raised while
       *                               removing
       */
      public synchronized void dropIndex(Uri uri, NodeRevisionNumber number)
          throws IndexException
      {
          try
          {
              LuceneIndexer indexer = new LuceneIndexer (indexpath);
              indexer.removeIndex (uri.toString());
          }
          catch (Exception e)
          {
              throw new IndexException (e);
          }
      }

      /**
       * Marks the service as connected. There is no underlying connection;
       * a message is printed to System.out.
       *
       * @exception ServiceConnectionFailedException declared, never thrown
       *            here
       */
      public void connect() throws ServiceConnectionFailedException
      {
          System.out.println("SampleIndexer: connect");
          started = true;
      }

      /**
       * This function tells whether or not the service is connected.
       *
       * @return boolean true if connect() was called and disconnect() was
       *         not called since
       * @exception ServiceAccessException declared, never thrown here
       */
      public boolean isConnected() throws ServiceAccessException
      {
          return started;
      }

      /**
       * Initializes the service with a set of parameters. The only parameter
       * read is "indexpath", the location of the index.
       *
       * @param parameters Hashtable containing the parameters' names
       * and associated values
       * @exception ServiceParameterErrorException declared, never thrown
       *            here
       * @exception ServiceParameterMissingException if "indexpath" is
       *            missing or empty
       */
      public void setParameters (Hashtable parameters)
          throws ServiceParameterErrorException, ServiceParameterMissingException
      {
          indexpath = (String) parameters.get (INDEX_PATH);
          if (indexpath == null || indexpath.length() == 0)
          {
              throw new ServiceParameterMissingException (this, INDEX_PATH);
          }
      }

      /**
       * Marks the service as disconnected and prints a message to
       * System.out.
       *
       * @exception ServiceDisconnectionFailedException declared, never
       *            thrown here
       */
      public void disconnect() throws ServiceDisconnectionFailedException
      {
          System.out.println("SampleIndexer: disconnect");
          started = false;
      }

      /**
       * Only prints a message to System.out; the index is left untouched.
       *
       * @exception ServiceResetFailedException declared, never thrown here
       */
      public void reset() throws ServiceResetFailedException
      {
          System.out.println("SampleIndexer: reset");
      }
  }
  
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to