lucene LuceneContentIndexer.java

luetzkendorf Mon, 08 Nov 2004 01:45:52 -0800

luetzkendorf    2004/11/08 01:45:48

  Added:       src/stores/org/apache/slide/index/lucene/expressions
                        ContainsExpression.java
               src/stores/org/apache/slide/index/lucene
                        LuceneContentIndexer.java
  Log:
  content indexing added
  
  Revision  Changes    Path
  1.1                  
jakarta-slide/src/stores/org/apache/slide/index/lucene/expressions/ContainsExpression.java
  
  Index: ContainsExpression.java
  ===================================================================
  /*
   * $Header: 
/home/cvs/jakarta-slide/src/stores/org/apache/slide/index/lucene/expressions/ContainsExpression.java,v
 1.1 2004/11/08 09:45:47 luetzkendorf Exp $
   * $Revision: 1.1 $
   * $Date: 2004/11/08 09:45:47 $
   *
   * ====================================================================
   *
   * Copyright 1999-2004 The Apache Software Foundation
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   *
   *     http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   *
   */
  package org.apache.slide.index.lucene.expressions;
  
  import java.io.IOException;
  import java.io.StringReader;
  import java.util.ArrayList;
  
  import org.apache.lucene.analysis.Token;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.index.Term;
  import org.apache.lucene.search.BooleanQuery;
  import org.apache.lucene.search.PhraseQuery;
  import org.apache.lucene.search.PrefixQuery;
  import org.apache.lucene.search.TermQuery;
  import org.apache.lucene.search.WildcardQuery;
  
  import org.apache.slide.index.lucene.Index;
  import org.apache.slide.index.lucene.IndexConfiguration;
  import org.apache.slide.search.BadQueryException;
  import org.jdom.Element;
  
  /**
   * Implements the <code>contains</code> expression.
   *
   * <p>The text of the expression element is turned into a Lucene query on
   * the content field ({@link Index#CONTENT_FIELD_NAME}):
   * <ul>
   * <li>a literal whose only wildcard is a single trailing <code>*</code>
   *     becomes a {@link PrefixQuery},</li>
   * <li>any other literal containing <code>*</code> or <code>?</code>
   *     becomes a {@link WildcardQuery},</li>
   * <li>all other literals are run through the configured analyzer and
   *     become a {@link TermQuery} (one token), a {@link PhraseQuery}
   *     (several tokens) or an empty {@link BooleanQuery} (no token).</li>
   * </ul>
   */
  public class ContainsExpression extends AbstractExpression
  {

      /**
       * Builds the query for a <code>contains</code> element.
       *
       * @param index   the index the query will be run against; also supplies
       *                the configuration (and thereby the analyzer)
       * @param element the <code>contains</code> element; its text is the
       *                literal to search for
       * @param negated if <code>true</code> the resulting query is wrapped by
       *                <code>negateQuery</code>
       * @throws BadQueryException declared for callers; nothing in this
       *                constructor body throws it explicitly
       */
      public ContainsExpression(Index index, Element element, boolean negated)
              throws BadQueryException
      {
          super(index);

          IndexConfiguration config = index.getConfiguration();
          String literal = element.getText();

          // indexOf returns the FIRST occurrence, so "starPos is the last
          // character" implies that there is exactly one '*'.
          int starPos = literal.indexOf('*');
          int qmPos = literal.indexOf('?');
          if (starPos != -1 || qmPos != -1) {
              // Wildcard terms bypass the analyzer, so they are lower-cased by
              // hand. A fixed locale is used on purpose: with the default
              // locale "I" would become a dotless i under e.g. a Turkish
              // locale and never match a term lower-cased per character.
              // TODO the lower-casing should depend on the Analyzer
              if (starPos == literal.length() - 1 && qmPos == -1) {
                  // something like "word*"
                  String prefix = literal.substring(0, literal.length() - 1);
                  setQuery(new PrefixQuery(new Term(Index.CONTENT_FIELD_NAME,
                          prefix.toLowerCase(java.util.Locale.ENGLISH))));
              } else {
                  setQuery(new WildcardQuery(new Term(Index.CONTENT_FIELD_NAME,
                          literal.toLowerCase(java.util.Locale.ENGLISH))));
              }
          } else {
              termOrPhraseQuery(config, Index.CONTENT_FIELD_NAME, literal);
          }

          if (negated) {
              setQuery(negateQuery(getQuery()));
          }
      }

      /**
       * Tokenizes <code>text</code> with the configured analyzer and sets a
       * term query (one token), a phrase query (more than one token) or an
       * empty boolean query (no token at all) as the query of this expression.
       *
       * @param config supplies the analyzer used for tokenizing
       * @param field  name of the field the query terms refer to
       * @param text   the literal to tokenize
       */
      private void termOrPhraseQuery(IndexConfiguration config, String field,
              String text)
      {
          TokenStream ts = config.getAnalyzer().tokenStream(field,
                  new StringReader(text));

          ArrayList tokens = new ArrayList(20);
          try {
              try {
                  for (Token t = ts.next(); t != null; t = ts.next()) {
                      tokens.add(t.termText());
                  }
              } finally {
                  // always release the stream, even if tokenizing failed
                  ts.close();
              }
          } catch (IOException e) {
              // Deliberately ignored: the stream reads from a StringReader, so
              // this should not happen. Tokens collected so far are still used.
          }

          int count = tokens.size();
          if (count > 1) {
              PhraseQuery phraseQuery = new PhraseQuery();
              for (int i = 0; i < count; i++) {
                  phraseQuery.add(new Term(field, (String)tokens.get(i)));
              }
              setQuery(phraseQuery);
          } else if (count == 1) {
              setQuery(new TermQuery(new Term(field, (String)tokens.get(0))));
          } else {
              // the analyzer produced no token at all
              // TODO NOP query???
              setQuery(new BooleanQuery());
          }
      }
  }
  
  
  
  1.1                  
jakarta-slide/src/stores/org/apache/slide/index/lucene/LuceneContentIndexer.java
  
  Index: LuceneContentIndexer.java
  ===================================================================
  /*
  *
  * ====================================================================
  *
  * Copyright 2004 The Apache Software Foundation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  *
  */
  package org.apache.slide.index.lucene;
  
  import java.io.ByteArrayInputStream;
  import java.util.Hashtable;
  
  import javax.transaction.xa.XAException;
  import javax.transaction.xa.Xid;
  
  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.analysis.SimpleAnalyzer;
  
  import org.apache.slide.common.NamespaceAccessToken;
  import org.apache.slide.common.ServiceInitializationFailedException;
  import org.apache.slide.common.ServiceParameterErrorException;
  import org.apache.slide.common.ServiceParameterMissingException;
  import org.apache.slide.common.Uri;
  import org.apache.slide.content.NodeRevisionContent;
  import org.apache.slide.content.NodeRevisionDescriptor;
  import org.apache.slide.content.NodeRevisionNumber;
  import org.apache.slide.extractor.ExtractorManager;
  import org.apache.slide.search.IndexException;
  
  
  /**
   * IndexStore implementation for indexing content based on Jakarta Lucene.
   *
   * <p>The analyzer used for the content can be configured with the
   * <code>analyzer</code> parameter, which holds the name of a class that is
   * instantiated through its no-argument constructor. If the parameter is
   * missing or empty a {@link SimpleAnalyzer} is used.
   */
  public class LuceneContentIndexer extends AbstractLuceneIndexer
  {
      /** Name of the parameter holding the analyzer class name. */
      private static final String ANALYZER_PARAM = "analyzer";

      /** Value of the analyzer parameter; may be <code>null</code>. */
      private String analyzerClassName;

      /**
       * Initializes the default index configuration, installs the content
       * analyzer and creates the index.
       *
       * @throws ServiceInitializationFailedException if the analyzer cannot
       *         be created or an {@link IndexException} is raised while
       *         setting up the index
       */
      public void initialize(NamespaceAccessToken token)
              throws ServiceInitializationFailedException
      {
          super.initialize(token);
          try {
              indexConfiguration.initDefaultConfiguration();

              indexConfiguration.setContentAnalyzer(
                      createAnalyzer(this.analyzerClassName));

              this.index = new Index(indexConfiguration, getLogger(),
                      "content " + this.scope);
          }
          catch (IndexException e) {
              throw new ServiceInitializationFailedException(this, e);
          }
      }

      /**
       * Reads the optional <code>analyzer</code> parameter in addition to
       * the parameters handled by the super class.
       */
      public void setParameters(Hashtable parameters)
          throws ServiceParameterErrorException,
                 ServiceParameterMissingException
      {
          super.setParameters(parameters);
          analyzerClassName = (String)parameters.get(ANALYZER_PARAM);
      }

      /**
       * This implementation just calls the super implementation and catches
       * the <code>XAException</code> it may throw, so that a problem in the
       * content index never makes a commit fail. The error is logged only.
       */
      public void commit(Xid xid, boolean onePhase) throws XAException
      {
          try {
              super.commit(xid, onePhase);
          } catch (XAException e) {
              // deliberately not rethrown, see method comment
              error("Error while committing to content index ({0})", e);
          }
      }

      /**
       * Adds an index job for the given content to the current transaction,
       * provided the resource is to be content indexed.
       *
       * @see org.apache.slide.search.Indexer#createIndex(org.apache.slide.common.Uri,
       *      org.apache.slide.content.NodeRevisionDescriptor,
       *      org.apache.slide.content.NodeRevisionContent)
       */
      public void createIndex(Uri uri, NodeRevisionDescriptor revisionDescriptor,
              NodeRevisionContent revisionContent) throws IndexException
      {
          if (shouldIndexContent(uri, revisionDescriptor)) {
              TransactionalIndexResource indexResource = getCurrentTxn();
              indexResource.addIndexJob(uri, revisionDescriptor,
                      new ByteArrayInputStream(revisionContent.getContentBytes()));
          }
      }

      /**
       * Adds an update job for the given content to the current transaction,
       * provided the resource is to be content indexed.
       *
       * @see org.apache.slide.search.Indexer#updateIndex(org.apache.slide.common.Uri,
       *      org.apache.slide.content.NodeRevisionDescriptor,
       *      org.apache.slide.content.NodeRevisionContent)
       */
      public void updateIndex(Uri uri, NodeRevisionDescriptor revisionDescriptor,
              NodeRevisionContent revisionContent) throws IndexException
      {
          if (shouldIndexContent(uri, revisionDescriptor)) {
              TransactionalIndexResource indexResource = getCurrentTxn();
              indexResource.addUpdateJob(uri, revisionDescriptor,
                      new ByteArrayInputStream(revisionContent.getContentBytes()));
          }
      }

      /**
       * Adds a remove job for the given revision to the current transaction,
       * provided the resource is to be content indexed. No descriptor is
       * available here, so the extractor lookup is done with
       * <code>null</code> as descriptor.
       *
       * @see org.apache.slide.search.Indexer#dropIndex(org.apache.slide.common.Uri,
       *      org.apache.slide.content.NodeRevisionNumber)
       */
      public void dropIndex(Uri uri, NodeRevisionNumber number)
              throws IndexException
      {
          if (shouldIndexContent(uri, null)) {
              TransactionalIndexResource indexResource = getCurrentTxn();
              indexResource.addRemoveJob(uri, number);
          }
      }

      /**
       * Creates the analyzer to be used for content.
       *
       * @param clsName name of the analyzer class; if <code>null</code> or
       *                empty a {@link SimpleAnalyzer} is created
       * @return the analyzer, never <code>null</code>
       * @throws ServiceInitializationFailedException if the class cannot be
       *         found, cannot be instantiated or is not an {@link Analyzer}
       */
      protected Analyzer createAnalyzer(String clsName)
          throws ServiceInitializationFailedException
      {
          Analyzer analyzer;
          if (clsName == null || clsName.length() == 0) {
              analyzer = new SimpleAnalyzer();
          } else {
              try {
                  Class analyzerClazz = Class.forName(clsName);
                  analyzer = (Analyzer)analyzerClazz.newInstance();
              } catch (ClassNotFoundException e) {
                  throw analyzerFailure(clsName, e);
              } catch (InstantiationException e) {
                  throw analyzerFailure(clsName, e);
              } catch (IllegalAccessException e) {
                  throw analyzerFailure(clsName, e);
              } catch (ClassCastException e) {
                  // the configured class is not an Analyzer; report it like
                  // the other configuration problems instead of letting a
                  // runtime exception escape
                  throw analyzerFailure(clsName, e);
              }
          }

          info("using analyzer: {0}", analyzer.getClass().getName());
          return analyzer;
      }

      /**
       * Tells whether content jobs are to be scheduled for a resource: its
       * URI must pass <code>isIncluded</code> and the
       * <code>ExtractorManager</code> must report a content extractor for it.
       *
       * @param uri        the resource
       * @param descriptor the revision descriptor handed to the extractor
       *                   lookup; may be <code>null</code>
       */
      private boolean shouldIndexContent(Uri uri,
              NodeRevisionDescriptor descriptor) throws IndexException
      {
          return isIncluded(uri.toString())
              && ExtractorManager.getInstance().hasContentExtractor(
                      uri.getNamespace().getName(), uri.toString(), descriptor);
      }

      /**
       * Logs a failed analyzer instantiation and wraps its cause.
       *
       * @param clsName name of the analyzer class that could not be used
       * @param cause   the reason of the failure
       * @return the exception to be thrown by the caller
       */
      private ServiceInitializationFailedException analyzerFailure(
              String clsName, Exception cause)
      {
          // placeholders start at {0}, as in the other log calls of this class
          error("Error while instantiating analyzer {0} {1}",
                  clsName, cause.getMessage());
          return new ServiceInitializationFailedException(this, cause);
      }
  }


---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

cvs commit: jakarta-slide/src/stores/org/apache/slide/index/lucene LuceneContentIndexer.java

Reply via email to