knguyen     2005/12/22 15:21:45 CET

  Modified files:
    core/src/java/org/jahia/services/search/analyzer 
                                                     StandardAnalyzer.java 
  Added files:
    core/src/java/org/jahia/services/search/analyzer 
                                                     TokenWithDotFilter.java 
  Log:
  Add a dot post-filter (TokenWithDotFilter) that splits dot-separated tokens
  (acronyms, e-mail addresses, host names) produced by the Lucene analyzer
  
  Revision  Changes    Path
  1.4       +2 -1      
jahia/core/src/java/org/jahia/services/search/analyzer/StandardAnalyzer.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/search/analyzer/StandardAnalyzer.java.diff?r1=1.3&r2=1.4&f=h
  1.1       +64 -0     
jahia/core/src/java/org/jahia/services/search/analyzer/TokenWithDotFilter.java 
(new)
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/search/analyzer/TokenWithDotFilter.java?rev=1.1&content-type=text/plain
  
  
  
  Index: TokenWithDotFilter.java
  ====================================================================
  package org.jahia.services.search.analyzer;
  
  import org.apache.lucene.analysis.*;
  import org.apache.lucene.analysis.Token;
  import org.jahia.services.search.JahiaSearchConstant;
  
  import java.io.IOException;
  import java.util.Stack;
  import java.util.StringTokenizer;
  
  /**
   * Token filter that, for tokens typed as acronym, e-mail or host, additionally
   * emits the dot-separated parts of the token text as extra tokens.
   *
   * <p>The original token is always returned unchanged first. On the following
   * calls to {@link #next()} each part longer than one character is returned
   * as a token that carries the offsets of the original token and a position
   * increment of 0, i.e. it is stacked on the same position as the original.
   * Parts are kept on a stack, so they are returned in reverse order of
   * appearance.</p>
   *
   * <p>Tokens whose text starts with
   * {@link JahiaSearchConstant#JAHIA_PREFIX} are never split.</p>
   *
   * @author hollis
   */
  public class TokenWithDotFilter extends TokenFilter
          implements StandardTokenizerConstants {

      /** Token type strings, read from the inherited {@code tokenImage} table. */
      private static final String ACRONYM_TYPE = tokenImage[ACRONYM];
      private static final String EMAIL_TYPE = tokenImage[EMAIL];
      private static final String HOST_TYPE = tokenImage[HOST];

      /** Sub-tokens produced from the last split token, not yet returned. */
      private final Stack splittedWords = new Stack();

      /**
       * Creates a filter reading from the given token stream.
       *
       * @param in the token stream to filter
       */
      public TokenWithDotFilter(TokenStream in) {
          super(in);
      }

      /**
       * Returns the next token: a pending sub-token if any is left from the
       * previously split token, otherwise the next token of the wrapped stream
       * (after queueing its sub-tokens, if it qualifies for splitting).
       *
       * @return the next token, or {@code null} when the wrapped stream is
       *         exhausted and no sub-token is pending
       * @throws IOException if the wrapped stream fails
       */
      public final Token next() throws IOException {
          if (!splittedWords.isEmpty()) {
              return (Token) splittedWords.pop();
          }
          Token t = input.next();
          if (t == null) {
              return null;
          }
          splitWords(t);
          return t;
      }

      /**
       * Queues the dot-separated parts of the given token when its type is
       * acronym, e-mail or host and its text does not start with the Jahia
       * prefix. Parts of one character or less are dropped.
       *
       * @param t the token just read from the wrapped stream
       */
      private void splitWords(Token t) {
          if (!isSplittableType(t.type())) {
              return;
          }
          String termText = t.termText();
          if (termText.startsWith(JahiaSearchConstant.JAHIA_PREFIX)) {
              return;
          }
          StringTokenizer st = new StringTokenizer(termText, ".");
          while (st.hasMoreTokens()) {
              String text = st.nextToken();
              if (text.length() > 1) {
                  // The part keeps the offsets of the whole original token.
                  Token token = new Token(text, t.startOffset(), t.endOffset());
                  // Increment 0 puts the part on the same position as the original.
                  token.setPositionIncrement(0);
                  splittedWords.push(token);
              }
          }
      }

      /**
       * Tells whether a token type is one of the types this filter splits.
       * Compares by value rather than by reference so that an equal type
       * string coming from another instance is still recognised; a
       * {@code null} type simply does not match.
       *
       * @param type the token type, may be {@code null}
       * @return {@code true} for the acronym, e-mail and host types
       */
      private static boolean isSplittableType(String type) {
          return ACRONYM_TYPE.equals(type)
                  || EMAIL_TYPE.equals(type)
                  || HOST_TYPE.equals(type);
      }

  }
  
  
  
  Index: StandardAnalyzer.java
  ===================================================================
  RCS file: 
/home/cvs/repository/jahia/core/src/java/org/jahia/services/search/analyzer/StandardAnalyzer.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- StandardAnalyzer.java     19 Dec 2005 16:16:46 -0000      1.3
  +++ StandardAnalyzer.java     22 Dec 2005 14:21:45 -0000      1.4
  @@ -63,7 +63,7 @@
    * Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
    * LowerCaseFilter} and {@link StopFilter}.
    *
  - * @version $Id: StandardAnalyzer.java,v 1.3 2005/12/19 16:16:46 knguyen Exp 
$
  + * @version $Id: StandardAnalyzer.java,v 1.4 2005/12/22 14:21:45 knguyen Exp 
$
    */
   public class StandardAnalyzer extends Analyzer {
   
  @@ -116,6 +116,7 @@
           result = new LanguageIndependantFilter(result);
           if (this.indexeAnalyzer) {
               result = new TokenWithQuoteFilter(result);
  +            result = new TokenWithDotFilter(result);
           }
           return result;
       }
  

Reply via email to