Author: alexparvulescu Date: Fri Sep 27 08:55:58 2013 New Revision: 1526836
URL: http://svn.apache.org/r1526836 Log: OAK-1022 Add a custom Oak Lucene analyzer Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java?rev=1526836&r1=1526835&r2=1526836&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java (original) +++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java Fri Sep 27 08:55:58 2013 @@ -16,24 +16,17 @@ */ package org.apache.jackrabbit.oak.plugins.index.lucene; -import java.io.IOException; import java.io.Reader; -import org.apache.jackrabbit.oak.plugins.index.lucene.util.OakWordTokenFilter; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.LowerCaseFilter; -import org.apache.lucene.analysis.standard.StandardFilter; -import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter; import org.apache.lucene.util.Version; public class OakAnalyzer extends Analyzer { - /** Default maximum allowed token length */ - public static final int DEFAULT_MAX_TOKEN_LENGTH = 255; - - private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH; - private final Version matchVersion; /** @@ -47,36 +40,16 @@ public class OakAnalyzer extends Analyze this.matchVersion = matchVersion; } - /** - * Set maximum allowed token length. If a token is seen that exceeds this - * length then it is discarded. 
This setting only takes effect the next time - * tokenStream or tokenStream is called. - */ - public void setMaxTokenLength(int length) { - maxTokenLength = length; - } - - /** - * @see #setMaxTokenLength - */ - public int getMaxTokenLength() { - return maxTokenLength; - } - @Override protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) { - final StandardTokenizer src = new StandardTokenizer(matchVersion, reader); - src.setMaxTokenLength(maxTokenLength); - TokenStream tok = new StandardFilter(matchVersion, src); - tok = new LowerCaseFilter(matchVersion, tok); - tok = new OakWordTokenFilter(matchVersion, tok); - return new TokenStreamComponents(src, tok) { - @Override - protected void setReader(final Reader reader) throws IOException { - src.setMaxTokenLength(OakAnalyzer.this.maxTokenLength); - super.setReader(reader); - } - }; + WhitespaceTokenizer src = new WhitespaceTokenizer(matchVersion, reader); + TokenStream tok = new LowerCaseFilter(matchVersion, src); + tok = new WordDelimiterFilter(tok, + WordDelimiterFilter.GENERATE_WORD_PARTS + | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE + | WordDelimiterFilter.GENERATE_NUMBER_PARTS, null); + + return new TokenStreamComponents(src, tok); } }
