knguyen 2005/12/22 15:21:45 CET
Modified files:
core/src/java/org/jahia/services/search/analyzer
StandardAnalyzer.java
Added files:
core/src/java/org/jahia/services/search/analyzer
TokenWithDotFilter.java
Log:
add Dot post filter to split words with lucene analyzer
Revision Changes Path
1.4 +2 -1
jahia/core/src/java/org/jahia/services/search/analyzer/StandardAnalyzer.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/search/analyzer/StandardAnalyzer.java.diff?r1=1.3&r2=1.4&f=h
1.1 +64 -0
jahia/core/src/java/org/jahia/services/search/analyzer/TokenWithDotFilter.java
(new)
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/search/analyzer/TokenWithDotFilter.java?rev=1.1&content-type=text/plain
Index: TokenWithDotFilter.java
====================================================================
package org.jahia.services.search.analyzer;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.Token;
import org.jahia.services.search.JahiaSearchConstant;
import java.io.IOException;
import java.util.Stack;
import java.util.StringTokenizer;
/**
 * Token filter that, in addition to each token of the wrapped stream, emits
 * the dot-separated parts of tokens typed as acronym, e-mail or host by the
 * standard tokenizer.
 *
 * <p>The original token is always returned first and unchanged. Its parts are
 * then returned by the following calls to {@link #next()}, each with a
 * position increment of {@code 0} and with the start/end offsets of the
 * original token. Parts of one character or less are dropped. Because the
 * parts are buffered on a stack, they are emitted in reverse order of their
 * occurrence in the original text.</p>
 *
 * <p>Tokens whose text starts with
 * {@code JahiaSearchConstant.JAHIA_PREFIX} are never split.</p>
 */
public class TokenWithDotFilter extends TokenFilter
        implements StandardTokenizerConstants {

    /** Token type assigned by the standard tokenizer to acronyms. */
    private static final String ACRONYM_TYPE = tokenImage[ACRONYM];

    /** Token type assigned by the standard tokenizer to e-mail addresses. */
    private static final String EMAIL_TYPE = tokenImage[EMAIL];

    /** Token type assigned by the standard tokenizer to host names. */
    private static final String HOST_TYPE = tokenImage[HOST];

    /** Parts of the last split token that still have to be returned. */
    private final Stack splittedWords;

    /**
     * Creates a filter reading its tokens from the given stream.
     *
     * @param in the token stream to wrap
     */
    public TokenWithDotFilter(TokenStream in) {
        super(in);
        splittedWords = new Stack();
    }

    /**
     * Returns the next token: a pending part of the previously split token if
     * there is one, otherwise the next token of the wrapped stream (whose
     * parts, if any, are buffered for the following calls).
     *
     * @return the next token, or {@code null} when the wrapped stream is
     *         exhausted and no part is pending
     * @throws IOException if the wrapped stream fails
     */
    public final Token next() throws IOException {
        // Drain the parts of the previous token before reading a new one.
        if (!splittedWords.isEmpty()) {
            return (Token) splittedWords.pop();
        }
        Token t = input.next();
        if (t == null) {
            return null;
        }
        splitWords(t);
        return t;
    }

    /**
     * Buffers the dot-separated parts of the given token when its type is one
     * of the splittable types and its text does not start with the Jahia
     * prefix.
     *
     * @param t the token to inspect; it is not modified
     */
    private void splitWords(Token t) {
        if (!isSplittableType(t.type())) {
            return;
        }
        String termText = t.termText();
        if (termText.startsWith(JahiaSearchConstant.JAHIA_PREFIX)) {
            return;
        }
        StringTokenizer st = new StringTokenizer(termText, ".");
        while (st.hasMoreTokens()) {
            String text = st.nextToken();
            // Single characters are not worth emitting on their own.
            if (text.length() > 1) {
                Token token = new Token(text, t.startOffset(), t.endOffset());
                // Same position as the original token.
                token.setPositionIncrement(0);
                splittedWords.push(token);
            }
        }
    }

    /**
     * Tells whether tokens of the given type must be split on dots.
     *
     * <p>Types are compared by value rather than by reference, so the check
     * does not depend on upstream filters handing over the very same
     * {@code String} instance as the tokenizer constants.</p>
     *
     * @param type the token type, may be {@code null}
     * @return {@code true} for the acronym, e-mail and host types
     */
    private static boolean isSplittableType(String type) {
        return ACRONYM_TYPE.equals(type)
                || EMAIL_TYPE.equals(type)
                || HOST_TYPE.equals(type);
    }
}
Index: StandardAnalyzer.java
===================================================================
RCS file:
/home/cvs/repository/jahia/core/src/java/org/jahia/services/search/analyzer/StandardAnalyzer.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- StandardAnalyzer.java 19 Dec 2005 16:16:46 -0000 1.3
+++ StandardAnalyzer.java 22 Dec 2005 14:21:45 -0000 1.4
@@ -63,7 +63,7 @@
* Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
* LowerCaseFilter} and {@link StopFilter}.
*
- * @version $Id: StandardAnalyzer.java,v 1.3 2005/12/19 16:16:46 knguyen Exp
$
+ * @version $Id: StandardAnalyzer.java,v 1.4 2005/12/22 14:21:45 knguyen Exp
$
*/
public class StandardAnalyzer extends Analyzer {
@@ -116,6 +116,7 @@
result = new LanguageIndependantFilter(result);
if (this.indexeAnalyzer) {
result = new TokenWithQuoteFilter(result);
+ result = new TokenWithDotFilter(result);
}
return result;
}