fix lucene querying
Project: http://git-wip-us.apache.org/repos/asf/oodt/repo Commit: http://git-wip-us.apache.org/repos/asf/oodt/commit/ceadfcb2 Tree: http://git-wip-us.apache.org/repos/asf/oodt/tree/ceadfcb2 Diff: http://git-wip-us.apache.org/repos/asf/oodt/diff/ceadfcb2 Branch: refs/heads/master Commit: ceadfcb2b015956d915d6e9c58bd1b8ed7da4655 Parents: ab366e2 Author: Tom Barber <t...@analytical-labs.com> Authored: Wed Jul 26 13:16:31 2017 +0100 Committer: Tom Barber <t...@analytical-labs.com> Committed: Wed Jul 26 13:16:31 2017 +0100 ---------------------------------------------------------------------- .../oodt/cas/filemgr/tools/CASAnalyzer.java | 29 +++++++++++++++----- .../oodt/cas/filemgr/tools/QueryTool.java | 1 + 2 files changed, 23 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/oodt/blob/ceadfcb2/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/CASAnalyzer.java ---------------------------------------------------------------------- diff --git a/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/CASAnalyzer.java b/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/CASAnalyzer.java index bfe2384..53efeb9 100644 --- a/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/CASAnalyzer.java +++ b/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/CASAnalyzer.java @@ -19,17 +19,23 @@ package org.apache.oodt.cas.filemgr.tools; //Lucene imports +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.custom.CustomAnalyzer; import org.apache.lucene.analysis.standard.StandardFilter; +import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; //JDK imports import java.io.Reader; import java.util.Set; +import org.apache.lucene.util.AttributeFactory; /** @@ -44,6 +50,7 @@ import java.util.Set; */ public class CASAnalyzer extends Analyzer { private Set stopSet; + AttributeFactory factory = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY; /** * An array containing some common English words that are usually not useful @@ -60,17 +67,19 @@ public class CASAnalyzer extends Analyzer { @Override protected TokenStreamComponents createComponents(String fieldName) { TokenStream result = new WhitespaceTokenizer(/*reader*/); - result = new StandardFilter(result); + /*result = new StandardFilter(result); result = new StopFilter(result, STOP_WORDS); - - //TODO FIX try { - throw new Exception("needs fixing"); - } catch (Exception e) { + result.reset(); + } catch (IOException e) { e.printStackTrace(); } - return null; //new TokenStreamComponents(); + StandardTokenizer tokenizer = new StandardTokenizer(factory); + + return new TokenStreamComponents(tokenizer, result);*/ + return new TokenStreamComponents(new WhitespaceTokenizer()); + } public void tokenStreams(String fname, Reader reader){ @@ -78,7 +87,13 @@ public class CASAnalyzer extends Analyzer { } /** Builds an analyzer with the given stop words. */ public CASAnalyzer(CharArraySet stopWords) { - stopSet = StopFilter.makeStopSet(stopWords.toArray(new String[stopWords.size()])); + Iterator iter = stopWords.iterator(); + List<String> sw = new ArrayList<>(); + while(iter.hasNext()) { + char[] stopWord = (char[]) iter.next(); + sw.add(new String(stopWord)); + } + stopSet = StopFilter.makeStopSet(sw); } http://git-wip-us.apache.org/repos/asf/oodt/blob/ceadfcb2/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/QueryTool.java ---------------------------------------------------------------------- diff --git a/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/QueryTool.java b/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/QueryTool.java index 4a9641f..7b37f90 100644 --- a/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/QueryTool.java +++ b/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/QueryTool.java @@ -77,6 +77,7 @@ public final class QueryTool { QueryParser parser; // note that "__FREE__" is a control work for free text searching parser = new QueryParser(freeTextBlock, new CASAnalyzer()); + Query luceneQ = null; try { luceneQ = (Query) parser.parse(query);