Hi, I just noticed that the TestStopAnalyzer unit test had a lot of dependencies on other pieces of code just to test whether the StopAnalyzer properly removes stop words. It is not a big change, but here is the diff produced by cvs diff -u TestStopAnalyzer:
Cheers, Grant Ingersoll Index: TestStopAnalyzer.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java,v retrieving revision 1.2 diff -u -r1.2 TestStopAnalyzer.java --- TestStopAnalyzer.java 8 Dec 2003 16:16:32 -0000 1.2 +++ TestStopAnalyzer.java 30 Jan 2004 21:17:50 -0000 @@ -1,60 +1,125 @@ package org.apache.lucene.analysis; +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact [EMAIL PROTECTED] + * + * 5. 
Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
+ */ + + + import junit.framework.TestCase; + import java.io.StringReader; -import java.util.ArrayList; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.Term; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.PhraseQuery; -import org.apache.lucene.search.Hits; +import java.io.IOException; +import java.util.Set; +import java.util.HashSet; -public class TestStopAnalyzer extends TestCase { - private StopAnalyzer stopAnalyzer = new StopAnalyzer(); +//import org.cnlp.utils.properties.ResourceBundleHelper; - public Token[] tokensFromAnalyzer(Analyzer analyzer, String text) - throws Exception { - TokenStream stream = - analyzer.tokenStream("contents", new StringReader(text)); - ArrayList tokenList = new ArrayList(); - while (true) { - Token token = stream.next(); - if (token == null) break; +public class TestStopAnalyzer extends TestCase { + private StopAnalyzer stop = new StopAnalyzer(); + + private Set inValidTokens = new HashSet(); + public TestStopAnalyzer(String s) { + super(s); + } - tokenList.add(token); + protected void setUp() { + for (int i = 0; i < StopAnalyzer.ENGLISH_STOP_WORDS.length; i++) { + inValidTokens.add(StopAnalyzer.ENGLISH_STOP_WORDS[i]); + } - - return (Token[]) tokenList.toArray(new Token[0]); } + protected void tearDown() { + } - public void testPhraseQuery() throws Exception { - RAMDirectory directory = new RAMDirectory(); - IndexWriter writer = new IndexWriter(directory, stopAnalyzer, true); - Document doc = new Document(); - doc.add(Field.Text("field", "the stop words are here")); - writer.addDocument(doc); - writer.close(); - - IndexSearcher searcher = new IndexSearcher(directory); - - // valid exact phrase query - PhraseQuery query = new PhraseQuery(); - query.add(new Term("field","stop")); - query.add(new Term("field","words")); - Hits hits = 
searcher.search(query); - assertEquals(1, hits.length()); - - // currently StopAnalyzer does not leave "holes", so this matches. - query = new PhraseQuery(); - query.add(new Term("field", "words")); - query.add(new Term("field", "here")); - hits = searcher.search(query); - assertEquals(1, hits.length()); - - searcher.close(); + public void testDefaults() { + + assertTrue(stop != null); + StringReader reader = new StringReader("This is a test of the english stop analyzer"); + TokenStream stream = stop.tokenStream("test", reader); + assertTrue(stream != null); + Token token = null; + try { + while ((token = stream.next()) != null) + { + assertTrue(inValidTokens.contains(token.termText()) == false); + } + } catch (IOException e) { + assertTrue(false); + } } -} + + public void testStopList() { + Set stopWordsSet = new HashSet(); + stopWordsSet.add("good"); + stopWordsSet.add("test"); + stopWordsSet.add("analyzer"); + StopAnalyzer newStop = new StopAnalyzer((String[])stopWordsSet.toArray(new String[3])); + StringReader reader = new StringReader("This is a good test of the english stop analyzer"); + TokenStream stream = newStop.tokenStream("test", reader); + assertTrue(stream != null); + Token token = null; + try { + while ((token = stream.next()) != null) + { + String text = token.termText(); + assertTrue(stopWordsSet.contains(text) == false); + } + } catch (IOException e) { + assertTrue(false); + } + + } + +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]
