On 20/02/2013 11:28, Paul Taylor wrote:
Just updating the codebase from Lucene 3.6 to Lucene 4.1, and it seems my
tests that use NormalizeCharMap for replacing characters in the
analyzers are not working.
Bump, anybody? I thought a self-contained test case would be enough to
pique somebody's interest. Am I doing something silly? Maybe, but I can't
see it.
Paul
Below I've created a self-contained test case; this is the output when
I run it:
--term=and--
--term=gold--
--term=platinum--
name:"platinum and gold"
Size1
name:"platinum & gold"
Size0
java.lang.AssertionError:
Expected :1
Actual :0
at org.junit.Assert.fail(Assert.java:93)
at org.junit.Assert.failNotEquals(Assert.java:647)
at org.junit.Assert.assertEquals(Assert.java:128)
at org.junit.Assert.assertEquals(Assert.java:472)
at org.junit.Assert.assertEquals(Assert.java:456)
at org.musicbrainz.search.analysis.Lucene41CharFilterTest.testAmpersandSearching(Lucene41CharFilterTest.java:89)
As you can see, the char filter does seem to work at index time, because
the text 'platinum & gold' is converted to three terms: 'platinum', 'and',
'gold'. In fact, searching for 'platinum and gold' works, but searching for
the original "platinum & gold" does not, even though both indexing and
searching use the same analyzer. Maybe the problem is with the query
parser, but it's certainly related to 4.1 because this worked previously.
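One thing I plan to try is dumping what the analyzer emits for the raw query
text, calling it twice so the second call goes through any reused components.
This is just a rough sketch (run inside a test method that can throw
Exception), assuming the usual reset/incrementToken/end/close consumption
pattern, plus java.io.StringReader and
org.apache.lucene.analysis.tokenattributes.CharTermAttribute imports:

    Analyzer analyzer = new SimpleAnalyzer();
    for (int pass = 0; pass < 2; pass++) {
        // first pass builds the components, second pass should hit the reuse path
        TokenStream ts = analyzer.tokenStream("name", new StringReader("platinum & gold"));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println("pass=" + pass + " token=" + term.toString());
        }
        ts.end();
        ts.close();
    }

If '&' only comes out as 'and' on the first pass, that would suggest the
MappingCharFilter wrapping done in createComponents is being bypassed when
the components are reused.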
Thanks, Paul
package org.musicbrainz.search.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.charfilter.MappingCharFilter;
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.junit.Test;
import java.io.Reader;
import static org.junit.Assert.assertEquals;
public class Lucene41CharFilterTest {

    class SimpleAnalyzer extends Analyzer {
        protected NormalizeCharMap charConvertMap;

        protected void setCharConvertMap() {
            NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
            builder.add("&", "and");
            charConvertMap = builder.build();
        }

        public SimpleAnalyzer() {
            setCharConvertMap();
        }

        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            // the char filter is applied by wrapping the reader handed to the tokenizer
            Tokenizer source = new MusicbrainzTokenizer(Version.LUCENE_41,
                    new MappingCharFilter(charConvertMap, reader));
            TokenStream filter = new LowerCaseFilter(Version.LUCENE_41, source);
            return new TokenStreamComponents(source, filter);
        }
    }

    @Test
    public void testAmpersandSearching() throws Exception {
        Analyzer analyzer = new SimpleAnalyzer();
        RAMDirectory dir = new RAMDirectory();
        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_41, analyzer);
        IndexWriter writer = new IndexWriter(dir, writerConfig);
        {
            Document doc = new Document();
            doc.add(new Field("name", "platinum & gold", Field.Store.YES, Field.Index.ANALYZED));
            writer.addDocument(doc);
        }
        writer.close();

        // dump the terms actually indexed for the name field
        IndexReader ir = DirectoryReader.open(dir);
        Fields fields = MultiFields.getFields(ir);
        Terms terms = fields.terms("name");
        TermsEnum termsEnum = terms.iterator(null);
        BytesRef text;
        while ((text = termsEnum.next()) != null) {
            System.out.println("--term=" + text.utf8ToString() + "--");
        }
        ir.close();

        IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
        {
            Query q = new QueryParser(Version.LUCENE_41, "name", analyzer).parse("\"platinum and gold\"");
            System.out.println(q);
            TopDocs td = searcher.search(q, 10);
            System.out.println("Size" + td.scoreDocs.length);
            assertEquals(1, searcher.search(q, 10).totalHits);
        }

        searcher = new IndexSearcher(DirectoryReader.open(dir));
        {
            Query q = new QueryParser(Version.LUCENE_41, "name", analyzer).parse("\"platinum & gold\"");
            System.out.println(q);
            TopDocs td = searcher.search(q, 10);
            System.out.println("Size" + td.scoreDocs.length);
            assertEquals(1, searcher.search(q, 10).totalHits);
        }
    }
}
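For reference, here is a variant of the analyzer I might try if the reused
tokenizer is indeed being handed the raw reader: apply the MappingCharFilter
in initReader() instead of inside createComponents(), so every incoming
reader gets wrapped. Just a sketch, on the assumption that
initReader(String, Reader) is the intended hook for char filters in 4.x;
SimpleAnalyzer2 is just my name for the variant, and it uses the same
imports and MusicbrainzTokenizer as above:

    class SimpleAnalyzer2 extends Analyzer {
        protected NormalizeCharMap charConvertMap;

        public SimpleAnalyzer2() {
            NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
            builder.add("&", "and");
            charConvertMap = builder.build();
        }

        @Override
        protected Reader initReader(String fieldName, Reader reader) {
            // wrap every reader, including the one supplied when components are reused
            return new MappingCharFilter(charConvertMap, reader);
        }

        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer source = new MusicbrainzTokenizer(Version.LUCENE_41, reader);
            return new TokenStreamComponents(source, new LowerCaseFilter(Version.LUCENE_41, source));
        }
    }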
---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org
For additional commands, e-mail: java-user-h...@lucene.apache.org