On 20/02/2013 11:28, Paul Taylor wrote:
I'm just updating our codebase from Lucene 3.6 to Lucene 4.1, and it seems my tests that use NormalizeCharMap for replacing characters in the analyzers are no longer working.
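
For reference, in 3.6 the map was built by mutating NormalizeCharMap directly, while in 4.1 it has moved to org.apache.lucene.analysis.charfilter and is built through a Builder; roughly like this (going from memory on the 3.6 API, so take the old form with a pinch of salt):

    // Lucene 3.6 (mutable map):
    // NormalizeCharMap charConvertMap = new NormalizeCharMap();
    // charConvertMap.add("&", "and");

    // Lucene 4.1 (immutable map, built via a Builder):
    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    builder.add("&", "and");
    NormalizeCharMap charConvertMap = builder.build();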

Bump, anybody? I thought a self-contained test case would be enough to pique somebody's interest. Am I doing something silly? Maybe, but I can't see it.

Paul
Below I've created a self-contained test case; this is the output when I run it:


    --term=and--
    --term=gold--
    --term=platinum--
    name:"platinum and gold"
    Size1
    name:"platinum & gold"
    Size0

    java.lang.AssertionError:
    Expected :1
    Actual   :0
        at org.junit.Assert.fail(Assert.java:93)
        at org.junit.Assert.failNotEquals(Assert.java:647)
        at org.junit.Assert.assertEquals(Assert.java:128)
        at org.junit.Assert.assertEquals(Assert.java:472)
        at org.junit.Assert.assertEquals(Assert.java:456)
        at org.musicbrainz.search.analysis.Lucene41CharFilterTest.testAmpersandSearching(Lucene41CharFilterTest.java:89)

As you can see, the char filter does seem to work at index time, because the text 'platinum & gold' is converted to three terms: 'platinum', 'and', 'gold'. In fact searching works for 'platinum and gold' but not for the original 'platinum & gold', even though both indexing and searching use the same analyzer. Maybe the problem is with the query parser, but it's certainly related to 4.1 because this worked previously.
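
One thought, though I may well be misreading the new Analyzer: in 4.x the analyzer caches and reuses its TokenStreamComponents, and on reuse the reader is set straight onto the cached tokenizer without going through createComponents() again, so wrapping the reader with the MappingCharFilter inside createComponents() (as the SimpleAnalyzer in the test case below does) would only take effect the first time the analyzer is used. If that's what's happening, is overriding initReader() now the intended place to apply a CharFilter? Something like the untested sketch below (SimpleAnalyzer2 is just an illustrative variant reusing the charConvertMap and tokenizer from the test case):

    // Variant of the test's SimpleAnalyzer: the MappingCharFilter is applied in
    // initReader() instead of createComponents(), so it wraps every reader the
    // analyzer sees, including readers set on reused (cached) components.
    class SimpleAnalyzer2 extends SimpleAnalyzer {

        @Override
        protected Reader initReader(String fieldName, Reader reader) {
            return new MappingCharFilter(charConvertMap, reader);
        }

        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            // the incoming reader has already been wrapped by initReader()
            Tokenizer source = new MusicbrainzTokenizer(Version.LUCENE_41, reader);
            TokenStream filter = new LowerCaseFilter(Version.LUCENE_41, source);
            return new TokenStreamComponents(source, filter);
        }
    }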

Thanks, Paul


    package org.musicbrainz.search.analysis;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.charfilter.MappingCharFilter;
    import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
    import org.apache.lucene.analysis.core.LowerCaseFilter;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.*;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.Version;
    import org.junit.Test;
    import java.io.Reader;

    import static org.junit.Assert.assertEquals;

    public class Lucene41CharFilterTest
    {
        class SimpleAnalyzer extends Analyzer {

            protected NormalizeCharMap charConvertMap;

            protected void setCharConvertMap() {

                NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
                builder.add("&","and");
                charConvertMap = builder.build();
            }

            public SimpleAnalyzer() {
                setCharConvertMap();
            }

            @Override
            protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                Tokenizer source = new MusicbrainzTokenizer(Version.LUCENE_41,
                        new MappingCharFilter(charConvertMap, reader));
                TokenStream filter = new LowerCaseFilter(Version.LUCENE_41, source);
                return new TokenStreamComponents(source, filter);
            }
        }

        @Test
        public void testAmpersandSearching() throws Exception {

            Analyzer analyzer = new SimpleAnalyzer();
            RAMDirectory dir = new RAMDirectory();
            IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_41, analyzer);
            IndexWriter writer = new IndexWriter(dir, writerConfig);
            {
                Document doc = new Document();
                doc.add(new Field("name", "platinum & gold", Field.Store.YES, Field.Index.ANALYZED));
                writer.addDocument(doc);
            }
            writer.close();

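            // Dump the terms indexed for the "name" field to check what the char filter produced at index time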
            IndexReader ir = DirectoryReader.open(dir);
            Fields fields = MultiFields.getFields(ir);
            Terms terms = fields.terms("name");
            TermsEnum termsEnum = terms.iterator(null);
            BytesRef text;
            while((text = termsEnum.next()) != null) {
                System.out.println("--term=" + text.utf8ToString()+"--");
            }
            ir.close();

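            // Phrase search with the same analyzer: first the mapped form "platinum and gold", then the original "platinum & gold"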
            IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
            {
Query q = new QueryParser(Version.LUCENE_41, "name", analyzer).parse("\"platinum and gold\"");
                System.out.println(q);
                TopDocs td = searcher.search(q, 10);
                System.out.println("Size"+td.scoreDocs.length);
                assertEquals(1, searcher.search(q, 10).totalHits);
            }

            searcher = new IndexSearcher(DirectoryReader.open(dir));
            {
Query q = new QueryParser(Version.LUCENE_41, "name", analyzer).parse("\"platinum & gold\"");
                System.out.println(q);
                TopDocs td = searcher.search(q, 10);
                System.out.println("Size"+td.scoreDocs.length);
                assertEquals(1, searcher.search(q, 10).totalHits);
            }
        }
    }


---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org
For additional commands, e-mail: java-user-h...@lucene.apache.org



