Hi,
Not sure if this is user error or a bug. AnalyzingInfixSuggester.build()
appears to be deleting the stored fields in my index.
Please see the attached unit test.
test1: creates an index
test2: debug logs stored fields, searches it
test3: does AnalyzingInfixSuggester.build() and looks up some prefixes
(similar to AnalyzingInfixSuggesterTest)
test4: creates a fresh AnalyzingInfixSuggester (no build) and looks up
some prefixes (similar to AnalyzingInfixSuggesterTest)
test5: repeat of test2, but this time the index has no stored fields and
it fails
The dog food server uses AnalyzingInfixSuggester and it searches OK.
Could it be searching a version of the index unmodified by
AnalyzingInfixSuggester.build()?
Any ideas appreciated.
Once this is sorted out I've got a modified AnalyzingInfixSuggester to
try that should handle a field with multiple values
(AnalyzingInfixSuggester only uses the first of multiple values).
Cheers,
Neil.
package org.t3as.snomed.common;
import static org.junit.Assert.assertEquals;
import java.io.File;
import java.io.IOException;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Tests 1 - 4 are similar to the unit tests for AnalyzingInfixSuggester except that test2 adds some searching.
* Test5 repeats the same search test as test2, but it fails this time.
* Conclusion: AnalyzingInfixSuggester.build() breaks the index for searching.
*/
public class TestSuggester {
private final Logger log = LoggerFactory.getLogger(getClass());
private final Version version = Version.LUCENE_47;
private final Analyzer analyzer = new EnglishAnalyzer(version);
private final File indexDir = new File("indexDir");
private final String fieldName = "name";
private static final String[] input = { "apple", "hand-made peppermints", "hydrogen peroxide", "damaged perpend replacement", "perpetration of sinister acts" };
private void dumpIndex(IndexReader r) throws IOException {
log.debug("dumpIndex: maxDoc = " + r.maxDoc());
StringBuilder buf = new StringBuilder();
for (int i = 0; i < r.maxDoc(); ++i) {
Document d = r.document(i);
buf.setLength(0);
String delim = "dumpIndex: doc " + i + ": ";
for (IndexableField f: d.getFields()) {
buf.append(delim).append(f.name()).append(" -> ").append(f.stringValue());
delim = ", ";
}
if (buf.length() == 0) buf.append(delim).append("no stored fields");
log.debug(buf.toString());
}
}
@Test
public void test1() throws IOException {
IndexWriterConfig cfg = new IndexWriterConfig(version, analyzer);
cfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
IndexWriter w = new IndexWriter(FSDirectory.open(indexDir), cfg);
Document doc = new Document();
TextField fld = new TextField(fieldName, "value", Store.YES);
doc.add(fld);
for (String text: input) {
fld.setStringValue(text);
w.addDocument(doc);
}
w.close();
IndexReader r = DirectoryReader.open(FSDirectory.open(indexDir));
assertEquals(input.length, r.maxDoc());
r.close();
}
private void searchTest() throws Exception {
IndexSearcher s = new IndexSearcher(DirectoryReader.open(FSDirectory.open(indexDir)));
dumpIndex(s.getIndexReader());
QueryParser p = new QueryParser(version, fieldName, analyzer);
Query q = p.parse("per*");
log.debug("searchTest: q = " + q.toString());
TopDocs hits = s.search(q, 10);
assertEquals(3, hits.totalHits);
assertEquals(3, hits.scoreDocs.length);
s.getIndexReader().close();
}
@Test
public void test2() throws Exception {
searchTest();
}
private static final class MyInputIterator implements InputIterator {
private final String[] input;
private int i = -1;
public MyInputIterator(String[] input) {
super();
this.input = input;
}
@Override
public BytesRef next() throws IOException {
++i;
return i < input.length ? new BytesRef(input[i]) : null;
}
@Override
public Comparator<BytesRef> getComparator() {
return null;
}
@Override
public long weight() {
return 0;
}
@Override
public BytesRef payload() {
return null;
}
@Override
public boolean hasPayloads() {
return false;
}
}
private void lookupTest(AnalyzingInfixSuggester sug, String term, int expected) {
List<LookupResult> r = sug.lookup(term, 10, true, true);
log.debug("lookupTest: " + term + " -> " + r);
assertEquals(expected, r.size());
}
private void suggesterTest(boolean build) throws IOException {
AnalyzingInfixSuggester sug = new AnalyzingInfixSuggester(version, indexDir, analyzer, analyzer, 4);
if (build) sug.build(new MyInputIterator(input));
lookupTest(sug, "pe", 4);
lookupTest(sug, "per", 3);
lookupTest(sug, "perp", 2);
lookupTest(sug, "perpe", 2);
sug.close();
}
@Test
public void test3() throws IOException {
suggesterTest(true);
}
@Test
public void test4() throws IOException {
suggesterTest(false);
}
@Test
public void test5() throws Exception {
searchTest();
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org
For additional commands, e-mail: java-user-h...@lucene.apache.org