With some trial and error I realized two things...
1) the order of the terms in the BooleanQuery seems to matter - but in terms of their "natural order", not their order in the doc (which is why I was so confused trying to reproduce it). 2) the problem happens when using termVectors but *NOT* using termVectorPositions. The test patch below demonstrates the problem (applies to branch_9x). -Hoss http://www.lucidworks.com/ diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermVec.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermVec.java index 341318739f1..b94d60c3f85 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermVec.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermVec.java @@ -76,6 +76,51 @@ public class TestUnifiedHighlighterTermVec extends LuceneTestCase { dir.close(); } + public void testTermVecButNoPositions1() throws Exception { + testTermVecButNoPositions("x", "y", "y x", "<b>y</b> <b>x</b>"); + } + public void testTermVecButNoPositions2() throws Exception { + testTermVecButNoPositions("y", "x", "y x", "<b>y</b> <b>x</b>"); + } + public void testTermVecButNoPositions3() throws Exception { + testTermVecButNoPositions("zzz", "yyy", "zzz yyy", "<b>zzz</b> <b>yyy</b>"); + } + public void testTermVecButNoPositions4() throws Exception { + testTermVecButNoPositions("zzz", "yyy", "yyy zzz", "<b>yyy</b> <b>zzz</b>"); + } + public void testTermVecButNoPositions(String aaa, String bbb, + String indexed, String expected) throws Exception { + + final FieldType tvNoPosType = new FieldType(org.apache.lucene.document.TextField.TYPE_STORED); + tvNoPosType.setStoreTermVectors(true); + // tvNoPosType.setStoreTermVectorPositions(true); // cause of problem seems to be lack of positions + tvNoPosType.setStoreTermVectorOffsets(true); + tvNoPosType.freeze(); + + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer); + + Field 
body = new Field("body", indexed, tvNoPosType); + Document document = new Document(); + document.add(body); + iw.addDocument(document); + try (IndexReader ir = iw.getReader()) { + iw.close(); + IndexSearcher searcher = newSearcher(ir); + BooleanQuery query = + new BooleanQuery.Builder() + // WTF? order of the terms in the boolean query also matters? + .add(new TermQuery(new Term("body", aaa)), BooleanClause.Occur.MUST) + .add(new TermQuery(new Term("body", bbb)), BooleanClause.Occur.MUST) + .build(); + TopDocs topDocs = searcher.search(query, 10); + assertEquals(1, topDocs.totalHits.value); + UnifiedHighlighter highlighter = UnifiedHighlighter.builder(searcher, indexAnalyzer).build(); + String[] snippets = highlighter.highlight("body", query, topDocs, 2); + assertEquals(1, snippets.length); + assertTrue(snippets[0], snippets[0].contains(expected)); + } + } + public void testFetchTermVecsOncePerDoc() throws IOException { RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer); --------------------------------------------------------------------- To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org For additional commands, e-mail: java-user-h...@lucene.apache.org