Github user alessandrobenedetti commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/389#discussion_r223774170 --- Diff: lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java --- @@ -186,6 +200,117 @@ public void testMultiValues() throws Exception { analyzer.close(); } + public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryAccordingToCorrectTermFrequencies() throws Exception { + MoreLikeThis mlt = new MoreLikeThis(reader); + Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); + mlt.setAnalyzer(analyzer); + mlt.setMinDocFreq(0); + mlt.setMinTermFreq(3); + mlt.setMinWordLen(1); + String sampleField1 = "text"; + String sampleField2 = "text2"; + mlt.setFieldNames(new String[]{sampleField1, sampleField2}); + + Map<String, Collection<Object>> filteredDocument = new HashMap<>(); + String textValue = "apache apache lucene lucene lucene"; + filteredDocument.put(sampleField1, Arrays.asList(textValue)); + filteredDocument.put(sampleField2, Arrays.asList(textValue)); + + BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument); + Collection<BooleanClause> clauses = query.clauses(); + assertEquals("Expected 1 clauses only!", 1, clauses.size()); + for (BooleanClause clause : clauses) { + Term term = ((TermQuery) clause.getQuery()).getTerm(); + assertThat(term, is(new Term(sampleField1, "lucene"))); + } + analyzer.close(); + } + + public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryWithCorrectTerms() throws Exception { + MoreLikeThis mlt = new MoreLikeThis(reader); + Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); + mlt.setAnalyzer(analyzer); + mlt.setMinDocFreq(0); + mlt.setMinTermFreq(3); + mlt.setMinWordLen(1); + String sampleField1 = "text"; + String sampleField2 = "text2"; + mlt.setFieldNames(new String[]{sampleField1}); + + Map<String, Collection<Object>> filteredDocument = new HashMap<>(); + String textValue1 = "apache 
apache lucene lucene"; + String textValue2 = "apache2 apache2 lucene2 lucene2 lucene2"; + filteredDocument.put(sampleField1, Arrays.asList(textValue1)); + filteredDocument.put(sampleField2, Arrays.asList(textValue2)); + + BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument); + Collection<BooleanClause> clauses = query.clauses(); + + HashSet<Term> unexpectedTerms = new HashSet<>(); + unexpectedTerms.add(new Term("text", "apache"));//Term Frequency < Minimum Accepted Term Frequency + unexpectedTerms.add(new Term("text", "lucene"));//Term Frequency < Minimum Accepted Term Frequency + unexpectedTerms.add(new Term("text", "apache2"));//Term Frequency < Minimum Accepted Term Frequency + unexpectedTerms.add(new Term("text", "lucene2"));//Wrong Field + + //None of the Not Expected terms is in the query + for (BooleanClause clause : clauses) { + Term term = ((TermQuery) clause.getQuery()).getTerm(); + assertFalse("Unexpected term '" + term + "' found in query terms", unexpectedTerms.contains(term)); + } + + assertEquals("Expected 0 clauses only!", 0, clauses.size()); + + analyzer.close(); + } + + public void testLiveMapDocument_queryFieldsSet_shouldBuildQueryFromSpecifiedFieldnamesOnly() throws Exception { + MoreLikeThis mlt = new MoreLikeThis(reader); + Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); + mlt.setAnalyzer(analyzer); + mlt.setMinDocFreq(1); + mlt.setMinTermFreq(2); + mlt.setMinWordLen(1); + String sampleField1 = "text"; + String sampleField2 = "text2"; + mlt.setFieldNames(new String[]{sampleField1}); + + Map<String, Collection<Object>> filteredDocument = new HashMap<>(); + String textValue1 = "apache apache lucene lucene"; + String textValue2 = "apache2 apache2 lucene2 lucene2 lucene2"; + filteredDocument.put(sampleField1, Arrays.asList(textValue1)); + filteredDocument.put(sampleField2, Arrays.asList(textValue2)); + + BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument); + Collection<BooleanClause> clauses = 
query.clauses(); + HashSet<Term> clausesTerms = new HashSet<>(); + for (BooleanClause clause : clauses) { + Term term = ((TermQuery) clause.getQuery()).getTerm(); + clausesTerms.add(term); + } + assertEquals("Expected 2 clauses only!", 2, clauses.size()); + + HashSet<Term> expectedTerms = new HashSet<>(); + expectedTerms.add(new Term("text", "apache")); + expectedTerms.add(new Term("text", "lucene")); + + HashSet<Term> unexpectedTerms = new HashSet<>(); + unexpectedTerms.add(new Term("text", "apache2")); + unexpectedTerms.add(new Term("text", "lucene2")); + + //None of the Not Expected terms is in the query + for (BooleanClause clause : clauses) { + Term term = ((TermQuery) clause.getQuery()).getTerm(); + assertFalse("Unexpected term '" + term + "' found in query terms", unexpectedTerms.contains(term)); --- End diff -- Move the accumulation of the clause terms (the `clausesTerms.add(term)` loop above) into this loop, so that the clauses are iterated only once.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org