DCausse has uploaded a new change for review.
https://gerrit.wikimedia.org/r/281942
Change subject: Upgrade to elastic 2.0.0
......................................................................
Upgrade to elastic 2.0.0
* MultiPhrasePrefixQuery now uses the rewritten query because getTerms has been
removed
* Renamed DocsAndPositionsHitEnum to PostingsHitEnum to match lucene 5 names
* Excluded FILTER boolean clauses with isScoring() to avoid highlighting the
_type filter
* Switched to using guava directly instead of the shaded elastic packages,
since guava is now included in the lib directory
Change-Id: I7e9d332ad39cf07d23e1cbd05d86f0ce0c1a6ba5
---
M README.md
M experimental-highlighter-core/pom.xml
M
experimental-highlighter-core/src/main/java/org/wikimedia/search/highlighter/experimental/tools/GraphvizHitEnumGenerator.java
M
experimental-highlighter-core/src/test/java/org/wikimedia/search/highlighter/experimental/hit/BreakIteratorHitEnumTest.java
M experimental-highlighter-elasticsearch-plugin/forbidden-signatures.txt
M experimental-highlighter-elasticsearch-plugin/pom.xml
M
experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
M
experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/FieldWrapper.java
M
experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ElasticsearchQueryFlattener.java
M
experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/plugin/ExperimentalHighlighterModule.java
M
experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/plugin/ExperimentalHighlighterPlugin.java
M
experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/AbstractExperimentalHighlighterIntegrationTestBase.java
M
experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/ElasticsearchQueryFlattenerTest.java
M
experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/BasicQueriesTest.java
M
experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/MiscellaneousTest.java
M
experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/MultimatchTest.java
M
experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/OptionsTest.java
M
experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/PhraseQueryTest.java
M
experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/RegexTest.java
M experimental-highlighter-lucene/pom.xml
M
experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/QueryFlattener.java
R
experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/PostingsHitEnum.java
M
experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/TokenStreamHitEnum.java
M
experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/weight/DefaultSimilarityTermWeigher.java
R
experimental-highlighter-lucene/src/test/java/org/wikimedia/highlighter/experimental/lucene/hit/PostingsHitEnumFromPostingsTest.java
R
experimental-highlighter-lucene/src/test/java/org/wikimedia/highlighter/experimental/lucene/hit/PostingsHitEnumFromTermVectorsTest.java
M pom.xml
27 files changed, 195 insertions(+), 180 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/search/highlighter
refs/changes/42/281942/1
diff --git a/README.md b/README.md
index bbe2c92..f55fafa 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,8 @@
| Experimental Highlighter Plugin | ElasticSearch |
|---------------------------------|-----------------|
-| 1.7.0, master branch | 1.7.X |
+| 2.0.0, master branch | 2.0.x |
+| 1.7.0 -> 1.7.1, 1.7 branch | 1.7.X |
| 1.6.0, 1.6 branch | 1.6.X |
| 1.5.0 -> 1.5.1, 1.5 branch | 1.5.X |
| 1.4.0 -> 1.4.1, 1.4 branch | 1.4.X |
diff --git a/experimental-highlighter-core/pom.xml
b/experimental-highlighter-core/pom.xml
index 095a3f7..651e12a 100644
--- a/experimental-highlighter-core/pom.xml
+++ b/experimental-highlighter-core/pom.xml
@@ -3,7 +3,7 @@
<parent>
<groupId>org.wikimedia.search.highlighter</groupId>
<artifactId>experimental</artifactId>
- <version>1.7.1-SNAPSHOT</version>
+ <version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>experimental-highlighter-core</artifactId>
<packaging>jar</packaging>
diff --git
a/experimental-highlighter-core/src/main/java/org/wikimedia/search/highlighter/experimental/tools/GraphvizHitEnumGenerator.java
b/experimental-highlighter-core/src/main/java/org/wikimedia/search/highlighter/experimental/tools/GraphvizHitEnumGenerator.java
index b7f3882..4df7952 100644
---
a/experimental-highlighter-core/src/main/java/org/wikimedia/search/highlighter/experimental/tools/GraphvizHitEnumGenerator.java
+++
b/experimental-highlighter-core/src/main/java/org/wikimedia/search/highlighter/experimental/tools/GraphvizHitEnumGenerator.java
@@ -42,7 +42,6 @@
/**
* Add a node to the current graph
* @param hitEnum
- * @return the node id
*/
public void addNode(HitEnum hitEnum) {
addNode(hitEnum, Collections.<String, Object>emptyMap());
@@ -106,8 +105,8 @@
/**
* Add a link between two nodes.
- * @param from
- * @param to
+ * @param parent
+ * @param child
*/
public void addLink(HitEnum parent, HitEnum child) {
links.add(new Link(getHitEnumId(parent), getHitEnumId(child)));
diff --git
a/experimental-highlighter-core/src/test/java/org/wikimedia/search/highlighter/experimental/hit/BreakIteratorHitEnumTest.java
b/experimental-highlighter-core/src/test/java/org/wikimedia/search/highlighter/experimental/hit/BreakIteratorHitEnumTest.java
index 16eb47e..4960e70 100644
---
a/experimental-highlighter-core/src/test/java/org/wikimedia/search/highlighter/experimental/hit/BreakIteratorHitEnumTest.java
+++
b/experimental-highlighter-core/src/test/java/org/wikimedia/search/highlighter/experimental/hit/BreakIteratorHitEnumTest.java
@@ -15,8 +15,8 @@
import
org.wikimedia.search.highlighter.experimental.source.StringSourceExtracter;
public class BreakIteratorHitEnumTest extends AbstractHitEnumTestBase {
- @Override
- protected HitEnum buildEnum(String str) {
+ @Override
+ protected HitEnum buildEnum(String str) {
return BreakIteratorHitEnum.englishWords(str);
}
diff --git
a/experimental-highlighter-elasticsearch-plugin/forbidden-signatures.txt
b/experimental-highlighter-elasticsearch-plugin/forbidden-signatures.txt
index d713bbf..242d8db 100644
--- a/experimental-highlighter-elasticsearch-plugin/forbidden-signatures.txt
+++ b/experimental-highlighter-elasticsearch-plugin/forbidden-signatures.txt
@@ -31,9 +31,6 @@
org.apache.lucene.index.IndexWriter#forceMergeDeletes() @ use
Merges#forceMergeDeletes
org.apache.lucene.index.IndexWriter#forceMergeDeletes(boolean) @ use
Merges#forceMergeDeletes
-@defaultMessage QueryWrapperFilter is cachable by default - use Queries#wrap
instead
-org.apache.lucene.search.QueryWrapperFilter#<init>(org.apache.lucene.search.Query)
-
@defaultMessage Only use wait / notify when really needed try to use
concurrency primitives, latches or callbacks instead.
java.lang.Object#wait()
java.lang.Object#wait(long)
diff --git a/experimental-highlighter-elasticsearch-plugin/pom.xml
b/experimental-highlighter-elasticsearch-plugin/pom.xml
index 1409ecb..6b0baf4 100644
--- a/experimental-highlighter-elasticsearch-plugin/pom.xml
+++ b/experimental-highlighter-elasticsearch-plugin/pom.xml
@@ -3,7 +3,7 @@
<parent>
<groupId>org.wikimedia.search.highlighter</groupId>
<artifactId>experimental</artifactId>
- <version>1.7.1-SNAPSHOT</version>
+ <version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>experimental-highlighter-elasticsearch-plugin</artifactId>
<packaging>jar</packaging>
@@ -22,6 +22,16 @@
<artifactId>lucene-test-framework</artifactId>
<version>${lucene.version}</version>
<scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <artifactId>randomizedtesting-runner</artifactId>
+ <groupId>com.carrotsearch.randomizedtesting</groupId>
+ </exclusion>
+ <exclusion>
+ <artifactId>junit4-ant</artifactId>
+ <groupId>com.carrotsearch.randomizedtesting</groupId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.wikimedia.search.highlighter</groupId>
@@ -38,6 +48,12 @@
<artifactId>elasticsearch</artifactId>
<version>${elasticsearch.version}</version>
<type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.elasticsearch.plugin</groupId>
+ <artifactId>analysis-icu</artifactId>
+ <version>${elasticsearch.version}</version>
<scope>test</scope>
</dependency>
<dependency>
@@ -54,11 +70,12 @@
<type>test-jar</type>
<scope>test</scope>
</dependency>
+ <!-- override dep inherited from experimental-highlighter-lucene -->
<dependency>
- <groupId>org.elasticsearch</groupId>
- <artifactId>elasticsearch-analysis-icu</artifactId>
- <version>${elasticsearch.icu.version}</version>
- <scope>test</scope>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>18.0</version>
+ <scope>provided</scope>
</dependency>
</dependencies>
diff --git
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
index a19a4ed..8b69d6b 100644
---
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
+++
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
@@ -14,8 +14,6 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.util.automaton.RegExp;
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
-import org.elasticsearch.common.base.Function;
-import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.ESLoggerFactory;
import org.elasticsearch.common.text.StringAndBytesText;
@@ -57,18 +55,16 @@
import org.wikimedia.search.highlighter.experimental.tools.GraphvizHitEnum;
import
org.wikimedia.search.highlighter.experimental.tools.GraphvizSnippetFormatter;
+import com.google.common.base.Function;
+import com.google.common.collect.Iterators;
+
public class ExperimentalHighlighter implements Highlighter {
+ public static final String NAME = "experimental";
private static final String CACHE_KEY = "highlight-experimental";
private static final Text EMPTY_STRING = new StringAndBytesText("");
private static final ESLogger log =
ESLoggerFactory.getLogger(ExperimentalHighlighter.class.getName());
@Override
- public String[] names() {
- return new String[] { "experimental" };
- }
-
- @Override
- @SuppressWarnings("rawtypes")
public boolean canHighlight(FieldMapper field) {
return true;
}
@@ -273,7 +269,7 @@
boolean removeHighFrequencyTermsFromCommonTerms =
getOption("remove_high_freq_terms_from_common_terms", true);
int maxExpandedTerms = getOption("max_expanded_terms", 1024);
// TODO simplify
- QueryCacheKey key = new
QueryCacheKey(context.query.originalQuery(), maxExpandedTerms, phraseAsTerms,
+ QueryCacheKey key = new QueryCacheKey(context.query,
maxExpandedTerms, phraseAsTerms,
removeHighFrequencyTermsFromCommonTerms);
weigher = cache.queryWeighers.get(key);
if (weigher != null) {
@@ -284,7 +280,7 @@
BytesRefHashTermInfos infos = new
BytesRefHashTermInfos(BigArrays.NON_RECYCLING_INSTANCE);
// context.context.addReleasable(infos);
weigher = new BasicQueryWeigher(new
ElasticsearchQueryFlattener(maxExpandedTerms, phraseAsTerms,
- removeHighFrequencyTermsFromCommonTerms), infos,
context.hitContext.topLevelReader(), context.query.originalQuery());
+ removeHighFrequencyTermsFromCommonTerms), infos,
context.hitContext.topLevelReader(), context.query);
// Build the QueryWeigher with the top level reader to get all
// the frequency information
cache.queryWeighers.put(key, weigher);
diff --git
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/FieldWrapper.java
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/FieldWrapper.java
index a7f73af..551d067 100644
---
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/FieldWrapper.java
+++
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/FieldWrapper.java
@@ -9,10 +9,8 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.util.BytesRef;
-import org.elasticsearch.common.base.Function;
-import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.core.StringFieldMapper;
@@ -20,7 +18,7 @@
import org.elasticsearch.search.highlight.SearchContextHighlight.FieldOptions;
import
org.wikimedia.highlighter.experimental.elasticsearch.BytesRefTermWeigherCache;
import org.wikimedia.highlighter.experimental.elasticsearch.SegmenterFactory;
-import
org.wikimedia.highlighter.experimental.lucene.hit.DocsAndPositionsHitEnum;
+import org.wikimedia.highlighter.experimental.lucene.hit.PostingsHitEnum;
import org.wikimedia.highlighter.experimental.lucene.hit.TokenStreamHitEnum;
import
org.wikimedia.highlighter.experimental.lucene.hit.weight.BasicQueryWeigher;
import
org.wikimedia.highlighter.experimental.lucene.hit.weight.DefaultSimilarityTermWeigher;
@@ -38,6 +36,9 @@
import org.wikimedia.search.highlighter.experimental.snippet.MultiSegmenter;
import
org.wikimedia.search.highlighter.experimental.source.NonMergingMultiSourceExtracter;
import
org.wikimedia.search.highlighter.experimental.source.StringSourceExtracter;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Iterators;
public class FieldWrapper {
private final HighlightExecutionContext executionContext;
@@ -70,7 +71,7 @@
public FieldWrapper(HighlightExecutionContext executionContext,
HighlighterContext context,
BasicQueryWeigher weigher, String fieldName) {
assert !context.fieldName.equals(fieldName);
- FieldMapper<?> mapper =
context.context.smartNameFieldMapper(fieldName);
+ FieldMapper mapper =
context.context.mapperService().documentMapper(context.hitContext.hit().type()).mappers().smartNameFieldMapper(fieldName);
this.executionContext = executionContext;
this.context = new HighlighterContext(fieldName, context.field,
mapper, context.context,
context.hitContext, context.query);
@@ -250,7 +251,7 @@
}
private boolean canUsePostingsHitEnum() {
- return context.mapper.fieldType().indexOptions() ==
FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
+ return context.mapper.fieldType().indexOptions() ==
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
}
private boolean canUseVectorsHitEnum() {
@@ -260,19 +261,19 @@
}
private HitEnum buildPostingsHitEnum() throws IOException {
- return
DocsAndPositionsHitEnum.fromPostings(context.hitContext.reader(),
- context.hitContext.docId(), context.mapper.names().indexName(),
+ return PostingsHitEnum.fromPostings(context.hitContext.reader(),
+ context.hitContext.docId(),
context.mapper.fieldType().names().indexName(),
weigher.acceptableTerms(), getQueryWeigher(false),
getCorpusWeigher(false), weigher);
}
private HitEnum buildTermVectorsHitEnum() throws IOException {
- return
DocsAndPositionsHitEnum.fromTermVectors(context.hitContext.reader(),
- context.hitContext.docId(), context.mapper.names().indexName(),
+ return PostingsHitEnum.fromTermVectors(context.hitContext.reader(),
+ context.hitContext.docId(),
context.mapper.fieldType().names().indexName(),
weigher.acceptableTerms(), getQueryWeigher(false),
getCorpusWeigher(false), weigher);
}
private HitEnum buildTokenStreamHitEnum() throws IOException {
- Analyzer analyzer = context.mapper.indexAnalyzer();
+ Analyzer analyzer = context.mapper.fieldType().indexAnalyzer();
if (analyzer == null) {
analyzer =
context.context.analysisService().defaultIndexAnalyzer();
}
@@ -358,7 +359,7 @@
public int getPositionGap() {
if (positionGap < 0) {
if (context.mapper instanceof StringFieldMapper) {
- positionGap = ((StringFieldMapper)
context.mapper).getPositionOffsetGap();
+ positionGap = ((StringFieldMapper)
context.mapper).getPositionIncrementGap();
} else {
positionGap = 1;
}
diff --git
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ElasticsearchQueryFlattener.java
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ElasticsearchQueryFlattener.java
index 5513966..6455e4c 100644
---
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ElasticsearchQueryFlattener.java
+++
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ElasticsearchQueryFlattener.java
@@ -1,12 +1,8 @@
package org.wikimedia.highlighter.experimental.elasticsearch;
-import java.util.List;
-
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
+import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.Query;
-import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
-import org.elasticsearch.common.lucene.search.XFilteredQuery;
import
org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery;
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
import org.wikimedia.highlighter.experimental.lucene.QueryFlattener;
@@ -26,15 +22,11 @@
@Override
protected boolean flattenUnknown(Query query, float pathBoost, Object
sourceOverride,
IndexReader reader, Callback callback) {
- if (query instanceof XFilteredQuery) {
- flattenQuery((XFilteredQuery) query, pathBoost, sourceOverride,
reader, callback);
+ if (query instanceof FilteredQuery) {
+ flattenQuery((FilteredQuery) query, pathBoost, sourceOverride,
reader, callback);
return true;
}
- if (query instanceof MultiPhrasePrefixQuery) {
- flattenQuery((MultiPhrasePrefixQuery) query, pathBoost,
sourceOverride, reader,
- callback);
- return true;
- }
+
if (query instanceof FunctionScoreQuery) {
flattenQuery((FunctionScoreQuery) query, pathBoost,
sourceOverride, reader,
callback);
@@ -48,53 +40,13 @@
return false;
}
- protected void flattenQuery(XFilteredQuery query, float pathBoost, Object
sourceOverride,
+ protected void flattenQuery(FilteredQuery query, float pathBoost, Object
sourceOverride,
IndexReader reader, Callback callback) {
if (query.getQuery() != null) {
flatten(query.getQuery(), pathBoost * query.getBoost(),
sourceOverride, reader,
callback);
}
// TODO maybe flatten filter like Elasticsearch does
- }
-
- protected void flattenQuery(MultiPhrasePrefixQuery query, float pathBoost,
- Object sourceOverride, IndexReader reader, Callback callback) {
- // Note that we don't declare all of these to come from a single source
- // because that will cause each individual term to be devalued in
- // relation to things outside the term query
- List<Term[]> termArrays = query.getTermArrays();
- float boost = pathBoost * query.getBoost();
- if (termArrays.isEmpty()) {
- return;
- }
- int sizeMinus1 = termArrays.size() - 1;
- if (phraseAsTerms()) {
- for (int i = 0; i < sizeMinus1; i++) {
- Term[] termArray = termArrays.get(i);
- for (Term term : termArray) {
- callback.flattened(term.bytes(), boost, sourceOverride);
- }
- }
- for (Term term : termArrays.get(sizeMinus1)) {
- flattenPrefixQuery(term.bytes(), boost, sourceOverride,
callback);
- }
- } else {
- callback.startPhrase(termArrays.size(), boost);
- for (int i = 0; i < sizeMinus1; i++) {
- Term[] termArray = termArrays.get(i);
- callback.startPhrasePosition(termArray.length);
- for (Term term : termArray) {
- callback.flattened(term.bytes(), 0, sourceOverride);
- }
- callback.endPhrasePosition();
- }
- callback.startPhrasePosition(termArrays.get(sizeMinus1).length);
- for (Term term : termArrays.get(sizeMinus1)) {
- flattenPrefixQuery(term.bytes(), 0, sourceOverride, callback);
- }
- callback.endPhrasePosition();
- callback.endPhrase(query.getField(), query.getSlop(), boost);
- }
}
protected void flattenQuery(FunctionScoreQuery query, float pathBoost,
diff --git
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/plugin/ExperimentalHighlighterModule.java
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/plugin/ExperimentalHighlighterModule.java
index e2b066c..b93e676 100644
---
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/plugin/ExperimentalHighlighterModule.java
+++
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/plugin/ExperimentalHighlighterModule.java
@@ -1,13 +1,15 @@
package org.wikimedia.highlighter.experimental.elasticsearch.plugin;
import org.elasticsearch.common.inject.AbstractModule;
-import org.elasticsearch.common.inject.multibindings.Multibinder;
+import org.elasticsearch.common.inject.multibindings.MapBinder;
import org.elasticsearch.search.highlight.ExperimentalHighlighter;
import org.elasticsearch.search.highlight.Highlighter;
public class ExperimentalHighlighterModule extends AbstractModule {
@Override
protected void configure() {
- Multibinder.newSetBinder(binder(),
Highlighter.class).addBinding().to(ExperimentalHighlighter.class);
+ MapBinder.newMapBinder(binder(), String.class, Highlighter.class)
+ .addBinding(ExperimentalHighlighter.NAME)
+ .to(ExperimentalHighlighter.class);
}
}
diff --git
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/plugin/ExperimentalHighlighterPlugin.java
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/plugin/ExperimentalHighlighterPlugin.java
index 451299c..112b949 100644
---
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/plugin/ExperimentalHighlighterPlugin.java
+++
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/plugin/ExperimentalHighlighterPlugin.java
@@ -1,12 +1,12 @@
package org.wikimedia.highlighter.experimental.elasticsearch.plugin;
import java.util.Collection;
+import java.util.Collections;
-import org.elasticsearch.common.collect.Lists;
import org.elasticsearch.common.inject.Module;
-import org.elasticsearch.plugins.AbstractPlugin;
+import org.elasticsearch.plugins.Plugin;
-public class ExperimentalHighlighterPlugin extends AbstractPlugin {
+public class ExperimentalHighlighterPlugin extends Plugin {
@Override
public String description() {
@@ -19,9 +19,7 @@
}
@Override
- public Collection<Class<? extends Module>> modules() {
- Collection<Class<? extends Module>> modules = Lists.newArrayList();
- modules.add(ExperimentalHighlighterModule.class);
- return modules;
+ public Collection<Module> nodeModules() {
+ return Collections.<Module>singleton(new
ExperimentalHighlighterModule());
}
}
diff --git
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/AbstractExperimentalHighlighterIntegrationTestBase.java
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/AbstractExperimentalHighlighterIntegrationTestBase.java
index 8ccc782..f1e6e1b 100644
---
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/AbstractExperimentalHighlighterIntegrationTestBase.java
+++
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/AbstractExperimentalHighlighterIntegrationTestBase.java
@@ -8,18 +8,20 @@
import java.util.List;
import org.elasticsearch.action.search.SearchRequestBuilder;
-import org.elasticsearch.common.collect.ImmutableList;
-import org.elasticsearch.common.collect.ImmutableMap;
-import org.elasticsearch.common.settings.ImmutableSettings;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.QueryBuilder;
-import org.elasticsearch.plugins.PluginsService;
-import org.elasticsearch.test.ElasticsearchIntegrationTest;
+import org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin;
+import org.elasticsearch.test.ESIntegTestCase;
+import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
+import
org.wikimedia.highlighter.experimental.elasticsearch.plugin.ExperimentalHighlighterPlugin;
[email protected](scope =
ElasticsearchIntegrationTest.Scope.SUITE, transportClientRatio = 0.0)
+@ClusterScope(scope = ESIntegTestCase.Scope.SUITE, transportClientRatio = 0.0)
public abstract class AbstractExperimentalHighlighterIntegrationTestBase
extends
- ElasticsearchIntegrationTest {
+ESIntegTestCase {
protected static final List<String> HIT_SOURCES =
ImmutableList.of("postings", "vectors",
"analyze");
@@ -197,8 +199,10 @@
*/
@Override
protected Settings nodeSettings(int nodeOrdinal) {
- return ImmutableSettings.builder().put(super.nodeSettings(nodeOrdinal))
- .put("plugins." + PluginsService.LOAD_PLUGIN_FROM_CLASSPATH,
true).build();
+ return Settings.settingsBuilder()
+ .put("plugin.types",
+ ExperimentalHighlighterPlugin.class.getName()
+ + "," + AnalysisICUPlugin.class.getName())
+ .put(super.nodeSettings(nodeOrdinal)).build();
}
-
}
diff --git
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/ElasticsearchQueryFlattenerTest.java
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/ElasticsearchQueryFlattenerTest.java
index 2db22f7..6636620 100644
---
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/ElasticsearchQueryFlattenerTest.java
+++
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/ElasticsearchQueryFlattenerTest.java
@@ -1,8 +1,5 @@
package org.wikimedia.highlighter.experimental.elasticsearch;
-import static org.hamcrest.Matchers.allOf;
-import static org.hamcrest.Matchers.not;
-import static org.junit.Assert.assertThat;
import static org.mockito.Matchers.anyFloat;
import static org.mockito.Matchers.anyInt;
import static org.mockito.Matchers.anyString;
@@ -12,20 +9,28 @@
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
-import static
org.wikimedia.highlighter.experimental.lucene.LuceneMatchers.recognises;
+import java.io.IOException;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.store.RAMDirectory;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import
org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction;
import
org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery;
import
org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery.FilterFunction;
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
import org.elasticsearch.common.lucene.search.function.ScoreFunction;
+import org.elasticsearch.index.mapper.ParseContext.Document;
import org.junit.Test;
-import org.mockito.ArgumentCaptor;
import org.wikimedia.highlighter.experimental.lucene.QueryFlattener.Callback;
public class ElasticsearchQueryFlattenerTest {
@@ -58,6 +63,21 @@
}
private void phrasePrefixQueryTestCase(boolean phraseAsTerms) {
+ final IndexReader ir;
+ try {
+ // Previously MultiPhraseQuery was flattened directly
+ // This is not possible anymore, so we need to rewrite
+ // but to rewrite we need an IndexReader with a doc.
+ RAMDirectory dir = new RAMDirectory();
+ IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(new
StandardAnalyzer()));
+ Document doc = new Document();
+ doc.add(new TextField("test", "foo qux bart foo quux another",
Store.NO));
+ iw.addDocument(doc);
+ iw.close();
+ ir = DirectoryReader.open(dir);
+ } catch (IOException ioe) {
+ throw new RuntimeException(ioe);
+ }
MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery();
Term foo = new Term("test", "foo");
Term qux = new Term("test", "qux");
@@ -68,24 +88,21 @@
query.add(new Term[] { qux, quux });
query.add(new Term[] { bar, anoth });
+ Term bart = new Term("test", "bart");
+ Term another = new Term("test", "another");
+
Callback callback = mock(Callback.class);
- new ElasticsearchQueryFlattener(1, phraseAsTerms, true).flatten(query,
null, callback);
+ new ElasticsearchQueryFlattener(1, phraseAsTerms, true).flatten(query,
ir, callback);
- // The first positions are sent as terms
- verify(callback).flattened(foo.bytes(), phraseAsTerms ? 1f : 0, null);
+ verify(callback).flattened(foo.bytes(), phraseAsTerms ? 1f : 0, query);
+ verify(callback).flattened(qux.bytes(), phraseAsTerms ? 1f : 0, query);
+ verify(callback).flattened(quux.bytes(), phraseAsTerms ? 1f : 0,
query);
+ verify(callback).flattened(bart.bytes(), phraseAsTerms ? 1f : 0,
query);
+ verify(callback).flattened(another.bytes(), phraseAsTerms ? 1f : 0,
query);
verify(callback, never()).flattened(eq(bar.bytes()), anyFloat(),
isNull(Query.class));
+ verify(callback, never()).flattened(eq(anoth.bytes()), anyFloat(),
isNull(Query.class));
- // The last position is sent as prefix automata
- ArgumentCaptor<Automaton> a = ArgumentCaptor.forClass(Automaton.class);
- verify(callback, times(2)).flattened(a.capture(), phraseAsTerms ?
eq(1f) : eq(0f), anyInt());
- assertThat(
- a.getAllValues().get(0),
- allOf(recognises("bar"), recognises("barr"),
recognises("bart"),
- not(recognises("bor")), not(recognises("anoth"))));
- assertThat(
- a.getAllValues().get(1),
- allOf(recognises("anoth"), recognises("anothe"),
recognises("another"),
- not(recognises("anoother")), not(recognises("bar"))));
+ verify(callback).flattened(another.bytes(), phraseAsTerms ? 1f : 0,
query);
if (phraseAsTerms) {
verify(callback, never()).startPhrase(anyInt(), anyFloat());
diff --git
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/BasicQueriesTest.java
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/BasicQueriesTest.java
index 37f3506..c650bf9 100644
---
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/BasicQueriesTest.java
+++
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/BasicQueriesTest.java
@@ -1,6 +1,5 @@
package org.wikimedia.highlighter.experimental.elasticsearch.integration;
-import static org.elasticsearch.index.query.FilterBuilders.termFilter;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.functionScoreQuery;
import static org.elasticsearch.index.query.QueryBuilders.fuzzyQuery;
@@ -204,7 +203,7 @@
client().prepareIndex("test", "test", "1").setSource("test", "test",
"bar", 2).get();
refresh();
- SearchRequestBuilder search =
testSearch(functionScoreQuery(termQuery("test", "test")).add(termFilter("test",
"test"), fieldValueFactorFunction("bar")));
+ SearchRequestBuilder search =
testSearch(functionScoreQuery(termQuery("test", "test")).add(termQuery("test",
"test"), fieldValueFactorFunction("bar")));
for (String hitSource : HIT_SOURCES) {
SearchResponse response = setHitSource(search, hitSource).get();
assertHighlight(response, 0, "test", 0, equalTo("<em>test</em>"));
diff --git
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/MiscellaneousTest.java
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/MiscellaneousTest.java
index f05ebc0..f8a15a7 100644
---
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/MiscellaneousTest.java
+++
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/MiscellaneousTest.java
@@ -1,13 +1,13 @@
package org.wikimedia.highlighter.experimental.elasticsearch.integration;
-import static org.elasticsearch.index.query.FilterBuilders.idsFilter;
+import static org.elasticsearch.index.query.QueryBuilders.idsQuery;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.filteredQuery;
import static org.elasticsearch.index.query.QueryBuilders.fuzzyQuery;
import static
org.elasticsearch.index.query.QueryBuilders.matchPhrasePrefixQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import static org.elasticsearch.index.query.QueryBuilders.prefixQuery;
-import static org.elasticsearch.index.query.QueryBuilders.queryString;
+import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
import static org.elasticsearch.index.query.QueryBuilders.rangeQuery;
import static org.elasticsearch.index.query.QueryBuilders.regexpQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
@@ -29,13 +29,14 @@
import java.util.Map;
import java.util.concurrent.ExecutionException;
+import com.google.common.collect.ImmutableList;
+
import org.elasticsearch.action.admin.indices.optimize.OptimizeResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.StopWatch;
-import org.elasticsearch.common.collect.ImmutableList;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.index.query.BoolQueryBuilder;
@@ -44,7 +45,7 @@
import org.junit.Test;
import
org.wikimedia.highlighter.experimental.elasticsearch.AbstractExperimentalHighlighterIntegrationTestBase;
-import
com.carrotsearch.ant.tasks.junit4.dependencies.com.google.common.base.Charsets;
+import com.google.common.base.Charsets;
import com.google.common.io.Resources;
/**
@@ -212,7 +213,7 @@
buildIndex();
indexTestData("What-a-Mess is a series of children's books written by
British comedy writer Frank Muir and illustrated by Joseph Wright. It was later
made into an animated series in the UK in 1990 and again in 1995 by DIC
Entertainment and aired on ABC in the United States. It aired on YTV from 1995
to 1999 in Canada. The title character is a disheveled (hence his nickname),
accident-prone Afghan Hound puppy, whose real name was Prince Amir of Kinjan.
Central Independent Television, the Independent Television contractor for the
Midlands, created following the restructuring of ATV and commencing broadcast
on 1 January 1982, Link Licensing & Bevanfield Films produced the first series
and DIC Entertainment produced the second series. Both of them were narrated by
Frank Muir. What-a-Mess - A scruffy Afghan puppy in which is the main
character of the entire franchise. His Breed name is Prince Amir of Kinjan, and
has a yellow duck sitting on top of his head. In the US animated version, the
duck was coloured blue, as if his character was merged with the blue bird in
the UK animated version and books, and was also given a name by What-A-Mess
called Baldwin. In the US animated version, What-A-Mess is voiced by Ryan
O'Donohue. What-a-Mess's Mother - Also known as The Duchess of Kinjan is a
beautiful pedigree Afghan Hound mother to What-a-Mess, and is voiced by Miriam
Flynn in the US version. Archbishop of Canterbury - A scruffy dark blue dog
with brown patches which What-A-Mess met and befriended in What-A-Mess Goes to
the Seaside. He's named this way because when What-A-Mess introduces himself
with his breed name he sarcastically replies \"Sure, and I'm the Archbishop of
Canterbury!\", which the naive pup takes as his actual name. His name was
changed to Norton in the US Animated Version, and he was voiced by Dana Hill.
The Cat Next Door - Also known as Felicia in the US animated version, is a
brown Siamese Cat that loves to tease What-A-Mess at times. In the US animated
version, she was coloured blue and she was voiced by Jo Ann Harris Belson.
Cynthia - A Hedgehog which What-A-Mess befriended in What-A-Mess Goes to
School. Her character was redesigned to become a mole named Ramona in the US
animated version, due to the fact that Hedgehogs aren't native to America. In
the US animated version, she is voiced by Candi Milo. Trash - Only in the US
animated version, Trash is a Bull Terrier who is a real trouble maker to
What-A-Mess. His real name is actually Francis He is voiced by Joe Nipote.
Frank - An Old English Sheepdog that narrates the US animated version of
What-A-Mess, voiced by Frank Muir himself! What-a-Mess What-a-Mess The Good
What-a-Mess at the Seaside What-a-Mess Goes to School Prince What-a-Mess Super
What-a-Mess What-a-Mess and the Cat Next Door What-a-Mess and the Hairy Monster
Four Seasons What-a-Mess in Spring What-a-Mess in Summer What-a-Mess in Autumn
What-a-Mess in Winter Four Square Meals What-a-Mess has Breakfast What-a-Mess
has Lunch What-a-Mess has Tea What-a-Mess has Supper Mini Books What-a-Mess has
a Brain Wave What-a-Mess and Little Poppet What-a-Mess and a trip to the Vet
What-a-Mess the Beautiful What-a-Mess Goes to Town What-a-Mess Goes on
Television What-a-Mess Goes Camping What-a-Mess Goes to the Seaside /
1990.03.26 What-a-Mess Goes to School / 1990.04.02 Prince What-a-Mess /
1990.04.09 Super What-a-Mess / 1990.04.16 What-a-Mess Keeps Cool / 1990.04.30
What-a-Mess and Cynthia the Hedgehog / 1990.05.14 What-a-Mess Has a Brain Wave!
/ 1990.05.21 What-a-Mess and the Cat Next Door / 1990.06.04 What-a-Mess and
Little Poppet / 1990.06.18 What-a-Mess Goes Camping / 1990.07.02 What-a-Mess
The Beautiful / 1990.07.09 What-a-Mess Goes to Town / 1990.07.16 What-a-Mess
Goes to the Vet / 1990.07.23 Talkin' Trash (September 16, 1995) A Bone to
Pick Midnight Snack Schoolin' Around The Legend of Junkyard Jones It's Raining
Cats and Dogs Home Alone...Almost Super What-A-Mess The Recliner Afghan Holiday
The Bone Tree Just Four More Left The Ropes What-A-Mess Has Breakfast Prize
Puppy The Great Eascape The Scarecrow and Prince Amir Shampooed Show and Tail I
Spy, I Cry, I Try What-A-Mess and the Hairy Monster Trick Or Treat My Teatime
with Frank Out With the Garbage Dr. What-A-Mess Ultimate What-A-Mess This
Hydrant Is Mine His Majesty, Prince What-A-Mess Trash's Wonderful Life
Snowbound The Thanksgiving Turkey Santa What-A-Mess Here Comes Santa Paws All
Around the Mallberry Bush What-A-Mess At the Movies His Royal Highness, Prince
What-A-Mess Party at Poppet's Take Me Out to the Dog Park The Watch Out Dog
Molenapped! Pound Pals Taste Test Slobber on a Stick Scout's Honor Seein'
Double Luck on His Side What-A-Mess Keeps the Doctor Away There's No Business
like Shoe Business Joy Rider Baldwin's Family Reunion Do the Mess Around On
Vacation Messy Encounters Dog Days of Summer Fetch! Real Puppies Don't Meow
Invasion of the Puppy Snatchers The Ballad of El Pero What-a-Mess Has Lunch
Walking the Boy Russell Williams, Imogen (4 July 2007). \"Whatever happened
to What-a-mess?\". London: The Guardian. Retrieved 3 January 2011. \"IMDB
What-a-mess\". Retrieved 3 January 2011. 1990 series episode guide at the Big
Cartoon DataBase");
- SearchRequestBuilder search = testSearch(queryString("what love?"));
+ SearchRequestBuilder search = testSearch(queryStringQuery("what
love?"));
for (String hitSource : HIT_SOURCES) {
SearchResponse response = setHitSource(search, hitSource).get();
assertNoFailures(response);
@@ -292,7 +293,7 @@
lotsOfTermsTestCase(watch, "phrase prefix and term", boolQuery()
.should(matchPhrasePrefixQuery("test", "zooma zoomb zoo"))
.should(termQuery("test", "zooma")));
- lotsOfTermsTestCase(watch, "phrase prefix and term",
queryString("test:\"zoooo\" OR test2:\"zaaap\""));
+ lotsOfTermsTestCase(watch, "phrase prefix and term",
queryStringQuery("test:\"zoooo\" OR test2:\"zaaap\""));
logger.info(watch.prettyPrint());
}
@@ -387,7 +388,7 @@
Map<String, Object> options = new HashMap<String, Object>();
options.put("max_expanded_terms", 1);
- SearchRequestBuilder search =
testSearch(filteredQuery(rangeQuery("test").from("teso").to("tesz"),
idsFilter("test").addIds("1")))
+ SearchRequestBuilder search =
testSearch(filteredQuery(rangeQuery("test").from("teso").to("tesz"),
idsQuery("test").addIds("1")))
.setHighlighterOptions(options);
for (String hitSource : HIT_SOURCES) {
options.put("hit_source", hitSource);
diff --git
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/MultimatchTest.java
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/MultimatchTest.java
index bbc83e5..f1716c4 100644
---
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/MultimatchTest.java
+++
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/MultimatchTest.java
@@ -1,6 +1,6 @@
package org.wikimedia.highlighter.experimental.elasticsearch.integration;
-import static org.elasticsearch.index.query.FilterBuilders.idsFilter;
+import static org.elasticsearch.index.query.QueryBuilders.idsQuery;
import static org.elasticsearch.index.query.QueryBuilders.filteredQuery;
import static org.elasticsearch.index.query.QueryBuilders.multiMatchQuery;
import static
org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHighlight;
@@ -89,7 +89,7 @@
indexTestData();
SearchRequestBuilder search =
testSearch(filteredQuery(multiMatchQuery("very test",
"test").cutoffFrequency(1f),
- idsFilter("test").addIds("1")));
+ idsQuery("test").addIds("1")));
for (String hitSource : HIT_SOURCES) {
SearchResponse response = setHitSource(search, hitSource).get();
assertHighlight(response, 0, "test", 0, equalTo("tests
<em>very</em> simple <em>test</em>"));
diff --git
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/OptionsTest.java
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/OptionsTest.java
index 55979b8..2bc4c2c 100644
---
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/OptionsTest.java
+++
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/OptionsTest.java
@@ -17,10 +17,11 @@
import java.util.Map;
import java.util.concurrent.ExecutionException;
+import com.google.common.collect.ImmutableMap;
+
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.common.collect.ImmutableMap;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.highlight.HighlightBuilder;
@@ -257,7 +258,7 @@
} else {
// I wish I could throw an HTTP 400 here but I don't believe I
// can.
- assertFailures(search, RestStatus.INTERNAL_SERVER_ERROR,
+ assertFailures(search, RestStatus.BAD_REQUEST,
containsString("as a hit source without setting"));
}
}
@@ -273,7 +274,7 @@
} else {
// I wish I could throw an HTTP 400 here but I don't believe I
// can.
- assertFailures(search, RestStatus.INTERNAL_SERVER_ERROR,
+ assertFailures(search, RestStatus.BAD_REQUEST,
containsString("as a hit source without setting"));
}
}
diff --git
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/PhraseQueryTest.java
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/PhraseQueryTest.java
index 5d1d1cf..220f5a5 100644
---
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/PhraseQueryTest.java
+++
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/PhraseQueryTest.java
@@ -3,7 +3,7 @@
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static
org.elasticsearch.index.query.QueryBuilders.matchPhrasePrefixQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchPhraseQuery;
-import static org.elasticsearch.index.query.QueryBuilders.queryString;
+import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
import static
org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHighlight;
import static
org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
@@ -325,7 +325,7 @@
indexTestData(data);
SearchRequestBuilder search = testSearch(
-
queryString(query).defaultField("test.cirrus_english").autoGeneratePhraseQueries(
+
queryStringQuery(query).defaultField("test.cirrus_english").autoGeneratePhraseQueries(
true)).addHighlightedField("test.cirrus_english");
for (String hitSource : HIT_SOURCES) {
SearchResponse response = setHitSource(search, hitSource).get();
diff --git
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/RegexTest.java
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/RegexTest.java
index b9aee20..fd8077d 100644
---
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/RegexTest.java
+++
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/RegexTest.java
@@ -13,11 +13,12 @@
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.common.collect.ImmutableList;
import org.elasticsearch.rest.RestStatus;
import org.junit.Test;
import
org.wikimedia.highlighter.experimental.elasticsearch.AbstractExperimentalHighlighterIntegrationTestBase;
+import com.google.common.collect.ImmutableList;
+
/**
* Tests for regex highlighting.
*/
diff --git a/experimental-highlighter-lucene/pom.xml
b/experimental-highlighter-lucene/pom.xml
index 958b108..dceba0b 100644
--- a/experimental-highlighter-lucene/pom.xml
+++ b/experimental-highlighter-lucene/pom.xml
@@ -3,7 +3,7 @@
<parent>
<groupId>org.wikimedia.search.highlighter</groupId>
<artifactId>experimental</artifactId>
- <version>1.7.1-SNAPSHOT</version>
+ <version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>experimental-highlighter-lucene</artifactId>
<packaging>jar</packaging>
@@ -22,6 +22,16 @@
<artifactId>lucene-test-framework</artifactId>
<version>${lucene.version}</version>
<scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <artifactId>randomizedtesting-runner</artifactId>
+ <groupId>com.carrotsearch.randomizedtesting</groupId>
+ </exclusion>
+ <exclusion>
+ <artifactId>junit4-ant</artifactId>
+ <groupId>com.carrotsearch.randomizedtesting</groupId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
diff --git
a/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/QueryFlattener.java
b/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/QueryFlattener.java
index 09e651d..eae0dfe 100644
---
a/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/QueryFlattener.java
+++
b/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/QueryFlattener.java
@@ -210,7 +210,13 @@
protected void flattenQuery(BooleanQuery query, float pathBoost, Object
sourceOverride,
IndexReader reader, Callback callback) {
for (BooleanClause clause : query) {
- if (!clause.isProhibited()) {
+ // Use isScoring() to skip non-scoring FILTER clauses; before lucene 5
+ // most of these queries were wrapped inside a FilteredQuery,
+ // but now the preferred way is to add a boolean clause with
+ // Occur.FILTER
+ // e.g. the _type filter with elasticsearch now uses this type of
+ // construct.
+ if (!clause.isProhibited() && clause.isScoring()) {
flatten(clause.getQuery(), pathBoost * query.getBoost(),
sourceOverride, reader,
callback);
}
diff --git
a/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/DocsAndPositionsHitEnum.java
b/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/PostingsHitEnum.java
similarity index 85%
rename from
experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/DocsAndPositionsHitEnum.java
rename to
experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/PostingsHitEnum.java
index 3861b83..4e761f4 100644
---
a/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/DocsAndPositionsHitEnum.java
+++
b/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/PostingsHitEnum.java
@@ -5,11 +5,11 @@
import java.util.List;
import java.util.Locale;
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@@ -25,11 +25,11 @@
import org.wikimedia.search.highlighter.experimental.hit.TermWeigher;
/**
- * Hit enum that pulls its information from a {@link DocsAndPositionsEnum}
+ * Hit enum that pulls its information from a {@link PostingsEnum}
* positioned on the appropriate doc. The hits are in document order in for a
* single term.
*/
-public class DocsAndPositionsHitEnum extends AbstractHitEnum {
+public class PostingsHitEnum extends AbstractHitEnum {
public static HitEnum fromTermVectors(IndexReader reader, int docId,
String fieldName,
CompiledAutomaton acceptable, TermWeigher<BytesRef> queryWeigher,
TermWeigher<BytesRef> corpusWeigher, TermSourceFinder<BytesRef>
sourceFinder)
@@ -47,10 +47,10 @@
CompiledAutomaton acceptable, TermWeigher<BytesRef> queryWeigher,
TermWeigher<BytesRef> corpusWeigher, TermSourceFinder<BytesRef>
sourceFinder)
throws IOException {
- List<AtomicReaderContext> leaves = reader.getContext().leaves();
+ List<LeafReaderContext> leaves = reader.getContext().leaves();
int leaf = ReaderUtil.subIndex(docId, leaves);
- AtomicReaderContext subcontext = leaves.get(leaf);
- AtomicReader atomicReader = subcontext.reader();
+ LeafReaderContext subcontext = leaves.get(leaf);
+ LeafReader atomicReader = subcontext.reader();
docId -= subcontext.docBase;
return fromTerms(atomicReader.terms(fieldName), acceptable, reader,
docId,
queryWeigher, corpusWeigher, sourceFinder);
@@ -68,9 +68,9 @@
List<HitEnum> enums = new ArrayList<HitEnum>();
// Last enum that didn't find anything. We can reuse it.
- DocsAndPositionsEnum dp = null;
+ PostingsEnum dp = null;
while ((term = termsEnum.next()) != null) {
- dp = termsEnum.docsAndPositions(null, dp,
DocsAndPositionsEnum.FLAG_OFFSETS);
+ dp = termsEnum.postings(null, dp, PostingsEnum.OFFSETS);
if (dp == null) {
continue;
}
@@ -83,7 +83,7 @@
continue;
}
}
- HitEnum e = new DocsAndPositionsHitEnum(dp,
queryWeigher.weigh(term), corpusWeigher.weigh(term), sourceFinder.source(term));
+ HitEnum e = new PostingsHitEnum(dp, queryWeigher.weigh(term),
corpusWeigher.weigh(term), sourceFinder.source(term));
enums.add(e);
dp = null;
}
@@ -96,7 +96,7 @@
return new MergingHitEnum(enums, HitEnum.LessThans.POSITION);
}
- private final DocsAndPositionsEnum dp;
+ private final PostingsEnum dp;
private final int freq;
private final float queryWeight;
private final float corpusWeight;
@@ -104,7 +104,7 @@
private int current;
private int position;
- public DocsAndPositionsHitEnum(DocsAndPositionsEnum dp, float queryWeight,
float corpusWeight, int source) {
+ public PostingsHitEnum(PostingsEnum dp, float queryWeight, float
corpusWeight, int source) {
this.dp = dp;
this.queryWeight = queryWeight;
this.corpusWeight = corpusWeight;
diff --git
a/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/TokenStreamHitEnum.java
b/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/TokenStreamHitEnum.java
index f9d883f..602a1e1 100644
---
a/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/TokenStreamHitEnum.java
+++
b/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/TokenStreamHitEnum.java
@@ -32,9 +32,11 @@
private int source;
/**
- *
+ * Builds a HitEnum for a TokenStream.
* @param tokenStream
- * @param weigher
+ * @param queryWeigher
+ * @param corpusWeigher
+ * @param sourceFinder
*/
public TokenStreamHitEnum(TokenStream tokenStream, TermWeigher<BytesRef>
queryWeigher,
TermWeigher<BytesRef> corpusWeigher, TermSourceFinder<BytesRef>
sourceFinder) {
diff --git
a/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/weight/DefaultSimilarityTermWeigher.java
b/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/weight/DefaultSimilarityTermWeigher.java
index 0ad40be..e55990e 100644
---
a/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/weight/DefaultSimilarityTermWeigher.java
+++
b/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/weight/DefaultSimilarityTermWeigher.java
@@ -4,6 +4,7 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.util.BytesRef;
import
org.wikimedia.highlighter.experimental.lucene.WrappedExceptionFromLucene;
diff --git
a/experimental-highlighter-lucene/src/test/java/org/wikimedia/highlighter/experimental/lucene/hit/DocsAndPositionsHitEnumFromPostingsTest.java
b/experimental-highlighter-lucene/src/test/java/org/wikimedia/highlighter/experimental/lucene/hit/PostingsHitEnumFromPostingsTest.java
similarity index 87%
rename from
experimental-highlighter-lucene/src/test/java/org/wikimedia/highlighter/experimental/lucene/hit/DocsAndPositionsHitEnumFromPostingsTest.java
rename to
experimental-highlighter-lucene/src/test/java/org/wikimedia/highlighter/experimental/lucene/hit/PostingsHitEnumFromPostingsTest.java
index c9db689..f78e7eb 100644
---
a/experimental-highlighter-lucene/src/test/java/org/wikimedia/highlighter/experimental/lucene/hit/DocsAndPositionsHitEnumFromPostingsTest.java
+++
b/experimental-highlighter-lucene/src/test/java/org/wikimedia/highlighter/experimental/lucene/hit/PostingsHitEnumFromPostingsTest.java
@@ -13,11 +13,11 @@
/**
* Tests DocsAndPositionsHitEnum using term vectors.
*/
-public class DocsAndPositionsHitEnumFromPostingsTest extends
+public class PostingsHitEnumFromPostingsTest extends
AbstractDocsAndPositionsHitEnumTestBase {
protected HitEnum buildEnum(String source, Analyzer analyzer,
CompiledAutomaton acceptable) {
try {
- return
DocsAndPositionsHitEnum.fromPostings(buildIndexReader(source, analyzer), 0,
+ return PostingsHitEnum.fromPostings(buildIndexReader(source,
analyzer), 0,
"field", acceptable, new ConstantTermWeigher<BytesRef>(),
new ConstantTermWeigher<BytesRef>(), new
NoSourceTermSourceFinder<BytesRef>());
} catch (IOException e) {
diff --git
a/experimental-highlighter-lucene/src/test/java/org/wikimedia/highlighter/experimental/lucene/hit/DocsAndPositionsHitEnumFromTermVectorsTest.java
b/experimental-highlighter-lucene/src/test/java/org/wikimedia/highlighter/experimental/lucene/hit/PostingsHitEnumFromTermVectorsTest.java
similarity index 86%
rename from
experimental-highlighter-lucene/src/test/java/org/wikimedia/highlighter/experimental/lucene/hit/DocsAndPositionsHitEnumFromTermVectorsTest.java
rename to
experimental-highlighter-lucene/src/test/java/org/wikimedia/highlighter/experimental/lucene/hit/PostingsHitEnumFromTermVectorsTest.java
index 67779d3..482b52b 100644
---
a/experimental-highlighter-lucene/src/test/java/org/wikimedia/highlighter/experimental/lucene/hit/DocsAndPositionsHitEnumFromTermVectorsTest.java
+++
b/experimental-highlighter-lucene/src/test/java/org/wikimedia/highlighter/experimental/lucene/hit/PostingsHitEnumFromTermVectorsTest.java
@@ -13,11 +13,11 @@
/**
* Tests DocsAndPositionsHitEnum using term vectors.
*/
-public class DocsAndPositionsHitEnumFromTermVectorsTest extends
+public class PostingsHitEnumFromTermVectorsTest extends
AbstractDocsAndPositionsHitEnumTestBase {
protected HitEnum buildEnum(String source, Analyzer analyzer,
CompiledAutomaton acceptable) {
try {
- return
DocsAndPositionsHitEnum.fromTermVectors(buildIndexReader(source, analyzer), 0,
+ return PostingsHitEnum.fromTermVectors(buildIndexReader(source,
analyzer), 0,
"field", acceptable, new ConstantTermWeigher<BytesRef>(),
new ConstantTermWeigher<BytesRef>(), new
NoSourceTermSourceFinder<BytesRef>());
} catch (IOException e) {
diff --git a/pom.xml b/pom.xml
index b65b6e9..861ca20 100644
--- a/pom.xml
+++ b/pom.xml
@@ -10,7 +10,7 @@
<groupId>org.wikimedia.search.highlighter</groupId>
<artifactId>experimental</artifactId>
- <version>1.7.1-SNAPSHOT</version>
+ <version>2.0.0-SNAPSHOT</version>
<packaging>pom</packaging>
<modules>
@@ -49,12 +49,10 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
- <elasticsearch.version>1.7.0</elasticsearch.version>
- <!-- For the Elasticsearch plugin to work this should match the version of
Lucene that Elasticsearch
+ <elasticsearch.version>2.0.0</elasticsearch.version>
+ <!-- For the Elasticsearch plugin to work this should match the version of
Lucene that Elasticsearch
uses. -->
- <lucene.version>4.10.4</lucene.version>
- <!-- Note that this has to be compatible with the Elasticsearch version
but won't match it. -->
- <elasticsearch.icu.version>2.7.0</elasticsearch.icu.version>
+ <lucene.version>5.2.1</lucene.version>
</properties>
<build>
@@ -277,6 +275,24 @@
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <artifactId>hamcrest-core</artifactId>
+ <groupId>org.hamcrest</groupId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-core</artifactId>
+ <version>1.10.19</version>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <artifactId>hamcrest-core</artifactId>
+ <groupId>org.hamcrest</groupId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>com.carrotsearch.randomizedtesting</groupId>
@@ -285,15 +301,9 @@
<scope>test</scope>
</dependency>
<dependency>
- <groupId>org.mockito</groupId>
- <artifactId>mockito-all</artifactId>
- <version>1.9.5</version>
- <scope>test</scope>
- </dependency>
- <dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
- <version>16.0.1</version>
+ <version>18.0</version>
<scope>test</scope>
</dependency>
</dependencies>
--
To view, visit https://gerrit.wikimedia.org/r/281942
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I7e9d332ad39cf07d23e1cbd05d86f0ce0c1a6ba5
Gerrit-PatchSet: 1
Gerrit-Project: search/highlighter
Gerrit-Branch: master
Gerrit-Owner: DCausse <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits