Manybubbles has uploaded a new change for review.
https://gerrit.wikimedia.org/r/150013
Change subject: Add option to skip highlighting if last matched
......................................................................
Add option to skip highlighting if last matched
How it works from the docs:
The ```skip_if_last_matched``` option can be used to skip highlighting a field
entirely if the previous field matched. This can be used to form chains of
fields, only one of which will return a match:
```js
"highlight": {
"type": "experimental",
"fields": {
"title": {},
"redirect": { "options": { "skip_if_last_matched": true } },
"section": { "options": { "skip_if_last_matched": true } },
"category": { "options": { "skip_if_last_matched": true } },
"foo": { "options": { "skip_if_last_matched": true } },
"text": {},
"aux_text": { "options": { "skip_if_last_matched": true } }
}
}
```
The above example will always highlight title and text and only attempt to
highlight redirect and aux_text if there are no title or text matches,
respectively. It'll also only highlight section if there are no title or
redirect matches. Foo will only be highlighted if there are no title,
redirect, section, or category matches.
This needed to wait until Elasticsearch 1.3 because in 1.2 the fields were
highlighted in random order on the backend. In 1.3 they are highlighted
in the order in which they come back.
Change-Id: I0326baaf497a305348eb536461cd8d6404795d2c
---
M
experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
A
experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/SkipTest.java
2 files changed, 76 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/search/highlighter
refs/changes/13/150013/1
diff --git
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
index bd3237c..d81d657 100644
---
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
+++
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
@@ -82,6 +82,8 @@
static class CacheEntry {
private final Map<QueryCacheKey, BasicQueryWeigher> queryWeighers =
new HashMap<>();
private Map<String, AutomatonHitEnum.Factory>
automatonHitEnumFactories;
+ private boolean lastMatched = false;
+ private int lastDocId = -1;
}
static class QueryCacheKey {
@@ -138,6 +140,10 @@
}
HighlightField highlight() throws IOException {
+ if (shouldSkip()) {
+ return null;
+ }
+
// TODO it might be possible to not build the weigher at all if
just using regex highlighting
ensureWeigher();
scoreMatters = context.field.fieldOptions().scoreOrdered();
@@ -154,6 +160,7 @@
List<Snippet> snippets = buildChooser().choose(segmenter,
buildHitEnum(),
numberOfSnippets);
if (snippets.size() != 0) {
+ cache.lastMatched = true;
return new HighlightField(context.fieldName,
formatSnippets(snippets));
}
int noMatchSize = context.field.fieldOptions().noMatchSize();
@@ -169,6 +176,20 @@
return new HighlightField(context.fieldName, new Text[]
{fragment});
}
+ private boolean shouldSkip() {
+ // Maintain lastMatched - it should be false if we shift to a new
doc.
+ if (cache.lastDocId != context.hitContext.docId()) {
+ cache.lastMatched = false;
+ cache.lastDocId = context.hitContext.docId();
+ }
+
+ Boolean skipIfLastMatched =
(Boolean)getOption("skip_if_last_matched");
+ if (skipIfLastMatched == null ? false : skipIfLastMatched &&
cache.lastMatched) {
+ return true;
+ }
+ return false;
+ }
+
void cleanup() throws Exception {
Exception lastCaught = null;
try {
diff --git
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/SkipTest.java
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/SkipTest.java
new file mode 100644
index 0000000..393348e
--- /dev/null
+++
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/SkipTest.java
@@ -0,0 +1,55 @@
+package org.wikimedia.highlighter.experimental.elasticsearch.integration;
+
+import static org.elasticsearch.index.query.QueryBuilders.termQuery;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
+import static org.hamcrest.Matchers.equalTo;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.elasticsearch.action.search.SearchRequestBuilder;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.search.highlight.HighlightBuilder;
+import org.junit.Test;
+import
org.wikimedia.highlighter.experimental.elasticsearch.AbstractExperimentalHighlighterIntegrationTestBase;
+
+/**
+ * Tests for skipping highlighting.
+ */
+public class SkipTest extends
AbstractExperimentalHighlighterIntegrationTestBase {
+ @Test
+ public void skipIfLastMatched() throws IOException {
+ buildIndex();
+ indexTestData();
+
+ Map<String, Object> skipIf = new HashMap<>();
+ skipIf.put("skip_if_last_matched", true);
+
+ SearchRequestBuilder search = testSearch(termQuery("a",
"test")).setSize(1000)
+ .addHighlightedField(new
HighlightBuilder.Field("a").options(skipIf))
+ .addHighlightedField(new
HighlightBuilder.Field("b").options(skipIf))
+ .addHighlightedField(new
HighlightBuilder.Field("c").options(skipIf))
+ .addHighlightedField(new HighlightBuilder.Field("d"));
+ SearchResponse response = search.get();
+ assertHighlight(response, 0, "a", 0, equalTo("<em>test</em> a"));
+ assertNotHighlighted(response, 0, "b");
+ assertNotHighlighted(response, 0, "c");
+ assertHighlight(response, 0, "d", 0, equalTo("<em>test</em> d"));
+
+ assertHighlight(response, 1, "a", 0, equalTo("<em>test</em> a"));
+ assertNotHighlighted(response, 1, "b");
+ assertNotHighlighted(response, 1, "c");
+ assertHighlight(response, 1, "d", 0, equalTo("<em>test</em> d"));
+ }
+
+ protected void indexTestData() {
+ client().prepareIndex("test", "test", "1")
+ .setSource("a", "test a", "b", "test foo b", "c", "test foo
c", "d", "test d")
+ .get();
+ client().prepareIndex("test", "test", "2")
+ .setSource("a", "test a", "b", "test foo b", "c", "test foo
c", "d", "test d")
+ .get();
+ refresh();
+ }
+}
--
To view, visit https://gerrit.wikimedia.org/r/150013
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I0326baaf497a305348eb536461cd8d6404795d2c
Gerrit-PatchSet: 1
Gerrit-Project: search/highlighter
Gerrit-Branch: master
Gerrit-Owner: Manybubbles <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits