Manybubbles has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/150013

Change subject: Add option to skip highlighting if last matched
......................................................................

Add option to skip highlighting if last matched

How it works from the docs:
The ```skip_if_last_matched``` option can be used to skip highlighting a field
entirely if the last field matched.  This can be used to form chains of fields,
only one of which will return a match:
```js
  "highlight": {
    "type": "experimental",
    "fields": {
      "title": {},
      "redirect": { "options": { "skip_if_last_matched": true } },
      "section": { "options": { "skip_if_last_matched": true } },
      "category": { "options": { "skip_if_last_matched": true } },
      "foo": { "options": { "skip_if_last_matched": true } },
      "text": {},
      "aux_text": { "options": { "skip_if_last_matched": true } }
    }
  }
```
The above example will always highlight title and text and will only attempt to
highlight redirect and aux_text if there are no title or text matches,
respectively.  It'll also only highlight section if there are no title or
redirect matches.  Foo will only be highlighted if there are no title,
redirect, section, or category matches.

This needed to wait until Elasticsearch 1.3 because in 1.2 the fields were
highlighted in random order on the backend.  In 1.3 they are highlighted
in the order in which they come back.

Change-Id: I0326baaf497a305348eb536461cd8d6404795d2c
---
M 
experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
A 
experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/SkipTest.java
2 files changed, 76 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/search/highlighter 
refs/changes/13/150013/1

diff --git 
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
 
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
index bd3237c..d81d657 100644
--- 
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
+++ 
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
@@ -82,6 +82,8 @@
     static class CacheEntry {
         private final Map<QueryCacheKey, BasicQueryWeigher> queryWeighers = 
new HashMap<>();
         private Map<String, AutomatonHitEnum.Factory> 
automatonHitEnumFactories;
+        private boolean lastMatched = false;
+        private int lastDocId = -1;
     }
 
     static class QueryCacheKey {
@@ -138,6 +140,10 @@
         }
 
         HighlightField highlight() throws IOException {
+            if (shouldSkip()) {
+                return null;
+            }
+
             // TODO it might be possible to not build the weigher at all if 
just using regex highlighting
             ensureWeigher();
             scoreMatters = context.field.fieldOptions().scoreOrdered();
@@ -154,6 +160,7 @@
             List<Snippet> snippets = buildChooser().choose(segmenter, 
buildHitEnum(),
                     numberOfSnippets);
             if (snippets.size() != 0) {
+                cache.lastMatched = true;
                 return new HighlightField(context.fieldName, 
formatSnippets(snippets));
             }
             int noMatchSize = context.field.fieldOptions().noMatchSize();
@@ -169,6 +176,20 @@
             return new HighlightField(context.fieldName, new Text[] 
{fragment});
         }
 
+        private boolean shouldSkip() {
+            // Maintain lastMatched - it should be false if we shift to a new 
doc.
+            if (cache.lastDocId != context.hitContext.docId()) {
+                cache.lastMatched = false;
+                cache.lastDocId = context.hitContext.docId();
+            }
+
+            Boolean skipIfLastMatched = 
(Boolean)getOption("skip_if_last_matched");
+            if (skipIfLastMatched == null ? false : skipIfLastMatched && 
cache.lastMatched) {
+                return true;
+            }
+            return false;
+        }
+
         void cleanup() throws Exception {
             Exception lastCaught = null;
             try {
diff --git 
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/SkipTest.java
 
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/SkipTest.java
new file mode 100644
index 0000000..393348e
--- /dev/null
+++ 
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/SkipTest.java
@@ -0,0 +1,55 @@
+package org.wikimedia.highlighter.experimental.elasticsearch.integration;
+
+import static org.elasticsearch.index.query.QueryBuilders.termQuery;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
+import static org.hamcrest.Matchers.equalTo;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.elasticsearch.action.search.SearchRequestBuilder;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.search.highlight.HighlightBuilder;
+import org.junit.Test;
+import 
org.wikimedia.highlighter.experimental.elasticsearch.AbstractExperimentalHighlighterIntegrationTestBase;
+
+/**
+ * Tests for skipping highlighting.
+ */
+public class SkipTest extends 
AbstractExperimentalHighlighterIntegrationTestBase {
+    @Test
+    public void skipIfLastMatched() throws IOException {
+        buildIndex();
+        indexTestData();
+
+        Map<String, Object> skipIf = new HashMap<>();
+        skipIf.put("skip_if_last_matched", true);
+
+        SearchRequestBuilder search = testSearch(termQuery("a", 
"test")).setSize(1000)
+                .addHighlightedField(new 
HighlightBuilder.Field("a").options(skipIf))
+                .addHighlightedField(new 
HighlightBuilder.Field("b").options(skipIf))
+                .addHighlightedField(new 
HighlightBuilder.Field("c").options(skipIf))
+                .addHighlightedField(new HighlightBuilder.Field("d"));
+        SearchResponse response = search.get();
+        assertHighlight(response, 0, "a", 0, equalTo("<em>test</em> a"));
+        assertNotHighlighted(response, 0, "b");
+        assertNotHighlighted(response, 0, "c");
+        assertHighlight(response, 0, "d", 0, equalTo("<em>test</em> d"));
+
+        assertHighlight(response, 1, "a", 0, equalTo("<em>test</em> a"));
+        assertNotHighlighted(response, 1, "b");
+        assertNotHighlighted(response, 1, "c");
+        assertHighlight(response, 1, "d", 0, equalTo("<em>test</em> d"));
+    }
+
+    protected void indexTestData() {
+        client().prepareIndex("test", "test", "1")
+                .setSource("a", "test a", "b", "test foo b", "c", "test foo 
c", "d", "test d")
+                .get();
+        client().prepareIndex("test", "test", "2")
+                .setSource("a", "test a", "b", "test foo b", "c", "test foo 
c", "d", "test d")
+                .get();
+        refresh();
+    }
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/150013
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I0326baaf497a305348eb536461cd8d6404795d2c
Gerrit-PatchSet: 1
Gerrit-Project: search/highlighter
Gerrit-Branch: master
Gerrit-Owner: Manybubbles <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to