DCausse has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/371747 )

Change subject: Add new formatter to output offsets+text snippets
......................................................................

Add new formatter to output offsets+text snippets

Bug: T173231
Change-Id: I69deeca8589138fe512f6fc782ecc80897cf26bb
---
M experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ExperimentalHighlighter.java
A experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/OffsetAugmenterSnippetFormatter.java
M experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/MiscellaneousTest.java
3 files changed, 85 insertions(+), 40 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/search/highlighter refs/changes/47/371747/1

diff --git a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ExperimentalHighlighter.java b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ExperimentalHighlighter.java
index 6163e36..a430d72 100644
--- a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ExperimentalHighlighter.java
+++ b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ExperimentalHighlighter.java
@@ -1,16 +1,5 @@
 package org.wikimedia.highlighter.experimental.elasticsearch;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Set;
-import java.util.regex.Pattern;
-
 import org.apache.logging.log4j.Logger;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.automaton.RegExp;
@@ -45,6 +34,17 @@
 import org.wikimedia.search.highlighter.experimental.tools.GraphvizHit;
 import org.wikimedia.search.highlighter.experimental.tools.GraphvizHitEnum;
 import org.wikimedia.search.highlighter.experimental.tools.GraphvizSnippetFormatter;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
 
 public class ExperimentalHighlighter implements Highlighter {
     public static final String NAME = "experimental";
@@ -134,6 +134,7 @@
 
     static class HighlightExecutionContext {
         private static final String OPTION_RETURN_DEBUG_GRAPH = "return_debug_graph";
+        private static final String OPTION_RETURN_SNIPPETS_WITH_OFFSET = "return_snippets_and_offsets";
         private static final int DEFAULT_MAX_DETERMINIZED_STATES = 20000;
         private final HighlighterContext context;
         private final CacheEntry cache;
@@ -522,6 +523,9 @@
                 formatter = new OffsetSnippetFormatter();
             } else if (getOption(OPTION_RETURN_DEBUG_GRAPH, false)) {
                 formatter = new GraphvizSnippetFormatter(defaultField.buildSourceExtracter());
+            } else if (getOption(OPTION_RETURN_SNIPPETS_WITH_OFFSET, false)) {
+                formatter = new OffsetAugmenterSnippetFormatter(new SnippetFormatter.Default(defaultField.buildSourceExtracter(), context.field.fieldOptions().preTags()[0],
+                        context.field.fieldOptions().postTags()[0]));
             } else {
                 formatter = new SnippetFormatter.Default(defaultField.buildSourceExtracter(), context.field.fieldOptions().preTags()[0],
                         context.field.fieldOptions().postTags()[0]);
diff --git a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/OffsetAugmenterSnippetFormatter.java b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/OffsetAugmenterSnippetFormatter.java
new file mode 100644
index 0000000..e56caad
--- /dev/null
+++ b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/OffsetAugmenterSnippetFormatter.java
@@ -0,0 +1,20 @@
+package org.wikimedia.highlighter.experimental.elasticsearch;
+
+import org.wikimedia.search.highlighter.experimental.Snippet;
+import org.wikimedia.search.highlighter.experimental.SnippetFormatter;
+
+public class OffsetAugmenterSnippetFormatter implements SnippetFormatter {
+    private static final OffsetSnippetFormatter OFFSETS = new OffsetSnippetFormatter();
+    private final SnippetFormatter formatter;
+
+    public OffsetAugmenterSnippetFormatter(SnippetFormatter formatter) {
+        this.formatter = formatter;
+    }
+
+    @Override
+    public String format(Snippet snippet) {
+        StringBuilder sb = new StringBuilder();
+        sb.append(OFFSETS.format(snippet));
+        return sb.append('|').append(formatter.format(snippet)).toString();
+    }
+}
diff --git a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/MiscellaneousTest.java b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/MiscellaneousTest.java
index 21eb5f4..002d307 100644
--- a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/MiscellaneousTest.java
+++ b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/MiscellaneousTest.java
@@ -1,8 +1,35 @@
 package org.wikimedia.highlighter.experimental.elasticsearch.integration;
 
-import static org.elasticsearch.index.query.QueryBuilders.idsQuery;
+import com.google.common.base.Charsets;
+import com.google.common.collect.ImmutableList;
+import com.google.common.io.Resources;
+import org.elasticsearch.action.admin.indices.forcemerge.ForceMergeResponse;
+import org.elasticsearch.action.bulk.BulkRequestBuilder;
+import org.elasticsearch.action.index.IndexRequestBuilder;
+import org.elasticsearch.action.search.SearchRequestBuilder;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.common.StopWatch;
+import org.elasticsearch.common.unit.Fuzziness;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.json.JsonXContent;
+import org.elasticsearch.index.query.BoolQueryBuilder;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
+import org.junit.Test;
+import org.wikimedia.highlighter.experimental.elasticsearch.AbstractExperimentalHighlighterIntegrationTestBase;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.concurrent.ExecutionException;
+
 import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
 import static org.elasticsearch.index.query.QueryBuilders.fuzzyQuery;
+import static org.elasticsearch.index.query.QueryBuilders.idsQuery;
 import static org.elasticsearch.index.query.QueryBuilders.matchPhrasePrefixQuery;
 import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
 import static org.elasticsearch.index.query.QueryBuilders.prefixQuery;
@@ -19,34 +46,6 @@
 import static org.hamcrest.Matchers.both;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.concurrent.ExecutionException;
-
-import com.google.common.collect.ImmutableList;
-
-import org.elasticsearch.action.admin.indices.forcemerge.ForceMergeResponse;
-import org.elasticsearch.action.bulk.BulkRequestBuilder;
-import org.elasticsearch.action.index.IndexRequestBuilder;
-import org.elasticsearch.action.search.SearchRequestBuilder;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.common.StopWatch;
-import org.elasticsearch.common.unit.Fuzziness;
-import org.elasticsearch.common.xcontent.XContentBuilder;
-import org.elasticsearch.common.xcontent.json.JsonXContent;
-import org.elasticsearch.index.query.BoolQueryBuilder;
-import org.elasticsearch.index.query.QueryBuilder;
-import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
-import org.junit.Test;
-import org.wikimedia.highlighter.experimental.elasticsearch.AbstractExperimentalHighlighterIntegrationTestBase;
-
-import com.google.common.base.Charsets;
-import com.google.common.io.Resources;
 
 /**
  * Miscellaneous integration test that don't really have a good home.
@@ -431,6 +430,28 @@
     }
 
     @Test
+    public void offsetsAugmenter() throws IOException {
+        buildIndex();
+        indexTestData();
+        Map<String, Object> options = new HashMap<String, Object>();
+        options.put("return_snippets_and_offsets", true);
+        SearchResponse response = testSearch(matchQuery("test.english", "test"),
+                x -> x.options(options).field("test.english")).get();
+        assertHighlight(response, 0, "test.english", 0, equalTo("0:0-5,18-22:22|<em>tests</em> very simple <em>test</em>"));
+    }
+
+    @Test
+    public void offsetsAugmenterWithEmptyArray() throws IOException {
+        buildIndex();
+        indexTestData(Arrays.asList("", "after_empty_array"));
+        Map<String, Object> options = new HashMap<String, Object>();
+        options.put("return_snippets_and_offsets", true);
+        SearchResponse response = testSearch(matchQuery("test.english", "after_empty_array"),
+                x -> x.options(options).field("test.english")).get();
+        assertHighlight(response, 0, "test.english", 0, equalTo("1:1-18:18|<em>after_empty_array</em>"));
+    }
+
+    @Test
     public void returnOffsetsMultiValued() throws IOException {
         buildIndex();
         indexTestData(ImmutableList.of("tests very simple test", "with more test"));

-- 
To view, visit https://gerrit.wikimedia.org/r/371747
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I69deeca8589138fe512f6fc782ecc80897cf26bb
Gerrit-PatchSet: 1
Gerrit-Project: search/highlighter
Gerrit-Branch: master
Gerrit-Owner: DCausse <dcau...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to