DCausse has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/371747 )
Change subject: Add new formatter to output offsets+text snippets
......................................................................
Add new formatter to output offsets+text snippets
Bug: T173231
Change-Id: I69deeca8589138fe512f6fc782ecc80897cf26bb
---
M experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ExperimentalHighlighter.java
A experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/OffsetAugmenterSnippetFormatter.java
M experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/MiscellaneousTest.java
3 files changed, 85 insertions(+), 40 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/search/highlighter refs/changes/47/371747/1
diff --git
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ExperimentalHighlighter.java
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ExperimentalHighlighter.java
index 6163e36..a430d72 100644
---
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ExperimentalHighlighter.java
+++
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ExperimentalHighlighter.java
@@ -1,16 +1,5 @@
package org.wikimedia.highlighter.experimental.elasticsearch;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Set;
-import java.util.regex.Pattern;
-
import org.apache.logging.log4j.Logger;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.automaton.RegExp;
@@ -45,6 +34,17 @@
import org.wikimedia.search.highlighter.experimental.tools.GraphvizHit;
import org.wikimedia.search.highlighter.experimental.tools.GraphvizHitEnum;
import
org.wikimedia.search.highlighter.experimental.tools.GraphvizSnippetFormatter;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
public class ExperimentalHighlighter implements Highlighter {
public static final String NAME = "experimental";
@@ -134,6 +134,7 @@
static class HighlightExecutionContext {
private static final String OPTION_RETURN_DEBUG_GRAPH =
"return_debug_graph";
+ private static final String OPTION_RETURN_SNIPPETS_WITH_OFFSET =
"return_snippets_and_offsets";
private static final int DEFAULT_MAX_DETERMINIZED_STATES = 20000;
private final HighlighterContext context;
private final CacheEntry cache;
@@ -522,6 +523,9 @@
formatter = new OffsetSnippetFormatter();
} else if (getOption(OPTION_RETURN_DEBUG_GRAPH, false)) {
formatter = new
GraphvizSnippetFormatter(defaultField.buildSourceExtracter());
+ } else if (getOption(OPTION_RETURN_SNIPPETS_WITH_OFFSET, false)) {
+ formatter = new OffsetAugmenterSnippetFormatter(new
SnippetFormatter.Default(defaultField.buildSourceExtracter(),
context.field.fieldOptions().preTags()[0],
+ context.field.fieldOptions().postTags()[0]));
} else {
formatter = new
SnippetFormatter.Default(defaultField.buildSourceExtracter(),
context.field.fieldOptions().preTags()[0],
context.field.fieldOptions().postTags()[0]);
diff --git
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/OffsetAugmenterSnippetFormatter.java
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/OffsetAugmenterSnippetFormatter.java
new file mode 100644
index 0000000..e56caad
--- /dev/null
+++
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/OffsetAugmenterSnippetFormatter.java
@@ -0,0 +1,20 @@
+package org.wikimedia.highlighter.experimental.elasticsearch;
+
+import org.wikimedia.search.highlighter.experimental.Snippet;
+import org.wikimedia.search.highlighter.experimental.SnippetFormatter;
+
+public class OffsetAugmenterSnippetFormatter implements SnippetFormatter {
+ private static final OffsetSnippetFormatter OFFSETS = new
OffsetSnippetFormatter();
+ private final SnippetFormatter formatter;
+
+ public OffsetAugmenterSnippetFormatter(SnippetFormatter formatter) {
+ this.formatter = formatter;
+ }
+
+ @Override
+ public String format(Snippet snippet) {
+ StringBuilder sb = new StringBuilder();
+ sb.append(OFFSETS.format(snippet));
+ return sb.append('|').append(formatter.format(snippet)).toString();
+ }
+}
diff --git
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/MiscellaneousTest.java
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/MiscellaneousTest.java
index 21eb5f4..002d307 100644
---
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/MiscellaneousTest.java
+++
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/MiscellaneousTest.java
@@ -1,8 +1,35 @@
package org.wikimedia.highlighter.experimental.elasticsearch.integration;
-import static org.elasticsearch.index.query.QueryBuilders.idsQuery;
+import com.google.common.base.Charsets;
+import com.google.common.collect.ImmutableList;
+import com.google.common.io.Resources;
+import org.elasticsearch.action.admin.indices.forcemerge.ForceMergeResponse;
+import org.elasticsearch.action.bulk.BulkRequestBuilder;
+import org.elasticsearch.action.index.IndexRequestBuilder;
+import org.elasticsearch.action.search.SearchRequestBuilder;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.common.StopWatch;
+import org.elasticsearch.common.unit.Fuzziness;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.json.JsonXContent;
+import org.elasticsearch.index.query.BoolQueryBuilder;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
+import org.junit.Test;
+import
org.wikimedia.highlighter.experimental.elasticsearch.AbstractExperimentalHighlighterIntegrationTestBase;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.concurrent.ExecutionException;
+
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.fuzzyQuery;
+import static org.elasticsearch.index.query.QueryBuilders.idsQuery;
import static
org.elasticsearch.index.query.QueryBuilders.matchPhrasePrefixQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import static org.elasticsearch.index.query.QueryBuilders.prefixQuery;
@@ -19,34 +46,6 @@
import static org.hamcrest.Matchers.both;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.concurrent.ExecutionException;
-
-import com.google.common.collect.ImmutableList;
-
-import org.elasticsearch.action.admin.indices.forcemerge.ForceMergeResponse;
-import org.elasticsearch.action.bulk.BulkRequestBuilder;
-import org.elasticsearch.action.index.IndexRequestBuilder;
-import org.elasticsearch.action.search.SearchRequestBuilder;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.common.StopWatch;
-import org.elasticsearch.common.unit.Fuzziness;
-import org.elasticsearch.common.xcontent.XContentBuilder;
-import org.elasticsearch.common.xcontent.json.JsonXContent;
-import org.elasticsearch.index.query.BoolQueryBuilder;
-import org.elasticsearch.index.query.QueryBuilder;
-import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
-import org.junit.Test;
-import
org.wikimedia.highlighter.experimental.elasticsearch.AbstractExperimentalHighlighterIntegrationTestBase;
-
-import com.google.common.base.Charsets;
-import com.google.common.io.Resources;
/**
* Miscellaneous integration test that don't really have a good home.
@@ -431,6 +430,28 @@
}
@Test
+ public void offsetsAugmenter() throws IOException {
+ buildIndex();
+ indexTestData();
+ Map<String, Object> options = new HashMap<String, Object>();
+ options.put("return_snippets_and_offsets", true);
+ SearchResponse response = testSearch(matchQuery("test.english",
"test"),
+ x -> x.options(options).field("test.english")).get();
+ assertHighlight(response, 0, "test.english", 0,
equalTo("0:0-5,18-22:22|<em>tests</em> very simple <em>test</em>"));
+ }
+
+ @Test
+ public void offsetsAugmenterWithEmptyArray() throws IOException {
+ buildIndex();
+ indexTestData(Arrays.asList("", "after_empty_array"));
+ Map<String, Object> options = new HashMap<String, Object>();
+ options.put("return_snippets_and_offsets", true);
+ SearchResponse response = testSearch(matchQuery("test.english",
"after_empty_array"),
+ x -> x.options(options).field("test.english")).get();
+ assertHighlight(response, 0, "test.english", 0,
equalTo("1:1-18:18|<em>after_empty_array</em>"));
+ }
+
+ @Test
public void returnOffsetsMultiValued() throws IOException {
buildIndex();
indexTestData(ImmutableList.of("tests very simple test", "with more
test"));
--
To view, visit https://gerrit.wikimedia.org/r/371747
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I69deeca8589138fe512f6fc782ecc80897cf26bb
Gerrit-PatchSet: 1
Gerrit-Project: search/highlighter
Gerrit-Branch: master
Gerrit-Owner: DCausse <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits