DCausse has uploaded a new change for review. https://gerrit.wikimedia.org/r/291844
Change subject: Fix security violations ...................................................................... Fix security violations Plugins are now loaded in their own ClassLoader. It is no longer possible to use the package trick to access package-visible members/methods owned by elasticsearch/lucene. This patch removes all org.elasticsearch and org.apache.lucene packages. Unfortunately, this was not caught by integration tests because we use the same classloader. Change-Id: Ic7d458b107d4b3f25d018f0792cbc02f38e0fb48 --- D experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/RegexTooComplexException.java M experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/DelayedSegmenter.java R experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ExperimentalHighlighter.java R experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/FieldWrapper.java M experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/plugin/ExperimentalHighlighterModule.java R experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/automaton/OffsetReturningRunAutomaton.java M experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/AutomatonHitEnum.java 7 files changed, 46 insertions(+), 52 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/search/highlighter refs/changes/44/291844/1 diff --git a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/RegexTooComplexException.java b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/RegexTooComplexException.java deleted file mode 100644 index 2becd87..0000000 --- 
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/RegexTooComplexException.java +++ /dev/null @@ -1,15 +0,0 @@ -package org.elasticsearch.search.highlight; - -import org.apache.lucene.util.automaton.TooComplexToDeterminizeException; - -/** - * Wraps Lucene's XTooComplexToDeterminizeException to be serializable to be - * thrown over the wire. - */ -public class RegexTooComplexException extends RuntimeException { - private static final long serialVersionUID = -41975279199116247L; - - public RegexTooComplexException(TooComplexToDeterminizeException e) { - super(e.getMessage()); - } -} diff --git a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/DelayedSegmenter.java b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/DelayedSegmenter.java index 20e29d3..8d74fe0 100644 --- a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/DelayedSegmenter.java +++ b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/DelayedSegmenter.java @@ -3,7 +3,6 @@ import java.io.IOException; import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.search.highlight.FieldWrapper; import org.wikimedia.search.highlighter.experimental.Segmenter; import org.wikimedia.search.highlighter.experimental.snippet.MultiSegmenter; diff --git a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ExperimentalHighlighter.java similarity index 94% rename from experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java rename to 
experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ExperimentalHighlighter.java index 8b69d6b..6f8298e 100644 --- a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java +++ b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ExperimentalHighlighter.java @@ -1,4 +1,4 @@ -package org.elasticsearch.search.highlight; +package org.wikimedia.highlighter.experimental.elasticsearch; import java.io.IOException; import java.util.ArrayList; @@ -13,7 +13,6 @@ import org.apache.lucene.search.Query; import org.apache.lucene.util.automaton.RegExp; -import org.apache.lucene.util.automaton.TooComplexToDeterminizeException; import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.logging.ESLoggerFactory; import org.elasticsearch.common.text.StringAndBytesText; @@ -23,16 +22,10 @@ import org.elasticsearch.common.util.LocaleUtils; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.search.fetch.FetchPhaseExecutionException; +import org.elasticsearch.search.highlight.HighlightField; +import org.elasticsearch.search.highlight.Highlighter; +import org.elasticsearch.search.highlight.HighlighterContext; import org.elasticsearch.search.highlight.SearchContextHighlight.FieldOptions; -import org.wikimedia.highlighter.experimental.elasticsearch.BytesRefHashTermInfos; -import org.wikimedia.highlighter.experimental.elasticsearch.CharScanningSegmenterFactory; -import org.wikimedia.highlighter.experimental.elasticsearch.DelayedSegmenter; -import org.wikimedia.highlighter.experimental.elasticsearch.ElasticsearchQueryFlattener; -import org.wikimedia.highlighter.experimental.elasticsearch.FetchedFieldIndexPicker; -import org.wikimedia.highlighter.experimental.elasticsearch.OffsetSnippetFormatter; -import 
org.wikimedia.highlighter.experimental.elasticsearch.SegmenterFactory; -import org.wikimedia.highlighter.experimental.elasticsearch.SentenceIteratorSegmenterFactory; -import org.wikimedia.highlighter.experimental.elasticsearch.WholeSourceSegmenterFactory; import org.wikimedia.highlighter.experimental.lucene.hit.AutomatonHitEnum; import org.wikimedia.highlighter.experimental.lucene.hit.weight.BasicQueryWeigher; import org.wikimedia.search.highlighter.experimental.HitEnum; @@ -363,20 +356,7 @@ } private AutomatonHitEnum.Factory buildFactoryForRegex(RegExp regex) { - try { - return AutomatonHitEnum.factory(regex.toAutomaton(getMaxDeterminizedStates())); - } catch (TooComplexToDeterminizeException e) { - /* - * Elasticsearch forces us to wrap the exception in a fully - * Serializable exception and throw out the stack trace so we - * give our future selves the oportunity to log it when we need - * it. - */ - if (log.isDebugEnabled()) { - log.debug("Regex too complex", e); - } - throw new RegexTooComplexException(e); - } + return AutomatonHitEnum.factory(regex.toAutomaton(getMaxDeterminizedStates())); } private int getMaxDeterminizedStates() { diff --git a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/FieldWrapper.java b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/FieldWrapper.java similarity index 87% rename from experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/FieldWrapper.java rename to experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/FieldWrapper.java index 551d067..bbc403e 100644 --- a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/FieldWrapper.java +++ b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/FieldWrapper.java @@ -1,7 
+1,8 @@ -package org.elasticsearch.search.highlight; +package org.wikimedia.highlighter.experimental.elasticsearch; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -12,12 +13,16 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.core.StringFieldMapper; -import org.elasticsearch.search.highlight.ExperimentalHighlighter.HighlightExecutionContext; +import org.elasticsearch.search.fetch.FetchSubPhase; +import org.elasticsearch.search.highlight.HighlighterContext; +import org.elasticsearch.search.highlight.SearchContextHighlight; import org.elasticsearch.search.highlight.SearchContextHighlight.FieldOptions; -import org.wikimedia.highlighter.experimental.elasticsearch.BytesRefTermWeigherCache; -import org.wikimedia.highlighter.experimental.elasticsearch.SegmenterFactory; +import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.search.lookup.SourceLookup; +import org.wikimedia.highlighter.experimental.elasticsearch.ExperimentalHighlighter.HighlightExecutionContext; import org.wikimedia.highlighter.experimental.lucene.hit.PostingsHitEnum; import org.wikimedia.highlighter.experimental.lucene.hit.TokenStreamHitEnum; import org.wikimedia.highlighter.experimental.lucene.hit.weight.BasicQueryWeigher; @@ -38,6 +43,7 @@ import org.wikimedia.search.highlighter.experimental.source.StringSourceExtracter; import com.google.common.base.Function; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterators; public class FieldWrapper { @@ -105,7 +111,7 @@ public List<String> getFieldValues() throws IOException { if (values == null) { - List<Object> objs = 
HighlightUtils.loadFieldValues(context.field, context.mapper, + List<Object> objs = loadFieldValues(context.field, context.mapper, context.context, context.hitContext); values = new ArrayList<String>(objs.size()); for (Object obj : objs) { @@ -115,6 +121,30 @@ return values; } + /* + * XXX: Copy/Pasted from HighlightUtils#loadFieldValues + */ + static List<Object> loadFieldValues(SearchContextHighlight.Field field, FieldMapper mapper, SearchContext searchContext, FetchSubPhase.HitContext hitContext) throws IOException { + //percolator needs to always load from source, thus it sets the global force source to true + boolean forceSource = searchContext.highlight().forceSource(field); + List<Object> textsToHighlight; + if (!forceSource && mapper.fieldType().stored()) { + CustomFieldsVisitor fieldVisitor = new CustomFieldsVisitor(ImmutableSet.of(mapper.fieldType().names().indexName()), false); + hitContext.reader().document(hitContext.docId(), fieldVisitor); + textsToHighlight = fieldVisitor.fields().get(mapper.fieldType().names().indexName()); + if (textsToHighlight == null) { + // Can happen if the document doesn't have the field to highlight + textsToHighlight = Collections.emptyList(); + } + } else { + SourceLookup sourceLookup = searchContext.lookup().source(); + sourceLookup.setSegmentAndDocument(hitContext.readerContext(), hitContext.docId()); + textsToHighlight = sourceLookup.extractRawValues(hitContext.getSourcePath(mapper.fieldType().names().fullName())); + } + assert textsToHighlight != null; + return textsToHighlight; + } + public SourceExtracter<String> buildSourceExtracter() throws IOException { List<String> fieldValues = getFieldValues(); switch (fieldValues.size()) { diff --git a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/plugin/ExperimentalHighlighterModule.java 
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/plugin/ExperimentalHighlighterModule.java index b93e676..c7c41cc 100644 --- a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/plugin/ExperimentalHighlighterModule.java +++ b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/plugin/ExperimentalHighlighterModule.java @@ -2,8 +2,8 @@ import org.elasticsearch.common.inject.AbstractModule; import org.elasticsearch.common.inject.multibindings.MapBinder; -import org.elasticsearch.search.highlight.ExperimentalHighlighter; import org.elasticsearch.search.highlight.Highlighter; +import org.wikimedia.highlighter.experimental.elasticsearch.ExperimentalHighlighter; public class ExperimentalHighlighterModule extends AbstractModule { @Override diff --git a/experimental-highlighter-lucene/src/main/java/org/apache/lucene/util/automaton/OffsetReturningRunAutomaton.java b/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/automaton/OffsetReturningRunAutomaton.java similarity index 89% rename from experimental-highlighter-lucene/src/main/java/org/apache/lucene/util/automaton/OffsetReturningRunAutomaton.java rename to experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/automaton/OffsetReturningRunAutomaton.java index e1c65ff..08f23ec 100644 --- a/experimental-highlighter-lucene/src/main/java/org/apache/lucene/util/automaton/OffsetReturningRunAutomaton.java +++ b/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/automaton/OffsetReturningRunAutomaton.java @@ -1,4 +1,4 @@ -package org.apache.lucene.util.automaton; +package org.wikimedia.highlighter.experimental.lucene.automaton; import org.apache.lucene.util.automaton.Automaton; import 
org.apache.lucene.util.automaton.RunAutomaton; @@ -22,7 +22,7 @@ * @return the end offset of the matching string or -1 if no match */ public int run(String s, int offset, int end) { - int p = initial; + int p = getInitialState(); int i; int cp; int lastMatch = -1; @@ -35,7 +35,7 @@ // luck. return lastMatch; } - if (accept[p]) { + if (isAccept(p)) { // We're matching right now so if we ever fail to match the rest // of the string then we can roll back to here. lastMatch = i + 1; @@ -45,6 +45,6 @@ // its not acceptable then we're half way through a potential match that // we'll never finish. If we have a last match then that was it - // otherwise no match. - return accept[p] ? i : lastMatch; + return isAccept(p) ? i : lastMatch; } } diff --git a/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/AutomatonHitEnum.java b/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/AutomatonHitEnum.java index 8747c00..62a151d 100644 --- a/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/AutomatonHitEnum.java +++ b/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/AutomatonHitEnum.java @@ -1,7 +1,7 @@ package org.wikimedia.highlighter.experimental.lucene.hit; import org.apache.lucene.util.automaton.Automaton; -import org.apache.lucene.util.automaton.OffsetReturningRunAutomaton; +import org.wikimedia.highlighter.experimental.lucene.automaton.OffsetReturningRunAutomaton; import org.wikimedia.search.highlighter.experimental.hit.AbstractHitEnum; import org.wikimedia.search.highlighter.experimental.hit.HitWeigher; import org.wikimedia.search.highlighter.experimental.hit.weight.ConstantHitWeigher; -- To view, visit https://gerrit.wikimedia.org/r/291844 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ic7d458b107d4b3f25d018f0792cbc02f38e0fb48 
Gerrit-PatchSet: 1 Gerrit-Project: search/highlighter Gerrit-Branch: 2.0 Gerrit-Owner: DCausse <dcau...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits