DCausse has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/291844

Change subject: Fix security violations
......................................................................

Fix security violations

Plugins are now loaded in their own ClassLoader. It is no longer possible to
use the package trick to access package-visible members/methods owned by
elasticsearch/lucene.  This patch moves all of our classes out of the
org.elasticsearch and org.apache.lucene packages.  Unfortunately this was not
caught by integration tests because they use the same classloader.

Change-Id: Ic7d458b107d4b3f25d018f0792cbc02f38e0fb48
---
D 
experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/RegexTooComplexException.java
M 
experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/DelayedSegmenter.java
R 
experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ExperimentalHighlighter.java
R 
experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/FieldWrapper.java
M 
experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/plugin/ExperimentalHighlighterModule.java
R 
experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/automaton/OffsetReturningRunAutomaton.java
M 
experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/AutomatonHitEnum.java
7 files changed, 46 insertions(+), 52 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/search/highlighter 
refs/changes/44/291844/1

diff --git 
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/RegexTooComplexException.java
 
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/RegexTooComplexException.java
deleted file mode 100644
index 2becd87..0000000
--- 
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/RegexTooComplexException.java
+++ /dev/null
@@ -1,15 +0,0 @@
-package org.elasticsearch.search.highlight;
-
-import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
-
-/**
- * Wraps Lucene's XTooComplexToDeterminizeException to be serializable to be
- * thrown over the wire.
- */
-public class RegexTooComplexException extends RuntimeException {
-    private static final long serialVersionUID = -41975279199116247L;
-
-    public RegexTooComplexException(TooComplexToDeterminizeException e) {
-        super(e.getMessage());
-    }
-}
diff --git 
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/DelayedSegmenter.java
 
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/DelayedSegmenter.java
index 20e29d3..8d74fe0 100644
--- 
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/DelayedSegmenter.java
+++ 
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/DelayedSegmenter.java
@@ -3,7 +3,6 @@
 import java.io.IOException;
 
 import org.elasticsearch.ElasticsearchException;
-import org.elasticsearch.search.highlight.FieldWrapper;
 import org.wikimedia.search.highlighter.experimental.Segmenter;
 import org.wikimedia.search.highlighter.experimental.snippet.MultiSegmenter;
 
diff --git 
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
 
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ExperimentalHighlighter.java
similarity index 94%
rename from 
experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
rename to 
experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ExperimentalHighlighter.java
index 8b69d6b..6f8298e 100644
--- 
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
+++ 
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/ExperimentalHighlighter.java
@@ -1,4 +1,4 @@
-package org.elasticsearch.search.highlight;
+package org.wikimedia.highlighter.experimental.elasticsearch;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -13,7 +13,6 @@
 
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.automaton.RegExp;
-import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
 import org.elasticsearch.common.logging.ESLogger;
 import org.elasticsearch.common.logging.ESLoggerFactory;
 import org.elasticsearch.common.text.StringAndBytesText;
@@ -23,16 +22,10 @@
 import org.elasticsearch.common.util.LocaleUtils;
 import org.elasticsearch.index.mapper.FieldMapper;
 import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
+import org.elasticsearch.search.highlight.HighlightField;
+import org.elasticsearch.search.highlight.Highlighter;
+import org.elasticsearch.search.highlight.HighlighterContext;
 import org.elasticsearch.search.highlight.SearchContextHighlight.FieldOptions;
-import 
org.wikimedia.highlighter.experimental.elasticsearch.BytesRefHashTermInfos;
-import 
org.wikimedia.highlighter.experimental.elasticsearch.CharScanningSegmenterFactory;
-import org.wikimedia.highlighter.experimental.elasticsearch.DelayedSegmenter;
-import 
org.wikimedia.highlighter.experimental.elasticsearch.ElasticsearchQueryFlattener;
-import 
org.wikimedia.highlighter.experimental.elasticsearch.FetchedFieldIndexPicker;
-import 
org.wikimedia.highlighter.experimental.elasticsearch.OffsetSnippetFormatter;
-import org.wikimedia.highlighter.experimental.elasticsearch.SegmenterFactory;
-import 
org.wikimedia.highlighter.experimental.elasticsearch.SentenceIteratorSegmenterFactory;
-import 
org.wikimedia.highlighter.experimental.elasticsearch.WholeSourceSegmenterFactory;
 import org.wikimedia.highlighter.experimental.lucene.hit.AutomatonHitEnum;
 import 
org.wikimedia.highlighter.experimental.lucene.hit.weight.BasicQueryWeigher;
 import org.wikimedia.search.highlighter.experimental.HitEnum;
@@ -363,20 +356,7 @@
         }
 
         private AutomatonHitEnum.Factory buildFactoryForRegex(RegExp regex) {
-            try {
-                return 
AutomatonHitEnum.factory(regex.toAutomaton(getMaxDeterminizedStates()));
-            } catch (TooComplexToDeterminizeException e) {
-                /*
-                 * Elasticsearch forces us to wrap the exception in a fully
-                 * Serializable exception and throw out the stack trace so we
-                 * give our future selves the oportunity to log it when we need
-                 * it.
-                 */
-                if (log.isDebugEnabled()) {
-                    log.debug("Regex too complex", e);
-                }
-                throw new RegexTooComplexException(e);
-            }
+            return 
AutomatonHitEnum.factory(regex.toAutomaton(getMaxDeterminizedStates()));
         }
 
         private int getMaxDeterminizedStates() {
diff --git 
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/FieldWrapper.java
 
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/FieldWrapper.java
similarity index 87%
rename from 
experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/FieldWrapper.java
rename to 
experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/FieldWrapper.java
index 551d067..bbc403e 100644
--- 
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/FieldWrapper.java
+++ 
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/FieldWrapper.java
@@ -1,7 +1,8 @@
-package org.elasticsearch.search.highlight;
+package org.wikimedia.highlighter.experimental.elasticsearch;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -12,12 +13,16 @@
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;
 import org.elasticsearch.index.mapper.FieldMapper;
 import org.elasticsearch.index.mapper.core.StringFieldMapper;
-import 
org.elasticsearch.search.highlight.ExperimentalHighlighter.HighlightExecutionContext;
+import org.elasticsearch.search.fetch.FetchSubPhase;
+import org.elasticsearch.search.highlight.HighlighterContext;
+import org.elasticsearch.search.highlight.SearchContextHighlight;
 import org.elasticsearch.search.highlight.SearchContextHighlight.FieldOptions;
-import 
org.wikimedia.highlighter.experimental.elasticsearch.BytesRefTermWeigherCache;
-import org.wikimedia.highlighter.experimental.elasticsearch.SegmenterFactory;
+import org.elasticsearch.search.internal.SearchContext;
+import org.elasticsearch.search.lookup.SourceLookup;
+import 
org.wikimedia.highlighter.experimental.elasticsearch.ExperimentalHighlighter.HighlightExecutionContext;
 import org.wikimedia.highlighter.experimental.lucene.hit.PostingsHitEnum;
 import org.wikimedia.highlighter.experimental.lucene.hit.TokenStreamHitEnum;
 import 
org.wikimedia.highlighter.experimental.lucene.hit.weight.BasicQueryWeigher;
@@ -38,6 +43,7 @@
 import 
org.wikimedia.search.highlighter.experimental.source.StringSourceExtracter;
 
 import com.google.common.base.Function;
+import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Iterators;
 
 public class FieldWrapper {
@@ -105,7 +111,7 @@
 
     public List<String> getFieldValues() throws IOException {
         if (values == null) {
-            List<Object> objs = HighlightUtils.loadFieldValues(context.field, 
context.mapper,
+            List<Object> objs = loadFieldValues(context.field, context.mapper,
                     context.context, context.hitContext);
             values = new ArrayList<String>(objs.size());
             for (Object obj : objs) {
@@ -115,6 +121,30 @@
         return values;
     }
 
+    /*
+     * XXX: Copy/Pasted from HighlightUtils#loadFieldValues
+     */
+    static List<Object> loadFieldValues(SearchContextHighlight.Field field, 
FieldMapper mapper, SearchContext searchContext, FetchSubPhase.HitContext 
hitContext) throws IOException {
+        //percolator needs to always load from source, thus it sets the global 
force source to true
+        boolean forceSource = searchContext.highlight().forceSource(field);
+        List<Object> textsToHighlight;
+        if (!forceSource && mapper.fieldType().stored()) {
+            CustomFieldsVisitor fieldVisitor = new 
CustomFieldsVisitor(ImmutableSet.of(mapper.fieldType().names().indexName()), 
false);
+            hitContext.reader().document(hitContext.docId(), fieldVisitor);
+            textsToHighlight = 
fieldVisitor.fields().get(mapper.fieldType().names().indexName());
+            if (textsToHighlight == null) {
+                // Can happen if the document doesn't have the field to 
highlight
+                textsToHighlight = Collections.emptyList();
+            }
+        } else {
+            SourceLookup sourceLookup = searchContext.lookup().source();
+            sourceLookup.setSegmentAndDocument(hitContext.readerContext(), 
hitContext.docId());
+            textsToHighlight = 
sourceLookup.extractRawValues(hitContext.getSourcePath(mapper.fieldType().names().fullName()));
+        }
+        assert textsToHighlight != null;
+        return textsToHighlight;
+    }
+
     public SourceExtracter<String> buildSourceExtracter() throws IOException {
         List<String> fieldValues = getFieldValues();
         switch (fieldValues.size()) {
diff --git 
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/plugin/ExperimentalHighlighterModule.java
 
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/plugin/ExperimentalHighlighterModule.java
index b93e676..c7c41cc 100644
--- 
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/plugin/ExperimentalHighlighterModule.java
+++ 
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/wikimedia/highlighter/experimental/elasticsearch/plugin/ExperimentalHighlighterModule.java
@@ -2,8 +2,8 @@
 
 import org.elasticsearch.common.inject.AbstractModule;
 import org.elasticsearch.common.inject.multibindings.MapBinder;
-import org.elasticsearch.search.highlight.ExperimentalHighlighter;
 import org.elasticsearch.search.highlight.Highlighter;
+import 
org.wikimedia.highlighter.experimental.elasticsearch.ExperimentalHighlighter;
 
 public class ExperimentalHighlighterModule extends AbstractModule {
     @Override
diff --git 
a/experimental-highlighter-lucene/src/main/java/org/apache/lucene/util/automaton/OffsetReturningRunAutomaton.java
 
b/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/automaton/OffsetReturningRunAutomaton.java
similarity index 89%
rename from 
experimental-highlighter-lucene/src/main/java/org/apache/lucene/util/automaton/OffsetReturningRunAutomaton.java
rename to 
experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/automaton/OffsetReturningRunAutomaton.java
index e1c65ff..08f23ec 100644
--- 
a/experimental-highlighter-lucene/src/main/java/org/apache/lucene/util/automaton/OffsetReturningRunAutomaton.java
+++ 
b/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/automaton/OffsetReturningRunAutomaton.java
@@ -1,4 +1,4 @@
-package org.apache.lucene.util.automaton;
+package org.wikimedia.highlighter.experimental.lucene.automaton;
 
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.RunAutomaton;
@@ -22,7 +22,7 @@
      * @return the end offset of the matching string or -1 if no match
      */
     public int run(String s, int offset, int end) {
-        int p = initial;
+        int p = getInitialState();
         int i;
         int cp;
         int lastMatch = -1;
@@ -35,7 +35,7 @@
                 // luck.
                 return lastMatch;
             }
-            if (accept[p]) {
+            if (isAccept(p)) {
                 // We're matching right now so if we ever fail to match the 
rest
                 // of the string then we can roll back to here.
                 lastMatch = i + 1;
@@ -45,6 +45,6 @@
         // its not acceptable then we're half way through a potential match 
that
         // we'll never finish. If we have a last match then that was it -
         // otherwise no match.
-        return accept[p] ? i : lastMatch;
+        return isAccept(p) ? i : lastMatch;
     }
 }
diff --git 
a/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/AutomatonHitEnum.java
 
b/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/AutomatonHitEnum.java
index 8747c00..62a151d 100644
--- 
a/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/AutomatonHitEnum.java
+++ 
b/experimental-highlighter-lucene/src/main/java/org/wikimedia/highlighter/experimental/lucene/hit/AutomatonHitEnum.java
@@ -1,7 +1,7 @@
 package org.wikimedia.highlighter.experimental.lucene.hit;
 
 import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.OffsetReturningRunAutomaton;
+import 
org.wikimedia.highlighter.experimental.lucene.automaton.OffsetReturningRunAutomaton;
 import org.wikimedia.search.highlighter.experimental.hit.AbstractHitEnum;
 import org.wikimedia.search.highlighter.experimental.hit.HitWeigher;
 import 
org.wikimedia.search.highlighter.experimental.hit.weight.ConstantHitWeigher;

-- 
To view, visit https://gerrit.wikimedia.org/r/291844
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic7d458b107d4b3f25d018f0792cbc02f38e0fb48
Gerrit-PatchSet: 1
Gerrit-Project: search/highlighter
Gerrit-Branch: 2.0
Gerrit-Owner: DCausse <dcau...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to