DCausse has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/219182

Change subject: WIP: Add Safeifier ActionModule to verify wildcard before 
compiling
......................................................................

WIP: Add Safeifier ActionModule to verify wildcard before compiling

I will comment this patch in the phab ticket

Bug: T102589
Change-Id: Ifcc9e55a4d81d48c05a76ca1afdd9b3adbf86006
---
M pom.xml
A src/main/java/org/elasticsearch/index/query/UnsafeSaferQueryParseContext.java
M src/main/java/org/wikimedia/search/extra/ExtraPlugin.java
M src/main/java/org/wikimedia/search/extra/safer/Safeifier.java
M src/main/java/org/wikimedia/search/extra/safer/SaferQueryBuilder.java
M src/main/java/org/wikimedia/search/extra/safer/SaferQueryParser.java
A 
src/main/java/org/wikimedia/search/extra/safer/wildcard/CheckWildcardsAction.java
A 
src/main/java/org/wikimedia/search/extra/safer/wildcard/CheckWildcardsActionModule.java
A 
src/main/java/org/wikimedia/search/extra/safer/wildcard/CheckWildcardsActionModuleParser.java
A 
src/main/java/org/wikimedia/search/extra/safer/wildcard/DelayedWildcardQuery.java
A 
src/main/java/org/wikimedia/search/extra/safer/wildcard/SaferMapperQueryParser.java
A 
src/main/java/org/wikimedia/search/extra/safer/wildcard/TooManyWildcardsException.java
A src/test/java/org/wikimedia/search/extra/safer/CheckWildcardsTest.java
M src/test/java/org/wikimedia/search/extra/safer/SaferQueryStringTest.java
14 files changed, 749 insertions(+), 30 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/search/extra 
refs/changes/82/219182/1

diff --git a/pom.xml b/pom.xml
index ec14044..a01aead 100644
--- a/pom.xml
+++ b/pom.xml
@@ -43,7 +43,7 @@
 
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-    <elasticsearch.version>1.3.4</elasticsearch.version>
+    <elasticsearch.version>1.3.6</elasticsearch.version>
     <lucene.version>4.9.1</lucene.version>
   </properties>
 
diff --git 
a/src/main/java/org/elasticsearch/index/query/UnsafeSaferQueryParseContext.java 
b/src/main/java/org/elasticsearch/index/query/UnsafeSaferQueryParseContext.java
new file mode 100644
index 0000000..54dfa4c
--- /dev/null
+++ 
b/src/main/java/org/elasticsearch/index/query/UnsafeSaferQueryParseContext.java
@@ -0,0 +1,290 @@
+package org.elasticsearch.index.query;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.EnumSet;
+import java.util.Set;
+
+import org.apache.lucene.queryparser.classic.MapperQueryParser;
+import org.apache.lucene.queryparser.classic.QueryParserSettings;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.similarities.Similarity;
+import org.elasticsearch.cache.recycler.CacheRecycler;
+import org.elasticsearch.common.ParseField.Flag;
+import org.elasticsearch.common.collect.ImmutableMap;
+import org.elasticsearch.common.logging.ESLogger;
+import org.elasticsearch.common.logging.Loggers;
+import org.elasticsearch.common.lucene.search.NoCacheQuery;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.analysis.AnalysisService;
+import org.elasticsearch.index.cache.filter.support.CacheKeyFilter.Key;
+import org.elasticsearch.index.cache.query.parser.QueryParserCache;
+import org.elasticsearch.index.engine.IndexEngine;
+import org.elasticsearch.index.fielddata.IndexFieldData;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.index.mapper.MapperService.SmartNameFieldMappers;
+import org.elasticsearch.index.mapper.MapperService.SmartNameObjectMapper;
+import org.elasticsearch.index.search.child.CustomQueryWrappingFilter;
+import org.elasticsearch.index.similarity.SimilarityService;
+import org.elasticsearch.script.ScriptService;
+import org.elasticsearch.search.lookup.SearchLookup;
+import org.wikimedia.search.extra.safer.SaferQueryParser;
+import org.wikimedia.search.extra.safer.wildcard.CheckWildcardsActionModule;
+import org.wikimedia.search.extra.safer.wildcard.SaferMapperQueryParser;
+
+/**
+ * Hack for {@link CheckWildcardsActionModule}
+ *
+ * This class is just here to allow a custom MapperQueryParser to be used into 
{@link QueryStringQueryParser}
+ * XXX: Potential flaw, this object is stored in a thread local cache in 
{@link SaferQueryParser} 
+ *
+ * The normal QueryParseContext seems to be "index scoped" (see the thread 
local cache {@link IndexQueryParserService})
+ * but this one will be node scoped (it's not clear to me
+ *
+ * in org.elasticsearch.index.query because we need package visibility for 
{@link QueryParseContext#indexQueryParser}
+ */
+public class UnsafeSaferQueryParseContext extends QueryParseContext {
+    private QueryParseContext parseContext = null;
+    private final SaferMapperQueryParser queryParser;
+
+    public UnsafeSaferQueryParseContext() {
+        // XXX: Don't store anything in the base class
+        super(null, null);
+        this.queryParser = new SaferMapperQueryParser(this);
+    }
+
+    /**
+     * XXX: don't forget to {@link #cleanUp()}
+     * @param parseContext
+     */
+    public void setDelegate(QueryParseContext parseContext) {
+        this.parseContext = parseContext;
+    }
+
+    public QueryParseContext getDelegate() {
+        return parseContext;
+    }
+
+    /**
+     * Release the inner parseContext instance
+     */
+    public void cleanUp() {
+        this.parseContext = null;
+    }
+
+    /**
+     * XXX: we send our SaferMapperQueryParser
+     */
+    public MapperQueryParser queryParser(QueryParserSettings settings) {
+        queryParser.reset(settings);
+        return queryParser;
+    }
+
+    /**
+     * XXX: not sure
+     */
+    @Override
+    public void reset(XContentParser jp) {
+        parseContext.reset(jp);
+    }
+
+    /**
+     * Copy/Paste from {@link QueryParseContext#parseInnerQuery()}.
+     *
+     * This method will fail if NoCacheQuery is returned of if 
CustomQueryWrappingFilter.shouldUseCustomQueryWrappingFilter
+     * returns true. This is because we cannot maintain properly the state of 
the QueryParseContext (propagateNoCache and
+     * requireCustomQueryWrappingFilter booleans)
+     *
+     * @param parseContext
+     * @return the query
+     * @throws IOException
+     * @throws QueryParsingException
+     */
+    @Override
+    public Query parseInnerQuery() throws IOException, QueryParsingException {
+        XContentParser parser = parseContext.parser();
+        Index index = parseContext.index();
+
+        // move to START object
+        XContentParser.Token token;
+        if (parser.currentToken() != XContentParser.Token.START_OBJECT) {
+            token = parser.nextToken();
+            if (token != XContentParser.Token.START_OBJECT) {
+                throw new QueryParsingException(index, "[_na] query malformed, 
must start with start_object");
+            }
+        }
+        token = parser.nextToken();
+        if (token != XContentParser.Token.FIELD_NAME) {
+            throw new QueryParsingException(index, "[_na] query malformed, no 
field after start_object");
+        }
+        String queryName = parser.currentName();
+        // move to the next START_OBJECT
+        token = parser.nextToken();
+        if (token != XContentParser.Token.START_OBJECT && token != 
XContentParser.Token.START_ARRAY) {
+            throw new QueryParsingException(index, "[_na] query malformed, no 
field after start_object");
+        }
+
+        QueryParser queryParser = 
parseContext.indexQueryParser.queryParser(queryName);
+        if (queryParser == null) {
+            throw new QueryParsingException(index, "No query registered for [" 
+ queryName + "]");
+        }
+        // XXX: hack is here, if we detect a query_string we wrap a new 
QueryParseContext that
+        // will send our own MapperQueryParser.
+        final Query result;
+        if(queryParser instanceof QueryStringQueryParser) {
+            // This could potentially propagate our 
UnsafeSaferQueryParseContext but hopefully
+            // query_string doesn't have inner ES dsl queries.
+            result = queryParser.parse(this);
+        } else {
+            result = queryParser.parse(parseContext);
+        }
+        if (parser.currentToken() == XContentParser.Token.END_OBJECT || 
parser.currentToken() == XContentParser.Token.END_ARRAY) {
+            // if we are at END_OBJECT, move to the next one...
+            parser.nextToken();
+        }
+
+        // XXX: We fail here because we can't maintain the state of 
QueryParseContext properly
+        if (result instanceof NoCacheQuery) {
+            throw new QueryParsingException(index, "Safeifier does not support 
NoCacheQuery");
+        }
+        if 
(CustomQueryWrappingFilter.shouldUseCustomQueryWrappingFilter(result)) {
+            throw new QueryParsingException(index, "Safeifier does not support 
NoCacheQuery");
+        }
+        return result;
+    }
+
+    @Override
+    public void parseFlags(EnumSet<Flag> parseFlags) {
+        parseContext.parseFlags(parseFlags);
+    }
+    @Override
+    public EnumSet<Flag> parseFlags() {
+        return parseContext.parseFlags();
+    }
+    @Override
+    public Index index() {
+        return parseContext.index();
+    }
+    @Override
+    public void parser(XContentParser parser) {
+        parseContext.parser(parser);
+    }
+    @Override
+    public XContentParser parser() {
+        return parseContext.parser();
+    }
+    @Override
+    public AnalysisService analysisService() {
+        return parseContext.analysisService();
+    }
+    @Override
+    public CacheRecycler cacheRecycler() {
+        return parseContext.cacheRecycler();
+    }
+    @Override
+    public ScriptService scriptService() {
+        return parseContext.scriptService();
+    }
+    @Override
+    public MapperService mapperService() {
+        return parseContext.mapperService();
+    }
+    @Override
+    public IndexEngine indexEngine() {
+        return parseContext.indexEngine();
+    }
+    @Override
+    public SimilarityService similarityService() {
+        return parseContext.similarityService();
+    }
+    @Override
+    public Similarity searchSimilarity() {
+        return parseContext.searchSimilarity();
+    }
+    @Override
+    public QueryParserCache queryParserCache() {
+        return parseContext.queryParserCache();
+    }
+    @Override
+    public String defaultField() {
+        return parseContext.defaultField();
+    }
+    @Override
+    public boolean queryStringLenient() {
+        return parseContext.queryStringLenient();
+    }
+    @Override
+    public Filter cacheFilter(Filter filter, Key cacheKey) {
+        return parseContext.cacheFilter(filter, cacheKey);
+    }
+    @Override
+    public <IFD extends IndexFieldData<?>> IFD getForField(
+            FieldMapper<?> mapper) {
+        return parseContext.getForField(mapper);
+    }
+    @Override
+    public void addNamedFilter(String name, Filter filter) {
+        parseContext.addNamedFilter(name, filter);
+    }
+    @Override
+    public void addNamedQuery(String name, Query query) {
+        parseContext.addNamedQuery(name, query);
+    }
+    @Override
+    public ImmutableMap<String, Filter> copyNamedFilters() {
+        return parseContext.copyNamedFilters();
+    }
+    @Override
+    public Filter parseInnerFilter() throws IOException,
+            QueryParsingException {
+        return parseContext.parseInnerFilter();
+    }
+    @Override
+    public Filter parseInnerFilter(String filterName) throws IOException,
+            QueryParsingException {
+        return parseContext.parseInnerFilter(filterName);
+    }
+    @Override
+    public FieldMapper fieldMapper(String name) {
+        return parseContext.fieldMapper(name);
+    }
+    @Override
+    public String indexName(String name) {
+        return parseContext.indexName(name);
+    }
+    @Override
+    public Set<String> simpleMatchToIndexNames(String pattern) {
+        return parseContext.simpleMatchToIndexNames(pattern);
+    }
+    @Override
+    public SmartNameFieldMappers smartFieldMappers(String name) {
+        return parseContext.smartFieldMappers(name);
+    }
+    @Override
+    public FieldMapper smartNameFieldMapper(String name) {
+        return parseContext.smartNameFieldMapper(name);
+    }
+    @Override
+    public SmartNameObjectMapper smartObjectMapper(String name) {
+        return parseContext.smartObjectMapper(name);
+    }
+    @Override
+    public Collection<String> queryTypes() {
+        return parseContext.queryTypes();
+    }
+    @Override
+    public SearchLookup lookup() {
+        return parseContext.lookup();
+    }
+    @Override
+    public long nowInMillis() {
+        return parseContext.nowInMillis();
+    }
+    @Override
+    public boolean requireCustomQueryWrappingFilter() {
+        return parseContext.requireCustomQueryWrappingFilter();
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/org/wikimedia/search/extra/ExtraPlugin.java 
b/src/main/java/org/wikimedia/search/extra/ExtraPlugin.java
index abaa8dd..937525d 100644
--- a/src/main/java/org/wikimedia/search/extra/ExtraPlugin.java
+++ b/src/main/java/org/wikimedia/search/extra/ExtraPlugin.java
@@ -19,6 +19,7 @@
 import org.wikimedia.search.extra.safer.SaferQueryParser;
 import 
org.wikimedia.search.extra.safer.phrase.PhraseTooLargeActionModuleParser;
 import org.wikimedia.search.extra.safer.simple.SimpleActionModuleParser;
+import 
org.wikimedia.search.extra.safer.wildcard.CheckWildcardsActionModuleParser;
 import org.wikimedia.search.extra.superdetectnoop.ChangeHandler;
 import org.wikimedia.search.extra.superdetectnoop.SetHandler;
 import org.wikimedia.search.extra.superdetectnoop.SuperDetectNoopScript;
@@ -78,6 +79,7 @@
             Multibinder<ActionModuleParser> moduleParsers = 
Multibinder.newSetBinder(binder(), ActionModuleParser.class);
             
moduleParsers.addBinding().to(PhraseTooLargeActionModuleParser.class).asEagerSingleton();
             
moduleParsers.addBinding().to(SimpleActionModuleParser.class).asEagerSingleton();
+            
moduleParsers.addBinding().to(CheckWildcardsActionModuleParser.class).asEagerSingleton();
         }
     }
 
diff --git a/src/main/java/org/wikimedia/search/extra/safer/Safeifier.java 
b/src/main/java/org/wikimedia/search/extra/safer/Safeifier.java
index 0261843..e0b2465 100644
--- a/src/main/java/org/wikimedia/search/extra/safer/Safeifier.java
+++ b/src/main/java/org/wikimedia/search/extra/safer/Safeifier.java
@@ -59,4 +59,13 @@
         }
         return action.apply(this, q);
     }
+
+    /**
+     * Inspect registry
+     * @param action
+     * @return true if the safeifier handles this type of query
+     */
+    public boolean hasActionFor(Class<? extends Query> queryType) {
+        return registry.containsKey(queryType);
+    }
 }
diff --git 
a/src/main/java/org/wikimedia/search/extra/safer/SaferQueryBuilder.java 
b/src/main/java/org/wikimedia/search/extra/safer/SaferQueryBuilder.java
index 681a54b..01c725c 100644
--- a/src/main/java/org/wikimedia/search/extra/safer/SaferQueryBuilder.java
+++ b/src/main/java/org/wikimedia/search/extra/safer/SaferQueryBuilder.java
@@ -8,6 +8,7 @@
 import org.elasticsearch.index.query.QueryBuilder;
 import org.wikimedia.search.extra.safer.phrase.PhraseTooLargeAction;
 import org.wikimedia.search.extra.safer.simple.SimpleActionModule.Option;
+import org.wikimedia.search.extra.safer.wildcard.CheckWildcardsAction;
 
 public class SaferQueryBuilder extends BaseQueryBuilder {
     private final QueryBuilder delegate;
@@ -16,6 +17,8 @@
     private PhraseTooLargeAction phraseTooLargeAction;
     private Option termRangeQuery;
     private Option numericRangeQuery;
+    private Integer maxWildcards;
+    private CheckWildcardsAction checkWildcardsAction;
 
     public SaferQueryBuilder(QueryBuilder delegate) {
         this.delegate = delegate;
@@ -46,6 +49,16 @@
         return this;
     }
 
+    public SaferQueryBuilder maxWildcards(int wildcards) {
+        maxWildcards = wildcards;
+        return this;
+    }
+
+    public SaferQueryBuilder checkWildCardAction(CheckWildcardsAction 
checkWildcardsAction) {
+        this.checkWildcardsAction = checkWildcardsAction;
+        return this;
+    }
+
     @Override
     protected void doXContent(XContentBuilder builder, Params params) throws 
IOException {
         builder.startObject("safer");
@@ -69,6 +82,14 @@
             builder.field("numeric_range", 
numericRangeQuery.toString().toLowerCase(Locale.ROOT));
         }
         builder.endObject();
+        builder.startObject("wildcards");
+        if(maxWildcards != null) {
+            builder.field("max_wildcards", maxWildcards);
+        }
+        if(checkWildcardsAction != null) {
+            builder.field("check_wildcards_action", 
checkWildcardsAction.toString().toLowerCase(Locale.ROOT));
+        }
+        builder.endObject();
         builder.endObject();
     }
 }
diff --git 
a/src/main/java/org/wikimedia/search/extra/safer/SaferQueryParser.java 
b/src/main/java/org/wikimedia/search/extra/safer/SaferQueryParser.java
index 19b3cfe..233db2d 100644
--- a/src/main/java/org/wikimedia/search/extra/safer/SaferQueryParser.java
+++ b/src/main/java/org/wikimedia/search/extra/safer/SaferQueryParser.java
@@ -6,7 +6,9 @@
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.lucene.queryparser.classic.MapperQueryParser;
 import org.apache.lucene.search.Query;
+import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.collect.ImmutableMap;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.xcontent.XContentParser;
@@ -14,10 +16,26 @@
 import org.elasticsearch.index.query.QueryParseContext;
 import org.elasticsearch.index.query.QueryParser;
 import org.elasticsearch.index.query.QueryParsingException;
+import org.elasticsearch.index.query.UnsafeSaferQueryParseContext;
 import org.wikimedia.search.extra.safer.Safeifier.ActionModule;
+import org.wikimedia.search.extra.safer.wildcard.CheckWildcardsActionModule;
+import org.wikimedia.search.extra.safer.wildcard.DelayedWildcardQuery;
 
 public class SaferQueryParser implements QueryParser {
     private final Map<String, ActionModuleParser<?>> moduleParsers;
+
+    /**
+     * Hack for {@link CheckWildcardsActionModule}
+     *
+     * We need to cache the {@link UnsafeSaferQueryParseContext} instances 
mainly because they contain an
+     * instance of a {@link MapperQueryParser} that may be slow to build.
+     */
+    private ThreadLocal<UnsafeSaferQueryParseContext> cache = new 
ThreadLocal<UnsafeSaferQueryParseContext>() {
+        @Override
+        protected UnsafeSaferQueryParseContext initialValue() {
+            return new UnsafeSaferQueryParseContext();
+        }
+    };
 
     @Inject
     public SaferQueryParser(@SuppressWarnings("rawtypes") 
Set<ActionModuleParser> parsers) {
@@ -37,45 +55,71 @@
     public Query parse(QueryParseContext parseContext) throws IOException, 
QueryParsingException {
         boolean errorOnUnknownQueryType = true;
         List<ActionModule> actionModules = new ArrayList<ActionModule>();
+        UnsafeSaferQueryParseContext unsafeContext = null;
         Query delegate = null;
 
         XContentParser parser = parseContext.parser();
         String currentFieldName = null;
         XContentParser.Token token;
-        while ((token = parser.nextToken()) != 
XContentParser.Token.END_OBJECT) {
-            if (token == XContentParser.Token.FIELD_NAME) {
-                currentFieldName = parser.currentName();
-            } else if (token == Token.START_OBJECT) {
-                switch (currentFieldName) {
-                case "query":
-                    if (delegate != null) {
-                        throw new QueryParsingException(parseContext.index(), 
"[safer] Can only wrap a single query ([" + currentFieldName
-                                + "]) is the second query.");
+        try {
+            unsafeContext = getUnsafeContext(parseContext);
+            while ((token = parser.nextToken()) != 
XContentParser.Token.END_OBJECT) {
+                if (token == XContentParser.Token.FIELD_NAME) {
+                    currentFieldName = parser.currentName();
+                } else if (token == Token.START_OBJECT) {
+                    switch (currentFieldName) {
+                    case "query":
+                        if (delegate != null) {
+                            throw new 
QueryParsingException(parseContext.index(), "[safer] Can only wrap a single 
query ([" + currentFieldName
+                                    + "]) is the second query.");
+                        }
+                        // It'd be nice to know if the CheckWildcards module 
is needed before wrapping... but how?
+                        // 2 pass parsing?
+                        delegate = unsafeContext.parseInnerQuery();
+                        break;
+                    default:
+                        ActionModuleParser<?> moduleParser = 
moduleParsers.get(currentFieldName);
+                        if (moduleParser == null) {
+                            throw new 
QueryParsingException(parseContext.index(), "[safer] query does not support the 
object [" + currentFieldName + "]");
+                        }
+                        actionModules.add(moduleParser.parse(parseContext));
                     }
-                    delegate = parseContext.parseInnerQuery();
-                    break;
-                default:
-                    ActionModuleParser<?> moduleParser = 
moduleParsers.get(currentFieldName);
-                    if (moduleParser == null) {
-                        throw new QueryParsingException(parseContext.index(), 
"[safer] query does not support the object [" + currentFieldName + "]");
+                } else if (token.isValue()) {
+                    switch (currentFieldName) {
+                    case "error_on_unknown":
+                    case "errorOnUnknown":
+                        errorOnUnknownQueryType = parser.booleanValue();
+                        break;
+                    default:
+                        throw new QueryParsingException(parseContext.index(), 
"[safer] query does not support the field [" + currentFieldName + "]");
                     }
-                    actionModules.add(moduleParser.parse(parseContext));
-                }
-            } else if (token.isValue()) {
-                switch (currentFieldName) {
-                case "error_on_unknown":
-                case "errorOnUnknown":
-                    errorOnUnknownQueryType = parser.booleanValue();
-                    break;
-                default:
-                    throw new QueryParsingException(parseContext.index(), 
"[safer] query does not support the field [" + currentFieldName + "]");
                 }
             }
-        }
 
-        if (delegate == null) {
-            throw new QueryParsingException(parseContext.index(), "[safer] 
requires a query");
+            if (delegate == null) {
+                throw new QueryParsingException(parseContext.index(), "[safer] 
requires a query");
+            }
+
+            Safeifier safeifier = new Safeifier(errorOnUnknownQueryType, 
actionModules);
+
+            // Hack for CheckWildcards if the module is not requested: we must 
remove all DelayedWildcardQuery.
+            if(!safeifier.hasActionFor(DelayedWildcardQuery.class)) {
+                safeifier.register(DelayedWildcardQuery.class, 
CheckWildcardsActionModule.PARSE_DELAYED_WILDCARD);
+            }
+
+            return safeifier.safeify(delegate);
+        } finally {
+            if(unsafeContext != null) {
+                // Prevent memleak
+                unsafeContext.cleanUp();
+            }
         }
-        return new Safeifier(errorOnUnknownQueryType, 
actionModules).safeify(delegate);
+    }
+
+    @Nullable
+    private UnsafeSaferQueryParseContext getUnsafeContext(QueryParseContext 
parseContext) throws IOException, QueryParsingException {
+        UnsafeSaferQueryParseContext unsafe = cache.get();
+        unsafe.setDelegate(parseContext);
+        return unsafe;
     }
 }
\ No newline at end of file
diff --git 
a/src/main/java/org/wikimedia/search/extra/safer/wildcard/CheckWildcardsAction.java
 
b/src/main/java/org/wikimedia/search/extra/safer/wildcard/CheckWildcardsAction.java
new file mode 100644
index 0000000..ff21f26
--- /dev/null
+++ 
b/src/main/java/org/wikimedia/search/extra/safer/wildcard/CheckWildcardsAction.java
@@ -0,0 +1,97 @@
+package org.wikimedia.search.extra.safer.wildcard;
+
+import java.util.Locale;
+
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.WildcardQuery;
+import org.elasticsearch.ElasticsearchIllegalArgumentException;
+import org.elasticsearch.ElasticsearchParseException;
+
+public enum CheckWildcardsAction {
+       /**
+        * Cut the string
+        */
+       BREAK {
+               @Override
+               public Query perform(DelayedWildcardQuery query, int 
maxWildcards) {
+                       int index = getNthWildcardIndex(query.getTermText(), 
maxWildcards+1);
+                       String term = query.getTermText();
+                       if(index > 0) {
+                               term = query.getTermText().substring(0, index);
+                       } else if(index == 0) {
+                               throw new ElasticsearchParseException("Wildcard 
query cannot be compiled (max_wildcards = 0 ??)");
+                       }
+                       try {
+                               return query.compileWildcard(term);
+                       } catch (ParseException e) {
+                               throw new 
ElasticsearchParseException(e.getMessage(), e);
+                       }
+               }
+       },
+       /**
+        * throw an error
+        */
+       ERROR {
+               @Override
+               public Query perform(DelayedWildcardQuery query, int 
maxWildcards) {
+                       int index = getNthWildcardIndex(query.getTermText(), 
maxWildcards+1);
+                       if(index >= 0) {
+                               throw new TooManyWildcardsException("WildCard 
query is too complex : " + query.getTermText());
+                       }
+                       try {
+                               return query.compileWildcard();
+                       } catch (ParseException e) {
+                               throw new 
ElasticsearchParseException(e.getMessage(), e);
+                       }
+               }
+       };
+
+       public abstract Query perform(DelayedWildcardQuery query, int 
maxWildcards);
+
+       public static CheckWildcardsAction parse(String text) {
+               if("break".equals(text)) {
+                       return BREAK;
+               }
+               if("error".equals(text)) {
+                       return ERROR;
+               }
+        throw new 
ElasticsearchIllegalArgumentException(String.format(Locale.ROOT, "Invalid 
check_wildcards_action mode:  %s", text));
+       }
+
+       /**
+        * Find the nth wildcard (* or ?) in the text
+        *
+        * @param text
+        * @param nth
+        * @return return the index of the nth wildcards in the text or -1 if 
not found
+        */
+       public static int getNthWildcardIndex(String text, int nth) {
+               if(nth < 1) {
+                       throw new IllegalArgumentException("nth must be > 0");
+               }
+               // Code borrowed from WildcardQuery
+               int wildcards = 0;
+               for (int i = 0; i < text.length();) {
+                       final int c = text.codePointAt(i);
+                       int length = Character.charCount(c);
+                       switch (c) {
+                       case WildcardQuery.WILDCARD_STRING:
+                       case WildcardQuery.WILDCARD_CHAR:
+                               wildcards++;
+                               break;
+                       case WildcardQuery.WILDCARD_ESCAPE:
+                               if (i + length < text.length()) {
+                                       final int nextChar = text.codePointAt(i 
+ length);
+                                       length += Character.charCount(nextChar);
+                                       break;
+                               }
+                       }
+                       if(wildcards == nth) {
+                               return i;
+                       }
+                       i += length;
+               }
+               return -1;
+       }
+}
diff --git 
a/src/main/java/org/wikimedia/search/extra/safer/wildcard/CheckWildcardsActionModule.java
 
b/src/main/java/org/wikimedia/search/extra/safer/wildcard/CheckWildcardsActionModule.java
new file mode 100644
index 0000000..179682c
--- /dev/null
+++ 
b/src/main/java/org/wikimedia/search/extra/safer/wildcard/CheckWildcardsActionModule.java
@@ -0,0 +1,51 @@
+package org.wikimedia.search.extra.safer.wildcard;
+
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.search.Query;
+import org.wikimedia.search.extra.safer.Safeifier;
+import org.wikimedia.search.extra.safer.Safeifier.Action;
+import org.wikimedia.search.extra.safer.Safeifier.ActionModule;
+
+/**
+ * Register the CheckWildcardsAction
+ * @author nomoa
+ *
+ */
+public class CheckWildcardsActionModule implements ActionModule {
+       /**
+        * Maximum number of wildcards allowed in a wildcard query.
+        */
+       public final static int DEFAULT_MAX_WILDCARDS = 5;
+
+       public final static Action<DelayedWildcardQuery, Query> 
PARSE_DELAYED_WILDCARD = new Action<DelayedWildcardQuery, Query>() {
+               public Query apply(Safeifier safeifier, DelayedWildcardQuery q) 
{
+                       try {
+                               return q.compileWildcard();
+                       } catch (ParseException e) {
+                               throw new RuntimeException(e);
+                       }
+               }
+       };
+
+       private int maxWildcards = DEFAULT_MAX_WILDCARDS;
+       private CheckWildcardsAction checkWildcardsAction = 
CheckWildcardsAction.ERROR;
+
+       @Override
+       public void register(Safeifier safeifier) {
+               safeifier.register(DelayedWildcardQuery.class, new 
Action<DelayedWildcardQuery, Query>() {
+                       @Override
+                       public Query apply(Safeifier safeifier, 
DelayedWildcardQuery query) {
+                               return checkWildcardsAction.perform(query, 
maxWildcards);
+                       }
+               });
+       }
+
+       public CheckWildcardsActionModule maxWildcards(int maxWildcards) {
+               this.maxWildcards = maxWildcards;
+               return this;
+       }
+
+       public void checkWildcardsAction(CheckWildcardsAction 
checkWildcardsAction) {
+               this.checkWildcardsAction = checkWildcardsAction;
+       }
+}
diff --git 
a/src/main/java/org/wikimedia/search/extra/safer/wildcard/CheckWildcardsActionModuleParser.java
 
b/src/main/java/org/wikimedia/search/extra/safer/wildcard/CheckWildcardsActionModuleParser.java
new file mode 100644
index 0000000..1fa4150
--- /dev/null
+++ 
b/src/main/java/org/wikimedia/search/extra/safer/wildcard/CheckWildcardsActionModuleParser.java
@@ -0,0 +1,46 @@
+package org.wikimedia.search.extra.safer.wildcard;
+
+import java.io.IOException;
+
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.query.QueryParseContext;
+import org.elasticsearch.index.query.QueryParsingException;
+import org.wikimedia.search.extra.safer.ActionModuleParser;
+
+public class CheckWildcardsActionModuleParser implements 
ActionModuleParser<CheckWildcardsActionModule> {
+
+    @Override
+    public String moduleName() {
+        return "wildcards";
+    }
+
+    @Override
+    public CheckWildcardsActionModule parse(QueryParseContext parseContext) 
throws IOException {
+        CheckWildcardsActionModule module = new CheckWildcardsActionModule();
+        XContentParser parser = parseContext.parser();
+        String currentFieldName = null;
+        XContentParser.Token token;
+        while ((token = parser.nextToken()) != 
XContentParser.Token.END_OBJECT) {
+            if (token == XContentParser.Token.FIELD_NAME) {
+                currentFieldName = parser.currentName();
+            } else if (token.isValue()) {
+                switch (currentFieldName) {
+                case "max_wildcards":
+                case "maxWildcards":
+                    module.maxWildcards(parser.intValue());
+                    break;
+                case "check_wildcards_action":
+                case "checkWildcardsAction":
+                    
module.checkWildcardsAction(CheckWildcardsAction.parse(parser.text()));
+                    break;
+                default:
+                    throw new QueryParsingException(parseContext.index(), 
"[safer][wildcards] query does not support the field ["
+                            + currentFieldName + "]");
+                }
+            } else {
+                throw new QueryParsingException(parseContext.index(), 
"[safer][wildcards] only supports values, not objects.");
+            }
+        }
+        return module;
+    }
+}
diff --git 
a/src/main/java/org/wikimedia/search/extra/safer/wildcard/DelayedWildcardQuery.java
 
b/src/main/java/org/wikimedia/search/extra/safer/wildcard/DelayedWildcardQuery.java
new file mode 100644
index 0000000..d29720f
--- /dev/null
+++ 
b/src/main/java/org/wikimedia/search/extra/safer/wildcard/DelayedWildcardQuery.java
@@ -0,0 +1,50 @@
+package org.wikimedia.search.extra.safer.wildcard;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.search.MultiTermQuery.RewriteMethod;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.WildcardQuery;
+
+/**
+ * Just hold the query parser waiting for Safeifier to check the wildcard
+ */
+public class DelayedWildcardQuery extends Query {
+       private final Term term;
+       private RewriteMethod rewriteMethod;
+
+       public DelayedWildcardQuery(Term term, RewriteMethod rewriteMethod) {
+               this.term = term;
+               this.rewriteMethod = rewriteMethod;
+       }
+
+       @Override
+       public String toString(String field) {
+               return this.getClass().getSimpleName();
+       }
+
+       public Query compileWildcard() throws ParseException {
+               WildcardQuery wq = new WildcardQuery(term);
+               wq.setRewriteMethod(rewriteMethod);
+               return wq;
+       }
+
+       public Query compileWildcard(String newTerm) throws ParseException {
+        WildcardQuery wq = new WildcardQuery(new Term(term.field(), newTerm));
+        wq.setRewriteMethod(rewriteMethod);
+        return wq;
+       }
+
+       public String getTermText() {
+               return term.text();
+       }
+
+       @Override
+       public Query rewrite(IndexReader reader) throws IOException {
+               // Oops
+               throw new RuntimeException("This query must not be here...");
+       }
+}
diff --git 
a/src/main/java/org/wikimedia/search/extra/safer/wildcard/SaferMapperQueryParser.java
 
b/src/main/java/org/wikimedia/search/extra/safer/wildcard/SaferMapperQueryParser.java
new file mode 100644
index 0000000..9d869e6
--- /dev/null
+++ 
b/src/main/java/org/wikimedia/search/extra/safer/wildcard/SaferMapperQueryParser.java
@@ -0,0 +1,28 @@
+package org.wikimedia.search.extra.safer.wildcard;
+
+import org.apache.lucene.queryparser.classic.MapperQueryParser;
+import org.apache.lucene.queryparser.classic.QueryParserSettings;
+import org.apache.lucene.search.MultiTermQuery.RewriteMethod;
+import org.apache.lucene.search.Query;
+import org.elasticsearch.index.query.QueryParseContext;
+
+/**
+ * Wrapper around MapperQueryParser.
+ * Hack for {@link CheckWildcardsActionModule} that need to prevent those 
queries from being compiled
+ */
+public class SaferMapperQueryParser extends MapperQueryParser {
+    private RewriteMethod rewriteMethod = null;
+       public SaferMapperQueryParser(QueryParseContext parseContext) {
+               super(parseContext);
+       }
+
+    public void reset(QueryParserSettings settings) {
+        super.reset(settings);
+        this.rewriteMethod = settings.rewriteMethod();
+    }
+
+       @Override
+       protected Query newWildcardQuery(org.apache.lucene.index.Term t) {
+           return new DelayedWildcardQuery(t, rewriteMethod);
+       }
+}
diff --git 
a/src/main/java/org/wikimedia/search/extra/safer/wildcard/TooManyWildcardsException.java
 
b/src/main/java/org/wikimedia/search/extra/safer/wildcard/TooManyWildcardsException.java
new file mode 100644
index 0000000..d44916e
--- /dev/null
+++ 
b/src/main/java/org/wikimedia/search/extra/safer/wildcard/TooManyWildcardsException.java
@@ -0,0 +1,12 @@
+package org.wikimedia.search.extra.safer.wildcard;
+
+/**
+ * Too many wildcards detected
+ */
+public class TooManyWildcardsException extends RuntimeException {
+       private static final long serialVersionUID = 5575496998211483109L;
+
+    public TooManyWildcardsException(String message) {
+        super(message);
+    }
+}
diff --git 
a/src/test/java/org/wikimedia/search/extra/safer/CheckWildcardsTest.java 
b/src/test/java/org/wikimedia/search/extra/safer/CheckWildcardsTest.java
new file mode 100644
index 0000000..a1ef39a
--- /dev/null
+++ b/src/test/java/org/wikimedia/search/extra/safer/CheckWildcardsTest.java
@@ -0,0 +1,37 @@
+package org.wikimedia.search.extra.safer;
+
+import org.junit.Assert;
+
+import org.junit.Test;
+import org.wikimedia.search.extra.safer.wildcard.CheckWildcardsAction;
+
+public class CheckWildcardsTest {
+    @Test
+    public void test() {
+        assertSubstring("A*B", "A*B*", 2);
+        assertSubstring("A*B*", "A*B*", 3);
+        assertSubstring("A*B\\*", "A*B\\*", 2);
+        assertSubstring("A*B\\*C", "A*B\\*C*D", 2);
+        assertSubstring("A*B", "A*B?", 2);
+        assertSubstring("A*B\\?C", "A*B\\?C*D", 2);
+        assertSubstring("A*B\\?C", "A*B\\?C?D", 2);
+        assertSubstring("", "*", 1);
+        assertSubstring("\ud802\udd10*\ud802\udd10", 
"\ud802\udd10*\ud802\udd10*", 2);
+        assertSubstring("\ud802\udd10\\*\ud802\udd10*\ud802\udd10*", 
"\ud802\udd10\\*\ud802\udd10*\ud802\udd10*", 3);
+        assertSubstring("*\ud802\udd10\\*\ud802\udd10", 
"*\ud802\udd10\\*\ud802\udd10*\ud802\udd10*", 2);
+    }
+
+    @Test(expected=IllegalArgumentException.class)
+    public void testEdge() {
+        assertSubstring(null, "", 0);
+    }
+
+    public void assertSubstring(String expected, String text, int nth) {
+        int r = CheckWildcardsAction.getNthWildcardIndex(text, nth);
+        String substring = text;
+        if(r >= 0) {
+            substring = text.substring(0, r);
+        }
+        Assert.assertEquals(expected, substring);
+    }
+}
diff --git 
a/src/test/java/org/wikimedia/search/extra/safer/SaferQueryStringTest.java 
b/src/test/java/org/wikimedia/search/extra/safer/SaferQueryStringTest.java
index 20f837e..187094d 100644
--- a/src/test/java/org/wikimedia/search/extra/safer/SaferQueryStringTest.java
+++ b/src/test/java/org/wikimedia/search/extra/safer/SaferQueryStringTest.java
@@ -23,6 +23,7 @@
 import org.wikimedia.search.extra.AbstractPluginIntegrationTest;
 import org.wikimedia.search.extra.safer.phrase.PhraseTooLargeAction;
 import org.wikimedia.search.extra.safer.simple.SimpleActionModule.Option;
+import org.wikimedia.search.extra.safer.wildcard.CheckWildcardsAction;
 
 /**
  * Tests the safer query wrapping query_string queries similar to how <a 
>CirrusSearch</a> works.
@@ -134,6 +135,37 @@
         assertSearchByString("findme:\"CaptureAAAA Test\"");
     }
 
+    @Test
+    public void wildcards() {
+        QueryStringQueryBuilder qs = queryString("_all:Capt*u*r*e*AAAA");
+        SaferQueryBuilder b = new SaferQueryBuilder(qs);
+        client().prepareSearch("test").setQuery(b);
+        SearchRequestBuilder search = 
client().prepareSearch("test").setQuery(b);
+        assertSearchHits(search.get(), "delimited1");
+
+        qs = queryString("_all:Capt*u*r*e*AAAAITSME");
+        b = new SaferQueryBuilder(qs);
+        b.maxWildcards(3);
+        b.checkWildCardAction(CheckWildcardsAction.BREAK);
+        search = client().prepareSearch("test").setQuery(b)
+                .addHighlightedField("_all").setQuery(b)
+                .addHighlightedField("findme").setQuery(b);
+        assertSearchHits(search.get());
+
+        qs = queryString("_all:Cap*tu*AAA*THIS_SHOULD_BE_IGNORED");
+        b = new SaferQueryBuilder(qs);
+        b.maxWildcards(2);
+        b.checkWildCardAction(CheckWildcardsAction.BREAK);
+        search = client().prepareSearch("test").setQuery(b);
+        assertSearchHits(search.get(), "delimited1", "delimited2");
+
+        qs = queryString("_all:0*1*2*3*4*5*6*7*8*9*1*1*3*4*5*2*3*4");
+        b = new SaferQueryBuilder(qs);
+        b.maxWildcards(2);
+        search = client().prepareSearch("test").setQuery(b);
+        assertFailures(search, RestStatus.BAD_REQUEST, 
containsString("WildCard query is too complex"));
+    }
+
     private void assertSearchByFieldsFails(boolean total, String query, 
String... fields) {
         QueryStringQueryBuilder qs = queryString(query);
         SaferQueryBuilder b = new 
SaferQueryBuilder(qs).maxTermsPerPhraseQuery(5).maxTermsInAllPhraseQueries(8);

-- 
To view, visit https://gerrit.wikimedia.org/r/219182
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ifcc9e55a4d81d48c05a76ca1afdd9b3adbf86006
Gerrit-PatchSet: 1
Gerrit-Project: search/extra
Gerrit-Branch: 1.3
Gerrit-Owner: DCausse <dcau...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to