Nuria has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/327855 )

Change subject: UDF for extracting primary full text search request
......................................................................


UDF for extracting primary full text search request

CirrusSearch logs contain each individual request made from mediawiki to
elasticsearch. Oftentimes we need to extract only the primary request
to do analysis on. It's quite a pain to do in HQL, so this UDF handles
it for us.

Adds a helper class for dealing with the structs, as I have the feeling
there will be more use cases for UDFs that directly handle the list of
structs.

Bug: T162054
Change-Id: I67d5f0e7674f970b353ab5992fec1431f4592256
---
A 
refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/CirrusRequestDeser.java
M 
refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GenericUDFHelper.java
A 
refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GetMainSearchRequestUDF.java
A 
refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGetMainSearchRequestUDF.java
4 files changed, 506 insertions(+), 10 deletions(-)

Approvals:
  jenkins-bot: Verified
  DCausse: Looks good to me, but someone else must approve
  Gehel: Looks good to me, but someone else must approve
  Nuria: Looks good to me, approved



diff --git 
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/CirrusRequestDeser.java
 
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/CirrusRequestDeser.java
new file mode 100644
index 0000000..1f39e5e
--- /dev/null
+++ 
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/CirrusRequestDeser.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2014  Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.wikimedia.analytics.refinery.hive;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+
+/**
+ * Helper class for dealing with the array<struct<...>> in
+ * cirrussearchrequestset.
+ *
+ * Currently only extracts querytype and indices, but can be expanded as
+ * necessary to meet our needs in various UDFs.
+ */
+class CirrusRequestDeser {
+    private final StructObjectInspector oi;
+
+    private final StructField queryTypeField;
+    private final StringObjectInspector queryTypeOI;
+
+    private final StructField indicesField;
+    private final ListObjectInspector indicesOI;
+    private final StringObjectInspector indicesElemOI;
+
+    /**
+     * @param argsHelper
+     *          Helper object to verify hive structures match the expected 
formats.
+     * @param i
+     *          Argument index of oi (what position was oi in the argument list
+     *          of the UDF)
+     * @param oi
+     *          Object inspector for the UDF arguments. This must match the
+     *          CirrusSearchRequest structure in the CirrusSearchRequestSet
+     *          Avro schema.
+     *
+     * @throws UDFArgumentTypeException
+     *          Thrown when oi does not match the CirrusSearchRequest structure
+     *          in the CirrusSearchRequestSet Avro schema.
+     *          Also thrown when its 'querytype' or 'indices' field is absent.
+     */
+    public CirrusRequestDeser(GenericUDFHelper argsHelper, int i, 
StructObjectInspector oi)
+        throws UDFArgumentTypeException {
+        this.oi = oi;
+        // verify the 'querytype' field is a string
+        queryTypeField = oi.getStructFieldRef("querytype");
+        if (queryTypeField == null) {
+            throw new UDFArgumentTypeException(i,
+                argsHelper.getFuncName() + " Argument should contain a struct 
with a 'querytype' field");
+        }
+        ObjectInspector queryTypeOI = queryTypeField.getFieldObjectInspector();
+        argsHelper.checkArgType(queryTypeOI, i, PrimitiveCategory.STRING);
+
+        this.queryTypeOI = (StringObjectInspector) queryTypeOI;
+
+        // verify the 'indices' field is a list of strings
+        indicesField = oi.getStructFieldRef("indices");
+        if (indicesField == null) {
+            throw new UDFArgumentTypeException(i,
+                argsHelper.getFuncName() + " Argument should contain a struct 
with an 'indices' field");
+        }
+        argsHelper.checkListArgType(indicesField.getFieldObjectInspector(), i, 
PrimitiveCategory.STRING);
+        indicesOI = (ListObjectInspector) 
indicesField.getFieldObjectInspector();
+        this.indicesElemOI = (StringObjectInspector) 
indicesOI.getListElementObjectInspector();
+    }
+
+    public String getQueryType(Object struct) {
+        Object type = oi.getStructFieldData(struct, queryTypeField);
+        return queryTypeOI.getPrimitiveJavaObject(type);
+    }
+
+    public String[] getIndices(Object struct) {
+        Object list = oi.getStructFieldData(struct, indicesField);
+        if (list == null) {
+            return null;
+        }
+
+        int len = indicesOI.getListLength(list);
+        String[] indices = new String[len];
+        int i = 0;
+        for (Object inner : indicesOI.getList(list)) {
+            indices[i++] = indicesElemOI.getPrimitiveJavaObject(inner);
+        }
+
+        return indices;
+    }
+}
diff --git 
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GenericUDFHelper.java
 
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GenericUDFHelper.java
index ac6fcce..a8c97f1 100644
--- 
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GenericUDFHelper.java
+++ 
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GenericUDFHelper.java
@@ -2,7 +2,9 @@
 
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 
@@ -58,9 +60,52 @@
      */
     protected void checkArgPrimitive(ObjectInspector[] arguments, int i)
         throws UDFArgumentTypeException{
-        ObjectInspector.Category oiCat = arguments[i].getCategory();
-        if (oiCat != ObjectInspector.Category.PRIMITIVE) {
-            throw new UDFArgumentTypeException(i, getFuncName() + " Argument 
should be of primitive type");
+        checkArgType(arguments, i, Category.PRIMITIVE);
+    }
+
+    /**
+     * Checks argument category type is primitive
+     *
+     * @param oi
+     * @param i
+     *
+     * @throws UDFArgumentTypeException
+     */
+    protected void checkArgPrimitive(ObjectInspector oi, int i) throws 
UDFArgumentTypeException {
+        checkArgType(oi, i, Category.PRIMITIVE);
+    }
+
+    /**
+     * Checks argument type
+     *
+     * @param arguments
+     * @param i
+     * @param type
+     *
+     * @throws UDFArgumentTypeException
+     */
+    protected void checkArgType(ObjectInspector[] arguments, int i, 
PrimitiveCategory type)
+        throws UDFArgumentTypeException{
+        checkArgType(arguments[i], i, type);
+    }
+
+    /**
+     * Checks that an argument is a primitive of the expected primitive category
+     *
+     * @param oi object inspector of the argument being checked
+     * @param i index of the argument, reported in the exception when the check fails
+     * @param type primitive category the argument is required to have
+     *
+     * @throws UDFArgumentTypeException if oi is not primitive or its primitive category differs from type
+     */
+    protected void checkArgType(ObjectInspector oi, int i, PrimitiveCategory type) throws UDFArgumentTypeException {
+        checkArgPrimitive(oi, i);
+        PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector) oi).getPrimitiveCategory();
+
+        if (primitiveCategory != type) {
+            throw new UDFArgumentTypeException(i,
+                "A " + type + " argument was expected for "
+                + getFuncName() + " but an argument of type " + oi.getTypeName() + " was given.");
         }
     }
 
@@ -69,18 +114,83 @@
      *
      * @param arguments
      * @param i
+     * @param type
      *
      * @throws UDFArgumentTypeException
      */
-    protected void checkArgType(ObjectInspector[] arguments, int i, 
PrimitiveCategory type)
-            throws UDFArgumentTypeException{
+    protected void checkArgType(ObjectInspector[] arguments, int i, Category 
type)
+        throws UDFArgumentTypeException{
+        checkArgType(arguments[i], i, type);
+    }
 
-        PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector) 
arguments[i]).getPrimitiveCategory();
+    protected void checkArgType(ObjectInspector oi, int i, Category type)
+        throws UDFArgumentTypeException{
+        Category oiCat = oi.getCategory(); // category reported by the argument's inspector
+        if (oiCat != type) {
+            throw new UDFArgumentTypeException(i, getFuncName() + " Argument should be of "
+                    + type + " type"); // print the expected category itself, not the enum's class name
+        }
+    }
 
-        if (primitiveCategory != type) {
-            throw new UDFArgumentTypeException(0,
-                "A string argument was expected for " + getFuncName() + " but 
an argument of type " +
-                arguments[i].getTypeName() + " was given."
+    /**
+     * Checks argument type of list and its elements
+     *
+     * @param arguments
+     * @param i
+     * @param type
+     *            Type of list elements
+     *
+     * @throws UDFArgumentTypeException
+     */
+    protected void checkListArgType(ObjectInspector[] arguments, int i, 
Category type)
+        throws UDFArgumentTypeException {
+        checkListArgType(arguments[i], i, type);
+    }
+
+    /**
+     * Checks that an argument is a list whose elements have the expected category
+     *
+     * @param oi object inspector of the argument being checked
+     * @param i index of the argument, reported in the exception when the check fails
+     * @param type
+     *            Category required of the list elements
+     *
+     * @throws UDFArgumentTypeException if oi is not a list or its element category differs from type
+     */
+    protected void checkListArgType(ObjectInspector oi, int i, Category type)
+        throws UDFArgumentTypeException {
+        checkArgType(oi, i, Category.LIST);
+
+        ListObjectInspector listOI = (ListObjectInspector) oi;
+        if (listOI.getListElementObjectInspector().getCategory() != type) {
+            throw new UDFArgumentTypeException(i,
+                "An array<" + type + "> argument was expected "
+                + "for " + getFuncName() + " but an argument of type array<"
+                + listOI.getListElementObjectInspector().getTypeName() + "> was given."
+            );
+        }
+    }
+
+    /**
+     * Checks that an argument is a list of primitives of the expected primitive category
+     *
+     * @param oi object inspector of the argument being checked
+     * @param i index of the argument, reported in the exception when the check fails
+     * @param type
+     *            Primitive category required of the list elements
+     *
+     * @throws UDFArgumentTypeException if oi is not a list of primitives or its elements differ from type
+     */
+    protected void checkListArgType(ObjectInspector oi, int i, PrimitiveCategory type)
+        throws UDFArgumentTypeException {
+        checkListArgType(oi, i, Category.PRIMITIVE);
+        ListObjectInspector listOI = (ListObjectInspector) oi;
+        PrimitiveObjectInspector listElemOI = (PrimitiveObjectInspector) listOI.getListElementObjectInspector();
+        if (listElemOI.getPrimitiveCategory() != type) {
+            throw new UDFArgumentTypeException(i,
+                "An array<" + type + "> argument was expected "
+                + "for " + getFuncName() + " but an argument of type array<"
+                + listElemOI.getTypeName() + "> was given."
             );
         }
     }
diff --git 
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GetMainSearchRequestUDF.java
 
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GetMainSearchRequestUDF.java
new file mode 100644
index 0000000..d96bd00
--- /dev/null
+++ 
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GetMainSearchRequestUDF.java
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2014  Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.wikimedia.analytics.refinery.hive;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+
+/**
+ * Returns the primary full text search request from provided list of requests.
+ *
+ * Operates on the specialized format of the `requests` field in the
+ * wmf_raw.CirrusSearchRequestSets table to locate the primary full text search
+ * request from a set of requests. When doing a full text search in mediawiki
+ * there are a number of auxiliary requests performed: An initial near match
+ * search for similar titles to be redirected to directly, the full text
+ * searches for the current wiki and a number of its sister wikis. There are
+ * additionally second try searches that are performed if the first full text
+ * search performs poorly. These may be a new full text search for a suggested
+ * term, or a search against a wiki in the same project but in another
+ * language. The search log contains an entry for every one of these individual
+ * requests.
+ *
+ * Some analytics tasks only need to know about the primary request. This UDF
+ * takes in the name of the current wiki and the list of requests made, and
+ * returns a single request from that list, which is the primary full text
+ * search request.
+ */
+@Description(name = "get_main_search_request",
+    value = "_FUNC_(wiki, requests) - Returns the primary full text search 
request from requests")
+@UDFType(deterministic = true)
+public class GetMainSearchRequestUDF extends GenericUDF {
+    final private static String FULL_TEXT = "full_text";
+
+    private StringObjectInspector wikiOI;
+    private ListObjectInspector listOI;
+    private CirrusRequestDeser deser;
+
+    @Override
+    public ObjectInspector initialize(ObjectInspector[] arguments) throws 
UDFArgumentException {
+        GenericUDFHelper argsHelper = new GenericUDFHelper();
+        argsHelper.checkArgsSize(arguments, 2, 2);
+
+        // first argument must be a string
+        argsHelper.checkArgType(arguments, 0, PrimitiveCategory.STRING);
+        wikiOI = (StringObjectInspector) arguments[0];
+
+        // second argument must be an array of structs
+        argsHelper.checkListArgType(arguments, 1, 
ObjectInspector.Category.STRUCT);
+        listOI = (ListObjectInspector) arguments[1];
+        StructObjectInspector elemOI = (StructObjectInspector) 
listOI.getListElementObjectInspector();
+        // deser will do the rest of the validation
+        deser = new CirrusRequestDeser(argsHelper, 1, elemOI);
+
+        // Return value is a struct from the list
+        return elemOI;
+    }
+
+    @Override
+    public String getDisplayString(String[] arguments) {
+        return "GetMainSearchRequest(" + arguments[0] + ", " + arguments[1] + 
")";
+    }
+
+    @Override
+    public Object evaluate(DeferredObject[] dos) throws HiveException {
+        if (dos[0].get() == null || dos[1].get() == null) {
+            return null;
+        }
+        if (listOI.getListLength(dos[1].get()) == 0) {
+            return null;
+        }
+
+        List<?> requests = listOI.getList(dos[1].get());
+        String wiki = wikiOI.getPrimitiveJavaObject(dos[0].get());
+        String prefix = wiki + "_";
+        for (Object request : requests) {
+            String[] indices = deser.getIndices(request);
+            if (indices == null) {
+                continue;
+            }
+            for (String index : indices) {
+                // If the request was made against a different wiki
+                // ignore it, it's likely part of interwiki search.
+                if (index != null && (index.equals(wiki) || 
index.startsWith(prefix))
+                // At least one request must be a full_text query
+                    && FULL_TEXT.equals(deser.getQueryType(request))) {
+                    return request;
+                }
+            }
+        }
+
+        return null;
+    }
+}
diff --git 
a/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGetMainSearchRequestUDF.java
 
b/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGetMainSearchRequestUDF.java
new file mode 100644
index 0000000..0cb441f
--- /dev/null
+++ 
b/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGetMainSearchRequestUDF.java
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2014  Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.wikimedia.analytics.refinery.hive;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+
+import junitparams.JUnitParamsRunner;
+import junitparams.Parameters;
+
+@RunWith(JUnitParamsRunner.class)
+public class TestGetMainSearchRequestUDF {
+    private GetMainSearchRequestUDF udf;
+
+    @Before
+    public void setUp() throws HiveException {
+        udf = new GetMainSearchRequestUDF();
+
+        List<String> fields = new ArrayList<>();
+        List<ObjectInspector> fieldOIs = new ArrayList<>();
+
+        fields.add("querytype");
+        
fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
+
+        fields.add("indices");
+        fieldOIs.add(ObjectInspectorFactory
+            
.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector));
+
+        ObjectInspector wikiOI = 
PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+        ObjectInspector listOI = ObjectInspectorFactory
+            
.getStandardListObjectInspector(ObjectInspectorFactory.getStandardStructObjectInspector(fields,
 fieldOIs));
+
+        udf.initialize(new ObjectInspector[] { wikiOI, listOI });
+    }
+
+    @Test
+    @Parameters(method = "paramsForTest")
+    public void testEvaluate(String message, int expectIndex, String wiki, 
Object[] requests) throws HiveException {
+        GenericUDF.DeferredObject[] dois = {
+            new GenericUDF.DeferredJavaObject(wiki), new 
GenericUDF.DeferredJavaObject(requests) };
+        Object expect = null;
+        if (expectIndex >= 0) {
+            expect = requests[expectIndex];
+        }
+        assertEquals(message, expect, udf.evaluate(dois));
+    }
+
+    private Object[] paramsForTest() {
+        return new Object[] {
+            // Each 'new Object[]' at this level is a single test case
+            // representing a plausible set of search requests that could be
+            // logged to the `cirrussearchrequestset` table and the desired
+            // index of the primary full text search request in that set of
+            // requests.
+            new Object[] {
+                "simplest passing example", 0, "enwiki",
+                new Object[] {
+                    // This level is a list of requests made between mediawiki
+                    // and elasticsearch in the process of answering a search
+                    // request made of mediawiki. Multiple individual requests
+                    // are issued between mediawiki and elasticsearch, with the
+                    // goal of this UDF being to decide which one of those is
+                    // the primary full text search.
+                    new Object[] { "full_text", new String[] { 
"enwiki_content" } }
+                } },
+            new Object[] {
+                // Example of search request to a mediawiki instance 
configured with
+                // sister search. With sistersearch a full text request is 
made against
+                // one or more wikis with the same language but in a different 
project.
+                "including interwiki", 1, "enwiki",
+                new Object[] {
+                    new Object[] { "near_match", new String[] { 
"enwiki_content" } },
+                    new Object[] { "full_text", new String[] { 
"enwiki_content" } },
+                    new Object[] { "full_text", new String[] { 
"enwikibooks_content" } } } },
+            new Object[] {
+                // The order of sister searches doesn't have to be with the 
local wiki coming
+                // first. Make sure the UDF appropriately finds the primary 
request even
+                // if it comes after the sister search.
+                "interwiki alternate order", 3, "zhwiki",
+                new Object[] {
+                    new Object[] { "near_match", new String[] { 
"zhwiki_content" } },
+                    new Object[] { "full_text", new String[] { 
"zhwikisource_content" } },
+                    new Object[] { "full_text", new String[] { 
"zhwiktionary_content" } },
+                    new Object[] { "full_text", new String[] { 
"zhwiki_content" } } } },
+            new Object[] {
+                // Example of a search request that was run against multimedia
+                // search. In this case the full text search was performed
+                // against multiple indices, merging the results from frwiki
+                // and commonswiki.
+                "multimedia search", 1, "frwiki",
+                new Object[] {
+                    new Object[] { "near_match", new String[] { 
"frwiki_general" } },
+                    new Object[] { "full_text", new String[] { 
"frwiki_general", "commonswiki_file" } } } },
+            new Object[] {
+                // Search requests that ask for the 'everything' profile don't
+                // perform their search against a _content or _general index,
+                // instead using a top level alias in elasticsearch that refers
+                // to both. Ensure the UDF can still find the appropriate
+                // search request.
+                "search everything", 1, "enwiki",
+                new Object[] {
+                    new Object[] { "near_match", new String[] { "enwiki" } },
+                    new Object[] { "full_text", new String[] { "enwiki", 
"commonswiki_file" } } } },
+            new Object[] {
+                // Autocomplete search requests are not considered full text.
+                // As such the UDF should return NULL rather than the
+                // autocomplete request.
+                "completion suggester", -1, "itwiki",
+                new Object[] { new Object[] { "comp_suggest", new String[] { 
"itwiki_titlesuggest" } } } },
+            new Object[] {
+                // Mediawiki's 'Go' feature takes full text searches submitted
+                // in a particular way and looks for 'close enough' titles. If
+                // there is a title or redirect very close to the submitted
+                // string no full text search is performed and the user is
+                // redirected. These requests must be detected as not having a
+                // primary full text search
+                "successfull 'go'", -1, "arwiki",
+                new Object[] { new Object[] { "near_match", new String[] { 
"arwiki_content" } } } },
+            new Object[] {
+                // This type of request should generally not be recorded, but
+                // test the UDF with some odd inputs to ensure it is robust to
+                // the oddities that may occasionally arise.
+                "invalid record with null querytype", -1, "zhwiki",
+                new Object[] { new Object[] { null, new String[] { 
"zhwiki_content" } } } },
+            new Object[] {
+                // This type of request should generally not be recorded, but
+                // test the UDF with some odd inputs to ensure it is robust to
+                // the oddities that may occasionally arise.
+                "invalid record null querytype later", -1, "thwiki",
+                new Object[] {
+                    new Object[] { "near_match", new String[] { 
"thwiki_content" } },
+                    new Object[] { null, new String[] { "thwiki_content" } } } 
},
+            new Object[] {
+                // This type of request should generally not be recorded, but
+                // test the UDF with some odd inputs to ensure it is robust to
+                // the oddities that may occasionally arise.
+                "invalid record with null indices", -1, "thwiki",
+                new Object[] {
+                    new Object[] { "near_match", new String[] { 
"thwiki_content" } },
+                    new Object[] { "full_text", null } } } };
+    }
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/327855
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I67d5f0e7674f970b353ab5992fec1431f4592256
Gerrit-PatchSet: 6
Gerrit-Project: analytics/refinery/source
Gerrit-Branch: master
Gerrit-Owner: EBernhardson <[email protected]>
Gerrit-Reviewer: Bearloga <[email protected]>
Gerrit-Reviewer: DCausse <[email protected]>
Gerrit-Reviewer: EBernhardson <[email protected]>
Gerrit-Reviewer: Gehel <[email protected]>
Gerrit-Reviewer: Joal <[email protected]>
Gerrit-Reviewer: Nuria <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to