DCausse has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/315503

Change subject: [search] Add support for generator api requests
......................................................................

[search] Add support for generator api requests

I think we underestimate API usage for some kinds of searches:
- cirrus fulltext searches
- geosearch
All Search APIs can be called by using a list or a generator.

Change-Id: Ie3ded1eb58259ad6689ebfdc6a7d7edad91a83f8
---
M 
refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchRequest.java
M 
refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestSearchRequest.java
2 files changed, 54 insertions(+), 104 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery/source 
refs/changes/03/315503/1

diff --git 
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchRequest.java
 
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchRequest.java
index ca27623..fc1d30a 100644
--- 
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchRequest.java
+++ 
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchRequest.java
@@ -16,6 +16,8 @@
 
 package org.wikimedia.analytics.refinery.core;
 
+import java.util.regex.Pattern;
+
 /**
  * Functions to work with Wikimedia webrequest data.
  * These functions are optimised for identifying and categorising API requests 
using the search system.
@@ -49,13 +51,11 @@
 
     private final String queryAction = "action=query";
 
-    private final String prefixSearchList = "list=prefixsearch";
+    private final Pattern prefixSearch = 
Pattern.compile("(list|generator)=prefixsearch");
 
-    private final String prefixSearchGenerator = "generator=prefixsearch";
-
-    private final String searchList = "list=search";
-
-    private final String geoSearchList = "list=geosearch";
+    private final Pattern search = Pattern.compile("(list|generator)=search");
+    
+    private final Pattern geoSearch = 
Pattern.compile("(list|generator)=geosearch");
 
     private final String apiPath = "api.php";
 
@@ -79,15 +79,15 @@
         {
             if(Utilities.stringContains(uriQuery, queryAction))
             {
-                if(Utilities.stringContains(uriQuery, prefixSearchList) || 
Utilities.stringContains(uriQuery, prefixSearchGenerator))
+                if(Utilities.patternIsFound(prefixSearch, uriQuery))
                 {
                     output = "prefix";
                 }
-                else if(Utilities.stringContains(uriQuery, searchList))
+                else if(Utilities.patternIsFound(search, uriQuery))
                 {
                     output = "cirrus";
                 }
-                else if(Utilities.stringContains(uriQuery, geoSearchList))
+                else if(Utilities.patternIsFound(geoSearch, uriQuery))
                 {
                     output = "geo";
                 }
diff --git 
a/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestSearchRequest.java
 
b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestSearchRequest.java
index f76d556..fa6ccdb 100644
--- 
a/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestSearchRequest.java
+++ 
b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestSearchRequest.java
@@ -15,103 +15,53 @@
 package org.wikimedia.analytics.refinery.core;
 
 
-import junit.framework.TestCase;
+import java.util.Arrays;
+import java.util.Collection;
 
-public class TestSearchRequest extends TestCase {
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
 
-    //Generic classifier assertion
-    private void assertSearchClassifier(final String uriPath, final String 
uriQuery, final String expected) {
+@RunWith(Parameterized.class)
+public class TestSearchRequest {
+    private final String what;
+    private final  String uriPath;
+    private final  String uriQuery;
+    private final  String expected;
+    private final boolean searchAPI;
+    
+    public TestSearchRequest(String what, String uriPath, String uriQuery, 
String expected, boolean searchAPI) {
+        super();
+        this.what = what;
+        this.uriPath = uriPath;
+        this.uriQuery = uriQuery;
+        this.expected = expected;
+        this.searchAPI = searchAPI;
+    }
+
+    @Parameters
+    public static Collection<Object[]> getData() {
+        // Page, URI PARAMS, expected classifySearchRequest, isSearchRequest
+        return Arrays.asList(new Object[][]{
+            {"normal page", "/wiki/Foobarbaz", "", "", false},
+            {"random api", "w/api.php", 
"maxlag=5&format=json&meta=userinfo&action=query", "", false},
+            {"search api via list", "w/api.php", 
"action=query&list=search&srsearch=hosted desktop&srprop=snippet", "cirrus", 
true},
+            {"search api via generator", "/w/api.php", 
"?action=query&format=json&prop=pageterms%7Cpageimages&wbptterms=description&generator=search&gsrsearch=blah+blah&gsrnamespace=0&gsrwhat=text&gsrinfo=&gsrprop=redirecttitle&gsrlimit=12&piprop=thumbnail&pithumbsize=96&pilimit=12&continue=",
 "cirrus", true},
+            {"opensearch api", "/w/api.php", 
"action=opensearch&format=json&search=d1&namespace=0&limit=10", "open", true},
+            {"language search", 
"w/api.php","action=languagesearch&search=espa", "language", true},
+            {"geosearch via list", 
"w/api.php","action=query&list=geosearch&gsradius=10000&gscoord=13.99861|100.53008",
 "geo", true},
+            {"geosearch via generator", 
"w/api.php","?action=query&format=json&prop=coordinates%7Cpageimages%7Cpageterms&colimit=100&piprop=thumbnail&pithumbsize=320&pilimit=100&wbptterms=description&generator=geosearch&ggscoord=12.306473%7C10.254717&ggsradius=520.3277496558758&ggslimit=100&continue=",
 "geo", true},
+            {"prefix via list", 
"/w/api.php","action=query&format=json&generator=prefixsearch&list=prefixsearch&pssearch=O",
 "prefix", true},
+            {"prefix via generator", 
"w/api.php","?action=query&format=json&prop=pageprops%7Cpageprops%7Cpageimages%7Cpageterms&generator=prefixsearch&ppprop=displaytitle&piprop=thumbnail&pithumbsize=80&pilimit=15&wbptterms=description&redirects=&gpssearch=blah+blah%C4%87&gpsnamespace=0&gpslimit=15",
 "prefix", true},
+        });
+    }
+
+    @Test
+    public void testSearchClassifier() {
         String actual = 
SearchRequest.getInstance().classifySearchRequest(uriPath, uriQuery);
-
-        assertEquals("The actual output does not match the expected output",
-                expected, actual);
-    }
-
-    //Test a normal request. Expect an empty string from the search classifer.
-    public void testNoneClassify() {
-        assertSearchClassifier("/wiki/Foobarbaz", "", "");
-    }
-
-    //Test an API request lacking any of the right action=foo entries.
-    public void testAPINoActionClassify() {
-        
assertSearchClassifier("w/api.php","maxlag=5&format=json&meta=userinfo&action=query",
 "");
-    }
-
-    //Test an API request with an acceptable action but no search entry
-    public void testAPINoSearchClassify() {
-        
assertSearchClassifier("w/api.php","action=query&prop=revisions&titles=hall&rvprop=content",
 "");
-    }
-
-    //Test a Cirrus Search request. Expect "cirrus"
-    public void testCirrusClassify() {
-        
assertSearchClassifier("w/api.php","action=query&list=search&srsearch=hosted 
desktop&srprop=snippet", "cirrus");
-    }
-
-    //Test a Open Search request. Expect "open"
-    public void testOpenClassify() {
-        
assertSearchClassifier("w/api.php","action=opensearch&format=json&search=d1&namespace=0&limit=10",
 "open");
-    }
-
-    //Test a Language Search request. Expect "language"
-    public void testLanguageClassify() {
-        
assertSearchClassifier("w/api.php","action=languagesearch&search=espa", 
"language");
-    }
-
-    //Test a Geo Search request. Expect "geo"
-    public void testGeoClassify() {
-        
assertSearchClassifier("w/api.php","action=query&list=geosearch&gsradius=10000&gscoord=13.99861|100.53008",
 "geo");
-    }
-
-    //Test a Prefix Search request. Expect "prefix"
-    public void testPrefixClassify() {
-        
assertSearchClassifier("w/api.php","action=query&format=json&generator=prefixsearch&list=prefixsearch&pssearch=O",
 "prefix");
-    }
-
-    //Generic boolean assertion
-    private void assertSearchBoolean(final String uriPath, final String 
uriQuery, final boolean expected) {
-        boolean actual = SearchRequest.getInstance().isSearchRequest(uriPath, 
uriQuery);
-
-        assertEquals("The actual output does not match the expected output",
-                expected, actual);
-    }
-
-    //Test a normal request. Expect false.
-    public void testNoneBoolean() {
-        assertSearchBoolean("/wiki/Foobarbaz", "", false);
-    }
-
-    //Test an API request lacking any of the right action=foo entries. Expect 
false.
-    public void testAPINoActionBoolean() {
-        
assertSearchBoolean("w/api.php","maxlag=5&format=json&meta=userinfo&action=query",
 false);
-    }
-
-    //Test an API request with an acceptable action but no search entry. 
Expect false.
-    public void testAPINoSearchBoolean() {
-        
assertSearchBoolean("w/api.php","action=query&prop=revisions&titles=hall&rvprop=content",
 false);
-    }
-
-    //Test a Cirrus Search request. Expect true
-    public void testCirrusBoolean() {
-        
assertSearchBoolean("w/api.php","action=query&list=search&srsearch=hosted 
desktop&srprop=snippet", true);
-    }
-
-    //Test a Open Search request. Expect true
-    public void testOpenBoolean() {
-        
assertSearchBoolean("w/api.php","action=opensearch&format=json&search=d1&namespace=0&limit=10",
 true);
-    }
-
-    //Test a Language Search request. Expect true
-    public void testLanguageBoolean() {
-        assertSearchBoolean("w/api.php","action=languagesearch&search=espa", 
true);
-    }
-
-    //Test a Geo Search request. Expect true
-    public void testGeoBoolean() {
-        
assertSearchBoolean("w/api.php","action=query&list=geosearch&gsradius=10000&gscoord=13.99861|100.53008",
 true);
-    }
-
-    //Test a Prefix Search request. Expect true
-    public void testPrefixBoolean() {
-        
assertSearchBoolean("w/api.php","action=query&format=json&generator=prefixsearch&list=prefixsearch&pssearch=O",
 true);
+        Assert.assertEquals(what + " (classifySearchRequest)", expected, 
actual);
+        Assert.assertEquals(what + " (isSearch)", searchAPI, 
SearchRequest.getInstance().isSearchRequest(uriPath, uriQuery));
     }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/315503
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ie3ded1eb58259ad6689ebfdc6a7d7edad91a83f8
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery/source
Gerrit-Branch: master
Gerrit-Owner: DCausse <dcau...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to