Bearloga has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/277679

Change subject: Search engine detection bug fix
......................................................................

Search engine detection bug fix

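- We noticed that requests referred from a bare domain such as
  http://google.com (no "www." or other subdomain) were not being
  detected as coming from Google, because the pattern required a
  dot before "google". This patch makes that leading dot optional.
- Also adds the Russian search engine Rambler.ru to the
  SearchEngine enum.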

Change-Id: Iab14d7398031f447e2aa8305a36895bfd17d141c
---
M refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngine.java
M refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngineClassifier.java
M refinery-core/src/test/resources/referer_test_data.csv
3 files changed, 6 insertions(+), 3 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery/source refs/changes/79/277679/1

diff --git 
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngine.java
 
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngine.java
index 2339abf..0150907 100644
--- 
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngine.java
+++ 
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngine.java
@@ -25,11 +25,12 @@
  * to specify a string with spaces and symbols if needed.
  */
 public enum SearchEngine {
-    GOOGLE("Google", "\\.google\\."),
+    GOOGLE("Google", "\\.?google\\."),
     YAHOO("Yahoo", "search\\.yahoo\\."),
     BING("Bing", "\\.bing\\."),
     YANDEX("Yandex", "yandex\\."),
-    BAIDU("Baidu", "\\.baidu\\.");
+    BAIDU("Baidu", "\\.baidu\\."),
+    RAMBLER("Rambler", "\\.rambler\\.");
 
     private final String searchEngineName;
     private final Pattern pattern;
diff --git 
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngineClassifier.java
 
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngineClassifier.java
index 8fe4bb8..9cf6655 100644
--- 
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngineClassifier.java
+++ 
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngineClassifier.java
@@ -38,7 +38,7 @@
     /*
      * A simple pattern for search identification
      */
-    private static final Pattern searchPattern = 
Pattern.compile("(\\.(baidu|bing|google)|search\\.yahoo|yandex)\\.");
+    private static final Pattern searchPattern = 
Pattern.compile("(\\.?(rambler|baidu|bing|google)|search\\.yahoo|yandex)\\.");
 
     /**
      * Crudely subsets a referer to just contain the domain,
diff --git a/refinery-core/src/test/resources/referer_test_data.csv 
b/refinery-core/src/test/resources/referer_test_data.csv
index c6de7b8..16ada44 100644
--- a/refinery-core/src/test/resources/referer_test_data.csv
+++ b/refinery-core/src/test/resources/referer_test_data.csv
@@ -2,8 +2,10 @@
 Random internal 
link,https://zh.wikipedia.org/zh-tw/%E6%96%B9%E6%9D%B1%E6%98%87,internal,false,none
 Nada,-,none,false,none
 Google,https://www.google.co.id/,external (search engine),true,Google
+Google,http://google.com/,external (search engine),true,Google
 
Yahoo,http://search.yahoo.co.jp/search?fr=slv1-necpc9&p=%E4%B8%89%E5%8F%88&ei=UTF-8,external
 (search engine),true,Yahoo
 Random external link,http://www.cowboom.com/product/1617241,external,false,none
 Bing,http://www.bing.com/search?q=Svengali+movie+1931&filters=ufn,external 
(search engine),true,Bing
 Baidu,https://www.baidu.com/link?url=,external (search engine),true,Baidu
 
Yandex,http://yandex.ru/clck/jsredir?from=yandex.ru%3Bsearch%2F%3Bweb%3B%3B,external
 (search engine),true,Yandex
+Rambler,http://www.rambler.ru/,external (search engine),true,Rambler

-- 
To view, visit https://gerrit.wikimedia.org/r/277679
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iab14d7398031f447e2aa8305a36895bfd17d141c
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery/source
Gerrit-Branch: master
Gerrit-Owner: Bearloga <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to