Joal has uploaded a new change for review.
https://gerrit.wikimedia.org/r/195952
Change subject: Move UAParser wrapper to refinery-core and update refinery-hive
accordingly.
......................................................................
Move UAParser wrapper to refinery-core and update refinery-hive accordingly.
Change-Id: I77a3cf57ed96658b763370423fe79bb1b4aded4d
---
M refinery-core/pom.xml
A
refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/UAParser.java
A
refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestUAParserUserAgentMostPopular.java
A
refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestUAParserUserAgentRecognition.java
M refinery-hive/pom.xml
M
refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/UAParserUDF.java
6 files changed, 479 insertions(+), 81 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery/source
refs/changes/52/195952/1
diff --git a/refinery-core/pom.xml b/refinery-core/pom.xml
index d8a7a6f..c562502 100644
--- a/refinery-core/pom.xml
+++ b/refinery-core/pom.xml
@@ -56,6 +56,16 @@
<artifactId>commons-lang3</artifactId>
</dependency>
+ <dependency>
+ <groupId>ua_parser</groupId>
+ <artifactId>ua-parser</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>com.googlecode.json-simple</groupId>
+ <artifactId>json-simple</artifactId>
+ </dependency>
+
</dependencies>
<build>
diff --git
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/UAParser.java
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/UAParser.java
new file mode 100644
index 0000000..2acab8c
--- /dev/null
+++
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/UAParser.java
@@ -0,0 +1,117 @@
+/**
+ * Copyright (C) 2015 Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.wikimedia.analytics.refinery.core;
+
+
+import org.apache.log4j.Logger;
+import ua_parser.*;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Contains functions to parse user agent string using ua-parser library
+ */
+public class UAParser {
+
+    /** Placeholder stored for every value that could not be determined. */
+    public static final String NA = "-";
+
+    static final Logger LOG = Logger.getLogger(UAParser.class.getName());
+
+    private CachingParser cachingParser;
+    // Single map instance, cleared and refilled by each getUAMap call.
+    private Map<String, String> result = new HashMap<String, String>();
+
+    /**
+     * Replaces a null or empty string with the NA one.
+     * @param str the string to check
+     * @return the original string if not null/empty, NA otherwise
+     */
+    private String replaceNA(String str) {
+        return (str == null || str.isEmpty()) ? NA : str;
+    }
+
+    /**
+     * Creates the wrapper together with its underlying CachingParser.
+     * @throws RuntimeException if the CachingParser cannot be instantiated
+     */
+    public UAParser() {
+        try {
+            cachingParser = new CachingParser();
+        } catch (IOException e) {
+            // unrecoverable: log, then rethrow unchecked keeping the cause
+            LOG.error(e.getMessage(), e);
+            throw new RuntimeException("Failed to instantiate CachingParser", e);
+        }
+    }
+
+    /**
+     * Extracts browser, device and os information from the UA string.
+     * The returned map is reused: the next call clears and refills it.
+     * @param uaString the ua string to parse
+     * @return the ua map with browser_family, browser_major, device_family,
+     *         os_family, os_major, os_minor keys; missing values are NA.
+     */
+    public Map<String, String> getUAMap(String uaString) {
+        result.clear();
+
+        UserAgent browser = null;
+        Device device = null;
+        OS os = null;
+
+        try {
+            Client c = cachingParser.parse(uaString);
+            if (c != null) {
+                browser = c.userAgent;
+                device = c.device;
+                os = c.os;
+            }
+        } catch (Exception e) {
+            // catch it all so the job does not halt on one faulty record
+            LOG.error(e.getMessage(), e);
+        }
+
+        if (browser != null) {
+            result.put("browser_family", replaceNA(browser.family));
+            result.put("browser_major", replaceNA(browser.major));
+        } else {
+            result.put("browser_family", NA);
+            result.put("browser_major", NA);
+        }
+
+        if (device != null) {
+            result.put("device_family", replaceNA(device.family));
+        } else {
+            result.put("device_family", NA);
+        }
+
+        if (os != null) {
+            result.put("os_family", replaceNA(os.family));
+            result.put("os_major", replaceNA(os.major));
+            result.put("os_minor", replaceNA(os.minor));
+        } else {
+            result.put("os_family", NA);
+            result.put("os_major", NA);
+            result.put("os_minor", NA);
+        }
+
+        return result;
+    }
+
+}
diff --git
a/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestUAParserUserAgentMostPopular.java
b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestUAParserUserAgentMostPopular.java
new file mode 100644
index 0000000..d1f224f
--- /dev/null
+++
b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestUAParserUserAgentMostPopular.java
@@ -0,0 +1,206 @@
+/**
+ * Copyright (C) 2015 Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.wikimedia.analytics.refinery.core;
+
+import junit.framework.TestCase;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * We test the most popular browser + device combos (from sampled logs)
+ * and what the ua parser reports for those.
+ * <p/>
+ * A failing test indicates that the newer version of ua parser
+ * is significantly different from the prior one.
+ */
+@RunWith(Parameterized.class)
+public class TestUAParserUserAgentMostPopular extends TestCase {
+
+ UAParser uaParser = null;
+ JSONParser jsonParser = null;
+
+ @Before
+ public void setUp() {
+ uaParser = new UAParser();
+ jsonParser = new JSONParser();
+ }
+
+ @Parameterized.Parameters
+ public static Collection<Object[]> data() {
+ return Arrays.asList(new Object[][]{
+ {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5)
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36",
"{\"os_minor\": \"8\", \"os_major\": \"10\", \"device_family\": \"Other\",
\"os_family\": \"Mac OS X\", \"browser_major\": \"38\", \"browser_family\":
\"Chrome\"}"},
+ {"GMozilla/5.0 (Linux; Android 4.4.2; GT-I9505 Build/KOT49H)
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.102 Mobile
Safari/537.36", "{\"os_minor\": \"4\", \"os_major\": \"4\", \"device_family\":
\"Samsung GT-I9505\", \"os_family\": \"Android\", \"browser_major\": \"38\",
\"browser_family\": \"Chrome Mobile\"}"},
+ {"Mozilla/5.0 (compatible; YoudaoBot/1.0;
http://www.youdao.com/help/webmaster/spider/; )", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Spider\", \"os_family\": \"Other\",
\"browser_major\": \"-\", \"browser_family\": \"Other\"}"},
+ {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5)
AppleWebKit/600.1.17 (KHTML, like Gecko) Version/6.2 Safari/537.85.10",
"{\"os_minor\": \"8\", \"os_major\": \"10\", \"device_family\": \"Other\",
\"os_family\": \"Mac OS X\", \"browser_major\": \"6\", \"browser_family\":
\"Safari\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36
(KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows
7\", \"browser_major\": \"36\", \"browser_family\": \"Chrome\"}"},
+ {"Mozilla/5.0 (Windows NT 6.3) AppleWebKit/537.36 (KHTML, like
Gecko) Chrome/38.0.2125.104 Safari/537.36", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows
8.1\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"},
+ {"Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_2 like Mac OS X)
AppleWebKit/537.51.2 (KHTML, like Gecko) CriOS/38.0.2125.59 Mobile/11D257
Safari/9537.53", "{\"os_minor\": \"1\", \"os_major\": \"7\", \"device_family\":
\"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"38\",
\"browser_family\": \"Chrome Mobile iOS\"}"},
+ {"Opera/9.80 (Windows NT 5.1) Presto/2.12.388 Version/12.17",
"{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\",
\"os_family\": \"Windows XP\", \"browser_major\": \"12\", \"browser_family\":
\"Opera\"}"},
+ {"Mozilla/5.0 (Linux; Android 4.4.4; Nexus 5 Build/KTU84P)
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.102 Mobile
Safari/537.36", "{\"os_minor\": \"4\", \"os_major\": \"4\", \"device_family\":
\"Nexus 5\", \"os_family\": \"Android\", \"browser_major\": \"38\",
\"browser_family\": \"Chrome Mobile\"}"},
+ {"Mozilla/5.0 (iPhone; CPU iPhone OS 7_0_3 like Mac OS X)
AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B511
Safari/9537.53", "{\"os_minor\": \"0\", \"os_major\": \"7\", \"device_family\":
\"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"7\",
\"browser_family\": \"Mobile Safari\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like
Gecko) Chrome/37.0.2062.120 Safari/537.36", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows
7\", \"browser_major\": \"37\", \"browser_family\": \"Chrome\"}"},
+ {"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36
(KHTML, like Gecko) Chrome/38.0.2125.101 Safari/537.36 OPR/25.0.1614.50",
"{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\",
\"os_family\": \"Windows 8.1\", \"browser_major\": \"25\", \"browser_family\":
\"Opera\"}"},
+ {"Mozilla/5.0 (iPad; CPU OS 6_1_3 like Mac OS X)
AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10B329
Safari/8536.25", "{\"os_minor\": \"1\", \"os_major\": \"6\", \"device_family\":
\"iPad\", \"os_family\": \"iOS\", \"browser_major\": \"6\", \"browser_family\":
\"Mobile Safari\"}"},
+ {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5)
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36",
"{\"os_minor\": \"7\", \"os_major\": \"10\", \"device_family\": \"Other\",
\"os_family\": \"Mac OS X\", \"browser_major\": \"38\", \"browser_family\":
\"Chrome\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36
(KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows
7\", \"browser_major\": \"35\", \"browser_family\": \"Chrome\"}"},
+ {"Mozilla/5.0 (iPhone; CPU iPhone OS 8_0_2 like Mac OS X)
AppleWebKit/600.1.4 (KHTML, like Gecko) CriOS/38.0.2125.59 Mobile/12A405
Safari/600.1.4", "{\"os_minor\": \"0\", \"os_major\": \"8\", \"device_family\":
\"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"38\",
\"browser_family\": \"Chrome Mobile iOS\"}"},
+ {"Mozilla/5.0 (Android; Mobile; rv:33.0) Gecko/33.0
Firefox/33.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Android\", \"browser_major\": \"33\",
\"browser_family\": \"Firefox Mobile\"}"},
+ {"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36
(KHTML, like Gecko) Chrome/38.0.2125.101 Safari/537.36", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows
8.1\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"},
+ {"AppleDictionaryService/208", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Other\",
\"browser_major\": \"-\", \"browser_family\": \"Other\"}"},
+ {"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/537.36 (KHTML, like
Gecko) Chrome/37.0.2062.124 Safari/537.36", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows
Vista\", \"browser_major\": \"37\", \"browser_family\": \"Chrome\"}"},
+ {"Mozilla/5.0 (iPad; CPU OS 5_1_1 like Mac OS X)
AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9B206
Safari/7534.48.3", "{\"os_minor\": \"1\", \"os_major\": \"5\",
\"device_family\": \"iPad\", \"os_family\": \"iOS\", \"browser_major\": \"5\",
\"browser_family\": \"Mobile Safari\"}"},
+ {"Mozilla/5.0 (en-us) AppleWebKit/534.14 (KHTML, like Gecko;
Google Wireless Transcoder) Chrome/9.0.597 Safari/534.14", "{\"os_minor\":
\"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\":
\"Other\", \"browser_major\": \"9\", \"browser_family\": \"Chrome\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like
Gecko) Chrome/36.0.1985.125 YaBrowser/14.8.1985.12084 Safari/537.36",
"{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\",
\"os_family\": \"Windows 7\", \"browser_major\": \"14\", \"browser_family\":
\"Yandex Browser\"}"},
+ {"Mozilla/5.0 (iPad; CPU OS 8_0 like Mac OS X)
AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A365
Safari/600.1.4", "{\"os_minor\": \"0\", \"os_major\": \"8\", \"device_family\":
\"iPad\", \"os_family\": \"iOS\", \"browser_major\": \"8\", \"browser_family\":
\"Mobile Safari\"}"},
+ {"MediaWiki/1.25wmf3", "{\"os_minor\": \"-\", \"os_major\":
\"-\", \"device_family\": \"Other\", \"os_family\": \"Other\",
\"browser_major\": \"-\", \"browser_family\": \"Other\"}"},
+ {"Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; Touch;
rv:11.0) like Gecko", "{\"os_minor\": \"-\", \"os_major\": \"-\",
\"device_family\": \"Other\", \"os_family\": \"Windows 8.1\",
\"browser_major\": \"11\", \"browser_family\": \"IE\"}"},
+ {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36",
"{\"os_minor\": \"6\", \"os_major\": \"10\", \"device_family\": \"Other\",
\"os_family\": \"Mac OS X\", \"browser_major\": \"38\", \"browser_family\":
\"Chrome\"}"},
+ {"Opera/9.80 (Windows NT 6.1; WOW64) Presto/2.12.388
Version/12.17", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"12\",
\"browser_family\": \"Opera\"}"},
+ {"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:33.0)
Gecko/20100101 Firefox/33.0", "{\"os_minor\": \"-\", \"os_major\": \"-\",
\"device_family\": \"Other\", \"os_family\": \"Ubuntu\", \"browser_major\":
\"33\", \"browser_family\": \"Firefox\"}"},
+ {"Mozilla/5.0 (iPad; CPU OS 8_1 like Mac OS X)
AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410
Safari/600.1.4", "{\"os_minor\": \"1\", \"os_major\": \"8\", \"device_family\":
\"iPad\", \"os_family\": \"iOS\", \"browser_major\": \"8\", \"browser_family\":
\"Mobile Safari\"}"},
+ {"Mozilla/5.0 (iPad; CPU OS 7_1 like Mac OS X)
AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D167
Safari/9537.53", "{\"os_minor\": \"1\", \"os_major\": \"7\", \"device_family\":
\"iPad\", \"os_family\": \"iOS\", \"browser_major\": \"7\", \"browser_family\":
\"Mobile Safari\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36
(KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows
7\", \"browser_major\": \"37\", \"browser_family\": \"Chrome\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like
Gecko) Chrome/38.0.2125.101 Safari/537.36 OPR/25.0.1614.50", "{\"os_minor\":
\"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\":
\"Windows 7\", \"browser_major\": \"25\", \"browser_family\": \"Opera\"}"},
+ {"Mozilla/5.0 (iPhone; CPU iPhone OS 7_0_6 like Mac OS X)
AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B651
Safari/9537.53", "{\"os_minor\": \"0\", \"os_major\": \"7\", \"device_family\":
\"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"7\",
\"browser_family\": \"Mobile Safari\"}"},
+ {"Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X)
AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e
Safari/8536.25 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
"{\"os_minor\": \"0\", \"os_major\": \"6\", \"device_family\": \"Spider\",
\"os_family\": \"iOS\", \"browser_major\": \"2\", \"browser_family\":
\"Googlebot\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20100101
Firefox/31.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"31\",
\"browser_family\": \"Firefox\"}"},
+ {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4)
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36",
"{\"os_minor\": \"9\", \"os_major\": \"10\", \"device_family\": \"Other\",
\"os_family\": \"Mac OS X\", \"browser_major\": \"38\", \"browser_family\":
\"Chrome\"}"},
+ {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5)
AppleWebKit/537.78.2 (KHTML, like Gecko) Version/6.1.6 Safari/537.78.2",
"{\"os_minor\": \"7\", \"os_major\": \"10\", \"device_family\": \"Other\",
\"os_family\": \"Mac OS X\", \"browser_major\": \"6\", \"browser_family\":
\"Safari\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like
Gecko) Chrome/38.0.2125.101 Safari/537.36", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows
7\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"},
+ {"Mozilla/5.0 (iPhone; CPU iPhone OS 6_1_3 like Mac OS X)
AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10B329
Safari/8536.25", "{\"os_minor\": \"1\", \"os_major\": \"6\", \"device_family\":
\"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"6\",
\"browser_family\": \"Mobile Safari\"}"},
+ {"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; WOW64;
Trident/6.0)", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows 8\", \"browser_major\": \"10\",
\"browser_family\": \"IE\"}"},
+ {"Mozilla/5.0 (iPad; CPU OS 7_0_4 like Mac OS X)
AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B554a
Safari/9537.53", "{\"os_minor\": \"0\", \"os_major\": \"7\", \"device_family\":
\"iPad\", \"os_family\": \"iOS\", \"browser_major\": \"7\", \"browser_family\":
\"Mobile Safari\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36
(KHTML, like Gecko) Chrome/36.0.1985.125 YaBrowser/14.8.1985.12084
Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"14\",
\"browser_family\": \"Yandex Browser\"}"},
+ {"Mozilla/5.0 (Windows NT 6.0; rv:32.0) Gecko/20100101
Firefox/32.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows Vista\", \"browser_major\": \"32\",
\"browser_family\": \"Firefox\"}"},
+ {"Mozilla/5.0 (iPhone; CPU iPhone OS 8_0_2 like Mac OS X)
AppleWebKit/537.51.2 (KHTML, like Gecko) GSA/4.2.2.38484 Mobile/12A405
Safari/9537.53", "{\"os_minor\": \"0\", \"os_major\": \"8\", \"device_family\":
\"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"8\",
\"browser_family\": \"Mobile Safari\"}"},
+ {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4)
AppleWebKit/537.77.4 (KHTML, like Gecko) Version/7.0.5 Safari/537.77.4",
"{\"os_minor\": \"9\", \"os_major\": \"10\", \"device_family\": \"Other\",
\"os_family\": \"Mac OS X\", \"browser_major\": \"7\", \"browser_family\":
\"Safari\"}"},
+ {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:32.0)
Gecko/20100101 Firefox/32.0", "{\"os_minor\": \"9\", \"os_major\": \"10\",
\"device_family\": \"Other\", \"os_family\": \"Mac OS X\", \"browser_major\":
\"32\", \"browser_family\": \"Firefox\"}"},
+ {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4)
AppleWebKit/537.78.2 (KHTML, like Gecko) Version/7.0.6 Safari/537.78.2",
"{\"os_minor\": \"9\", \"os_major\": \"10\", \"device_family\": \"Other\",
\"os_family\": \"Mac OS X\", \"browser_major\": \"7\", \"browser_family\":
\"Safari\"}"},
+ {"Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_2 like Mac OS X)
AppleWebKit/537.51.2 (KHTML, like Gecko) GSA/4.2.2.38484 Mobile/11D257
Safari/9537.53", "{\"os_minor\": \"1\", \"os_major\": \"7\", \"device_family\":
\"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"7\",
\"browser_family\": \"Mobile Safari\"}"},
+ {"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0;
Trident/5.0)", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows Vista\", \"browser_major\": \"9\",
\"browser_family\": \"IE\"}"},
+ {"Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101
Firefox/32.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows 8\", \"browser_major\": \"32\",
\"browser_family\": \"Firefox\"}"},
+ {"Mozilla/5.0 (compatible; Googlebot/2.1;
+http://www.google.com/bot.html)", "{\"os_minor\": \"-\", \"os_major\": \"-\",
\"device_family\": \"Spider\", \"os_family\": \"Other\", \"browser_major\":
\"2\", \"browser_family\": \"Googlebot\"}"},
+ {"NativeHost", "{\"os_minor\": \"-\", \"os_major\": \"-\",
\"device_family\": \"Other\", \"os_family\": \"Other\", \"browser_major\":
\"-\", \"browser_family\": \"Other\"}"},
+ {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5)
AppleWebKit/537.78.2 (KHTML, like Gecko) Version/7.0.6 Safari/537.78.2",
"{\"os_minor\": \"9\", \"os_major\": \"10\", \"device_family\": \"Other\",
\"os_family\": \"Mac OS X\", \"browser_major\": \"7\", \"browser_family\":
\"Safari\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36
(KHTML, like Gecko) Chrome/38.0.2125.101 Safari/537.36 OPR/25.0.1614.50",
"{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\",
\"os_family\": \"Windows 7\", \"browser_major\": \"25\", \"browser_family\":
\"Opera\"}"},
+ {"Mozilla/5.0 (Windows NT 6.3; WOW64; rv:33.0) Gecko/20100101
Firefox/33.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows 8.1\", \"browser_major\": \"33\",
\"browser_family\": \"Firefox\"}"},
+ {"Mozilla/5.0 (iPad; CPU OS 7_1_1 like Mac OS X)
AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D201
Safari/9537.53", "{\"os_minor\": \"1\", \"os_major\": \"7\", \"device_family\":
\"iPad\", \"os_family\": \"iOS\", \"browser_major\": \"7\", \"browser_family\":
\"Mobile Safari\"}"},
+ {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)
AppleWebKit/534.59.10 (KHTML, like Gecko) Version/5.1.9 Safari/534.59.10",
"{\"os_minor\": \"6\", \"os_major\": \"10\", \"device_family\": \"Other\",
\"os_family\": \"Mac OS X\", \"browser_major\": \"5\", \"browser_family\":
\"Safari\"}"},
+ {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_0)
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36",
"{\"os_minor\": \"10\", \"os_major\": \"10\", \"device_family\": \"Other\",
\"os_family\": \"Mac OS X\", \"browser_major\": \"38\", \"browser_family\":
\"Chrome\"}"},
+ {"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36
(KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows
8\", \"browser_major\": \"37\", \"browser_family\": \"Chrome\"}"},
+ {"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1;
Trident/6.0)", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"10\",
\"browser_family\": \"IE\"}"},
+ {"Mozilla/5.0 (iPhone; CPU iPhone OS 7_1 like Mac OS X)
AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D167
Safari/9537.53", "{\"os_minor\": \"1\", \"os_major\": \"7\", \"device_family\":
\"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"7\",
\"browser_family\": \"Mobile Safari\"}"},
+ {"Mozilla/5.0 (Windows NT 5.1; rv:33.0) Gecko/20100101
Firefox/33.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows XP\", \"browser_major\": \"33\",
\"browser_family\": \"Firefox\"}"},
+ {"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/537.36 (KHTML, like
Gecko) Chrome/38.0.2125.104 Safari/537.36", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows
Vista\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"},
+ {"facebookexternalhit/1.1
(+http://www.facebook.com/externalhit_uatext.php)", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Other\",
\"browser_major\": \"1\", \"browser_family\": \"FacebookBot\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36
(KHTML, like Gecko) Chrome/38.0.2125.101 Safari/537.36", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows
7\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1; rv:33.0) Gecko/20100101
Firefox/33.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"33\",
\"browser_family\": \"Firefox\"}"},
+ {"Mozilla/5.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X)
AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A365
Safari/600.1.4", "{\"os_minor\": \"0\", \"os_major\": \"8\", \"device_family\":
\"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"8\",
\"browser_family\": \"Mobile Safari\"}"},
+ {"Mozilla/5.0 (iPhone; CPU iPhone OS 7_0_4 like Mac OS X)
AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B554a
Safari/9537.53", "{\"os_minor\": \"0\", \"os_major\": \"7\", \"device_family\":
\"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"7\",
\"browser_family\": \"Mobile Safari\"}"},
+ {"Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X)
AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411
Safari/600.1.4", "{\"os_minor\": \"1\", \"os_major\": \"8\", \"device_family\":
\"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"8\",
\"browser_family\": \"Mobile Safari\"}"},
+ {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5)
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36",
"{\"os_minor\": \"9\", \"os_major\": \"10\", \"device_family\": \"Other\",
\"os_family\": \"Mac OS X\", \"browser_major\": \"38\", \"browser_family\":
\"Chrome\"}"},
+ {"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64;
Trident/6.0)", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"10\",
\"browser_family\": \"IE\"}"},
+ {"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like
Gecko) Chrome/37.0.2062.124 Safari/537.36", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows
XP\", \"browser_major\": \"37\", \"browser_family\": \"Chrome\"}"},
+ {"-", "{\"os_minor\": \"-\", \"os_major\": \"-\",
\"device_family\": \"Other\", \"os_family\": \"Other\", \"browser_major\":
\"-\", \"browser_family\": \"Other\"}"},
+ {"Mozilla/5.0 (compatible; bingbot/2.0;
+http://www.bing.com/bingbot.htm)", "{\"os_minor\": \"-\", \"os_major\": \"-\",
\"device_family\": \"Spider\", \"os_family\": \"Other\", \"browser_major\":
\"2\", \"browser_family\": \"bingbot\"}"},
+ {"Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0)
like Gecko", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows 8.1\", \"browser_major\": \"11\",
\"browser_family\": \"IE\"}"},
+ {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10)
AppleWebKit/600.1.25 (KHTML, like Gecko) Version/8.0 Safari/600.1.25",
"{\"os_minor\": \"10\", \"os_major\": \"10\", \"device_family\": \"Other\",
\"os_family\": \"Mac OS X\", \"browser_major\": \"8\", \"browser_family\":
\"Safari\"}"},
+ {"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36
(KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows
8.1\", \"browser_major\": \"37\", \"browser_family\": \"Chrome\"}"},
+ {"Mozilla/5.0 (Windows NT 6.3; WOW64; rv:32.0) Gecko/20100101
Firefox/32.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows 8.1\", \"browser_major\": \"32\",
\"browser_family\": \"Firefox\"}"},
+ {"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1;
Trident/5.0)", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"9\",
\"browser_family\": \"IE\"}"},
+ {"Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_1 like Mac OS X)
AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D201
Safari/9537.53", "{\"os_minor\": \"1\", \"os_major\": \"7\", \"device_family\":
\"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"7\",
\"browser_family\": \"Mobile Safari\"}"},
+ {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5)
AppleWebKit/600.1.17 (KHTML, like Gecko) Version/7.1 Safari/537.85.10",
"{\"os_minor\": \"9\", \"os_major\": \"10\", \"device_family\": \"Other\",
\"os_family\": \"Mac OS X\", \"browser_major\": \"7\", \"browser_family\":
\"Safari\"}"},
+ {"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64;
Trident/5.0)", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"9\",
\"browser_family\": \"IE\"}"},
+ {"Mozilla/5.0 (iPad; CPU OS 8_0_2 like Mac OS X)
AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A405
Safari/600.1.4", "{\"os_minor\": \"0\", \"os_major\": \"8\", \"device_family\":
\"iPad\", \"os_family\": \"iOS\", \"browser_major\": \"8\", \"browser_family\":
\"Mobile Safari\"}"},
+ {"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36
(KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows
8\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"},
+ {"Mozilla/5.0 (iPad; CPU OS 7_1_2 like Mac OS X)
AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D257
Safari/9537.53", "{\"os_minor\": \"1\", \"os_major\": \"7\", \"device_family\":
\"iPad\", \"os_family\": \"iOS\", \"browser_major\": \"7\", \"browser_family\":
\"Mobile Safari\"}"},
+ {"Mozilla/5.0 (Windows NT 5.1; rv:32.0) Gecko/20100101
Firefox/32.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows XP\", \"browser_major\": \"32\",
\"browser_family\": \"Firefox\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1; rv:32.0) Gecko/20100101
Firefox/32.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"32\",
\"browser_family\": \"Firefox\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:33.0) Gecko/20100101
Firefox/33.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"33\",
\"browser_family\": \"Firefox\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like
Gecko) Chrome/37.0.2062.124 Safari/537.36", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows
7\", \"browser_major\": \"37\", \"browser_family\": \"Chrome\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like
Gecko", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"11\",
\"browser_family\": \"IE\"}"},
+ {"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like
Gecko) Chrome/38.0.2125.104 Safari/537.36", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows
XP\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36
(KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows
7\", \"browser_major\": \"37\", \"browser_family\": \"Chrome\"}"},
+ {"Mozilla/5.0 (iPhone; CPU iPhone OS 8_0_2 like Mac OS X)
AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A405
Safari/600.1.4", "{\"os_minor\": \"0\", \"os_major\": \"8\", \"device_family\":
\"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"8\",
\"browser_family\": \"Mobile Safari\"}"},
+ {"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36
(KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows
8.1\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"},
+ {"Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_2 like Mac OS X)
AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D257
Safari/9537.53", "{\"os_minor\": \"1\", \"os_major\": \"7\", \"device_family\":
\"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"7\",
\"browser_family\": \"Mobile Safari\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0)
like Gecko", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"11\",
\"browser_family\": \"IE\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:32.0) Gecko/20100101
Firefox/32.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\":
\"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"32\",
\"browser_family\": \"Firefox\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like
Gecko) Chrome/38.0.2125.104 Safari/537.36", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows
7\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"},
+ {"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36
(KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36", "{\"os_minor\": \"-\",
\"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows
7\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"},
+
+ });
+ }
+
+ private String fInput;
+
+ private String fExpected;
+
+
+ public TestUAParserUserAgentMostPopular(String input, String expected) {
+ fInput = input;
+ fExpected = expected;
+ }
+
+
+ /**
+  * Parses the user agent of this test instance and compares each field
+  * of the result with the matching entry of the expected JSON object.
+  *
+  * @throws ParseException if the expected JSON text cannot be parsed
+  */
+ @Test
+ public void testMatchingOfMostPopularUA() throws ParseException {
+     // Expected values, decoded from their JSON representation.
+     JSONObject wanted = (JSONObject) jsonParser.parse(fExpected);
+
+     // Values actually computed by the parser under test.
+     Map<String, String> actual = uaParser.getUAMap(fInput);
+
+     assertEquals("OS name check",
+             wanted.get("os_family"), actual.get("os_family"));
+
+     assertEquals("OS major version check",
+             wanted.get("os_major"), actual.get("os_major"));
+
+     assertEquals("OS minor version check",
+             wanted.get("os_minor"), actual.get("os_minor"));
+
+     assertEquals("browser check",
+             wanted.get("browser_family"), actual.get("browser_family"));
+
+     assertEquals("browser major version check",
+             wanted.get("browser_major"), actual.get("browser_major"));
+
+     assertEquals("device check",
+             wanted.get("device_family"), actual.get("device_family"));
+ }
+
+}
\ No newline at end of file
diff --git
a/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestUAParserUserAgentRecognition.java
b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestUAParserUserAgentRecognition.java
new file mode 100644
index 0000000..4b28245
--- /dev/null
+++
b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestUAParserUserAgentRecognition.java
@@ -0,0 +1,135 @@
+/**
+ * Copyright (C) 2015 Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.wikimedia.analytics.refinery.core;
+
+import junit.framework.TestCase;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Exercises {@code UAParser.getUAMap} with a few well-known user agents,
+ * with an empty user agent and with a null user agent.
+ */
+public class TestUAParserUserAgentRecognition extends TestCase {
+
+    UAParser uaParser = null;
+
+    /** Creates a fresh parser before each test. */
+    @Before
+    public void setUp() {
+        uaParser = new UAParser();
+    }
+
+    /**
+     * Checks OS and browser family for three regular user agents.
+     */
+    @Test
+    public void testHappyCase() {
+
+        // Literals are split with compile-time concatenation so that no
+        // source line has to be wrapped inside a string.
+        String ua1 = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:19.0) "
+                + "Gecko/20100101 Firefox/19.0";
+        String ua2 = "Mozilla/5.0 (iPad; U; CPU OS 3_2_1 like Mac OS X; en-us) "
+                + "AppleWebKit/531.21.10 (KHTML, like Gecko) Mobile/7B405";
+        String ua3 = "Mozilla/5.0 (iPad; CPU OS 7_0_3 like Mac OS X) "
+                + "AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 "
+                + "Mobile/11B511 Safari/9537.53";
+
+        Map<String, String> evaled = uaParser.getUAMap(ua1);
+        assertEquals("OS name check", "Ubuntu", evaled.get("os_family"));
+        assertEquals("Browser name check", "Firefox",
+                evaled.get("browser_family"));
+
+        evaled = uaParser.getUAMap(ua2);
+        assertEquals("OS name check", "iOS", evaled.get("os_family"));
+        assertEquals("Browser name check", "Mobile Safari",
+                evaled.get("browser_family"));
+
+        evaled = uaParser.getUAMap(ua3);
+        assertEquals("OS name check", "iOS", evaled.get("os_family"));
+        assertEquals("Browser name check", "Mobile Safari",
+                evaled.get("browser_family"));
+    }
+
+    /**
+     * Tests what we return when the user agent is empty.
+     *
+     * UA parser will return this as "browser obj" for an empty user agent
+     * string:
+     * {
+     *   user_agent: {family: "Other", major: null, minor: null, patch: null},
+     *   os: {family: "Other", major: null, minor: null, patch: null,
+     *        patch_minor: null},
+     *   device: {family: "Other"}
+     * }
+     * The resulting map looks something like the following:
+     * {
+     *   "device_family":"Other",
+     *   "browser_major":"-",
+     *   "os_family":"Other",
+     *   "os_major":"-",
+     *   "browser_family":"Other",
+     *   "os_minor":"-"
+     * }
+     */
+    @Test
+    public void testEmptyUA() {
+
+        Map<String, String> evaled = uaParser.getUAMap("");
+
+        assertEquals("OS name check", "Other", evaled.get("os_family"));
+        assertEquals("Browser name check", "Other",
+                evaled.get("browser_family"));
+        assertEquals("OS minor", "-", evaled.get("os_minor"));
+    }
+
+    /**
+     * Tests what we return when the user agent is null.
+     *
+     * {
+     *   "device_family":"-",
+     *   "browser_major":"-",
+     *   "os_family":"-",
+     *   "os_major":"-",
+     *   "browser_family":"-",
+     *   "os_minor":"-"
+     * }
+     */
+    // Annotated like its siblings; previously this method was only picked
+    // up through the JUnit 3 "test" name prefix.
+    @Test
+    public void testHandlingOfNulls() {
+
+        Map<String, String> evaled = uaParser.getUAMap(null);
+
+        assertEquals("OS name check", "-", evaled.get("os_family"));
+        assertEquals("Browser name check", "-",
+                evaled.get("browser_family"));
+        assertEquals("OS minor", "-", evaled.get("os_minor"));
+    }
+
+}
\ No newline at end of file
diff --git a/refinery-hive/pom.xml b/refinery-hive/pom.xml
index e7833fe..f601c38 100644
--- a/refinery-hive/pom.xml
+++ b/refinery-hive/pom.xml
@@ -42,10 +42,6 @@
</dependency>
<dependency>
- <groupId>ua_parser</groupId>
- <artifactId>ua-parser</artifactId>
- </dependency>
- <dependency>
<groupId>com.googlecode.json-simple</groupId>
<artifactId>json-simple</artifactId>
</dependency>
diff --git
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/UAParserUDF.java
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/UAParserUDF.java
index 7cf97ae..27563ed 100644
---
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/UAParserUDF.java
+++
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/UAParserUDF.java
@@ -15,9 +15,8 @@
import
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import
org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.log4j.Logger;
-import ua_parser.*;
+import org.wikimedia.analytics.refinery.core.UAParser;
-import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
@@ -34,12 +33,6 @@
* }
* <p/>
* Records are processed one by one.
- *
- * NOTE: This UDF was original coded as part of the Kraken repository, and
may have not
- * received as high of qualitiy code review as we would like. However, it
works as is,
- * and we want to make this functionality available for use. Please also note
that there
- * is currently not a process for ensuring that the dependent ua_parser
package is up to date
- * with the latest user agent classification regexes.
*/
@UDFType(deterministic = true)
@@ -47,8 +40,8 @@
+ "Returns a map with browser_name, browser_major, device, os_name,
os_minor, os_major keys and "
+ "the appropriate values for each of them")
public class UAParserUDF extends GenericUDF {
- Map<String, String> result = new HashMap<String, String>();
- public CachingParser cachingParser;
+ private Map<String, String> empytMap = new HashMap<String,String>();
+ private UAParser uaParser;
private ObjectInspector argumentOI;
// TODO figure out why not everything is logged to hive.log and some
logging
@@ -100,15 +93,8 @@
}
-
- try {
- cachingParser = new CachingParser();
- } catch (IOException e) {
- // no recovery should be possible, log and rethrow
- // runtime exception will be logged to stdout by default
- Log.error(e.getMessage(), e);
- throw new RuntimeException("Failed to instantiate CachingParser");
- }
+ // Instantiate the UAParser
+ uaParser = new UAParser();
argumentOI = arg;
return ObjectInspectorFactory.getStandardMapObjectInspector(
@@ -116,17 +102,7 @@
PrimitiveObjectInspectorFactory.javaStringObjectInspector);
}
- private final String NA = "-";
- private String replaceNA(String str) {
- final String ret;
- if (str == null || str.isEmpty() || str.equals("-")) {
- ret = NA;
- } else {
- ret = str;
- }
- return ret;
- }
/**
* Takes the actual arguments and returns the result.
@@ -148,57 +124,15 @@
@SuppressWarnings("unchecked")
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
- result.clear();
+ assert uaParser != null: "Evaluate called without initializing
'uaParser'";
- UserAgent browser = null;
- Device device = null;
- OS os = null;
-
- try {
- if (arguments.length == 1
- && argumentOI != null && arguments[0] != null) {
- Client c;
- String pattern = ((StringObjectInspector) argumentOI)
- .getPrimitiveJavaObject(arguments[0].get());
-
- c = cachingParser.parse(pattern);
- if (c != null) {
- browser = c.userAgent;
- device = c.device;
- os = c.os;
- }
- }
- } catch (Exception e) {
- // catch it all to make sure job does not halt if one record is
faulty
- // TODO find out why this gets logged to hadoop but not to hive.log
- Log.error(e.getMessage(), e);
+ if (arguments.length == 1 && argumentOI != null && arguments[0] !=
null) {
+ String ua = ((StringObjectInspector)
argumentOI).getPrimitiveJavaObject(arguments[0].get());
+ return uaParser.getUAMap(ua);
}
- if (browser != null) {
- result.put("browser_family", replaceNA(browser.family));
- result.put("browser_major", replaceNA(browser.major));
- } else {
- result.put("browser_family", NA);
- result.put("browser_major", NA);
- }
-
- if (device != null) {
- result.put("device_family", replaceNA(device.family));
- } else {
- result.put("device_family", NA);
- }
-
- if (os != null) {
- result.put("os_family", replaceNA(os.family));
- result.put("os_major", replaceNA(os.major));
- result.put("os_minor", replaceNA(os.minor));
- } else {
- result.put("os_family", NA);
- result.put("os_major", NA);
- result.put("os_minor", NA);
- }
-
- return result;
+ // Return an empty map in case of arguments irregularity
+ return empytMap;
}
/**
--
To view, visit https://gerrit.wikimedia.org/r/195952
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I77a3cf57ed96658b763370423fe79bb1b4aded4d
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery/source
Gerrit-Branch: master
Gerrit-Owner: Joal <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits