Joal has uploaded a new change for review. https://gerrit.wikimedia.org/r/195952
Change subject: Move UAParser wrapper to refinery-core and update refinery-hive accordingly. ...................................................................... Move UAParser wrapper to refinery-core and update refinery-hive accordingly. Change-Id: I77a3cf57ed96658b763370423fe79bb1b4aded4d --- M refinery-core/pom.xml A refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/UAParser.java A refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestUAParserUserAgentMostPopular.java A refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestUAParserUserAgentRecognition.java M refinery-hive/pom.xml M refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/UAParserUDF.java 6 files changed, 479 insertions(+), 81 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery/source refs/changes/52/195952/1 diff --git a/refinery-core/pom.xml b/refinery-core/pom.xml index d8a7a6f..c562502 100644 --- a/refinery-core/pom.xml +++ b/refinery-core/pom.xml @@ -56,6 +56,16 @@ <artifactId>commons-lang3</artifactId> </dependency> + <dependency> + <groupId>ua_parser</groupId> + <artifactId>ua-parser</artifactId> + </dependency> + + <dependency> + <groupId>com.googlecode.json-simple</groupId> + <artifactId>json-simple</artifactId> + </dependency> + </dependencies> <build> diff --git a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/UAParser.java b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/UAParser.java new file mode 100644 index 0000000..2acab8c --- /dev/null +++ b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/UAParser.java @@ -0,0 +1,117 @@ +/** + * Copyright (C) 2015 Wikimedia Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.wikimedia.analytics.refinery.core; + + +import org.apache.log4j.Logger; +import ua_parser.*; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +/** + * Contains functions to parse user agent string using ua-parser library + */ +public class UAParser { + + public static final String NA = "-"; + + static final Logger LOG = Logger.getLogger(UAParser.class.getName()); + + private CachingParser cachingParser; + private Map<String, String> result = new HashMap<String, String>(); + + /** + * Function replacing null/empty string with the NA one. + * @param str the string to check + * @return the original string if not null/empty, NA otherwise + */ + private String replaceNA(String str) { + final String ret; + if (str == null || str.isEmpty() || str.equals("-")) { + ret = NA; + } else { + ret = str; + } + return ret; + } + + public UAParser() { + try { + cachingParser = new CachingParser(); + } catch (IOException e) { + // no recovery should be possible, log and rethrow + // runtime exception will be logged to stdout by default + LOG.error(e.getMessage(), e); + throw new RuntimeException("Failed to instantiate CachingParser"); + } + } + + /** + * Function extracting browser, device and os information from the UA string. + * @param uaString the ua string to parse + * @return the ua map with browser_family, browser_major, device_family, os_family, + * os_major, os_minor keys and associated values. 
+ */ + public Map<String, String> getUAMap(String uaString) { + result.clear(); + + UserAgent browser = null; + Device device = null; + OS os = null; + + try { + Client c = cachingParser.parse(uaString); + if (c != null) { + browser = c.userAgent; + device = c.device; + os = c.os; + } + } catch (Exception e) { + // catch it all to make sure job does not halt if one record is faulty + LOG.error(e.getMessage(), e); + } + + if (browser != null) { + result.put("browser_family", replaceNA(browser.family)); + result.put("browser_major", replaceNA(browser.major)); + } else { + result.put("browser_family", NA); + result.put("browser_major", NA); + } + + if (device != null) { + result.put("device_family", replaceNA(device.family)); + } else { + result.put("device_family", NA); + } + + if (os != null) { + result.put("os_family", replaceNA(os.family)); + result.put("os_major", replaceNA(os.major)); + result.put("os_minor", replaceNA(os.minor)); + } else { + result.put("os_family", NA); + result.put("os_major", NA); + result.put("os_minor", NA); + } + + return result; + } + +} diff --git a/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestUAParserUserAgentMostPopular.java b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestUAParserUserAgentMostPopular.java new file mode 100644 index 0000000..d1f224f --- /dev/null +++ b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestUAParserUserAgentMostPopular.java @@ -0,0 +1,206 @@ +/** + * Copyright (C) 2015 Wikimedia Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.wikimedia.analytics.refinery.core; + +import junit.framework.TestCase; +import org.json.simple.JSONObject; +import org.json.simple.parser.JSONParser; +import org.json.simple.parser.ParseException; +import org.junit.After; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.Assert.assertEquals; + +/** + * We test the most popular browser + device combos (from sampled logs) + * and the ua parser reporting on those. + * <p/> + * Test failing will indicate that the newer version of ua parser + * is significantly different from the prior one. + */ +@RunWith(Parameterized.class) +public class TestUAParserUserAgentMostPopular extends TestCase { + + UAParser uaParser = null; + JSONParser jsonParser = null; + + @Before + public void setUp() { + uaParser = new UAParser(); + jsonParser = new JSONParser(); + } + + @Parameterized.Parameters + public static Collection<Object[]> data() { + return Arrays.asList(new Object[][]{ + {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36", "{\"os_minor\": \"8\", \"os_major\": \"10\", \"device_family\": \"Other\", \"os_family\": \"Mac OS X\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"}, + {"GMozilla/5.0 (Linux; Android 4.4.2; GT-I9505 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.102 Mobile Safari/537.36", "{\"os_minor\": \"4\", \"os_major\": \"4\", \"device_family\": \"Samsung GT-I9505\", \"os_family\": \"Android\", \"browser_major\": \"38\", \"browser_family\": \"Chrome Mobile\"}"}, + {"Mozilla/5.0 (compatible; YoudaoBot/1.0; 
http://www.youdao.com/help/webmaster/spider/; )", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Spider\", \"os_family\": \"Other\", \"browser_major\": \"-\", \"browser_family\": \"Other\"}"}, + {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/600.1.17 (KHTML, like Gecko) Version/6.2 Safari/537.85.10", "{\"os_minor\": \"8\", \"os_major\": \"10\", \"device_family\": \"Other\", \"os_family\": \"Mac OS X\", \"browser_major\": \"6\", \"browser_family\": \"Safari\"}"}, + {"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"36\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (Windows NT 6.3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 8.1\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_2 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) CriOS/38.0.2125.59 Mobile/11D257 Safari/9537.53", "{\"os_minor\": \"1\", \"os_major\": \"7\", \"device_family\": \"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"38\", \"browser_family\": \"Chrome Mobile iOS\"}"}, + {"Opera/9.80 (Windows NT 5.1) Presto/2.12.388 Version/12.17", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows XP\", \"browser_major\": \"12\", \"browser_family\": \"Opera\"}"}, + {"Mozilla/5.0 (Linux; Android 4.4.4; Nexus 5 Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.102 Mobile Safari/537.36", "{\"os_minor\": \"4\", \"os_major\": \"4\", \"device_family\": \"Nexus 5\", \"os_family\": \"Android\", \"browser_major\": \"38\", \"browser_family\": \"Chrome Mobile\"}"}, + {"Mozilla/5.0 (iPhone; CPU iPhone OS 7_0_3 like Mac OS X) 
AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B511 Safari/9537.53", "{\"os_minor\": \"0\", \"os_major\": \"7\", \"device_family\": \"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"7\", \"browser_family\": \"Mobile Safari\"}"}, + {"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"37\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.101 Safari/537.36 OPR/25.0.1614.50", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 8.1\", \"browser_major\": \"25\", \"browser_family\": \"Opera\"}"}, + {"Mozilla/5.0 (iPad; CPU OS 6_1_3 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10B329 Safari/8536.25", "{\"os_minor\": \"1\", \"os_major\": \"6\", \"device_family\": \"iPad\", \"os_family\": \"iOS\", \"browser_major\": \"6\", \"browser_family\": \"Mobile Safari\"}"}, + {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36", "{\"os_minor\": \"7\", \"os_major\": \"10\", \"device_family\": \"Other\", \"os_family\": \"Mac OS X\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"35\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (iPhone; CPU iPhone OS 8_0_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) CriOS/38.0.2125.59 Mobile/12A405 Safari/600.1.4", "{\"os_minor\": \"0\", \"os_major\": \"8\", \"device_family\": \"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"38\", \"browser_family\": 
\"Chrome Mobile iOS\"}"}, + {"Mozilla/5.0 (Android; Mobile; rv:33.0) Gecko/33.0 Firefox/33.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Android\", \"browser_major\": \"33\", \"browser_family\": \"Firefox Mobile\"}"}, + {"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.101 Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 8.1\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"}, + {"AppleDictionaryService/208", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Other\", \"browser_major\": \"-\", \"browser_family\": \"Other\"}"}, + {"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows Vista\", \"browser_major\": \"37\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (iPad; CPU OS 5_1_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9B206 Safari/7534.48.3", "{\"os_minor\": \"1\", \"os_major\": \"5\", \"device_family\": \"iPad\", \"os_family\": \"iOS\", \"browser_major\": \"5\", \"browser_family\": \"Mobile Safari\"}"}, + {"Mozilla/5.0 (en-us) AppleWebKit/534.14 (KHTML, like Gecko; Google Wireless Transcoder) Chrome/9.0.597 Safari/534.14", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Other\", \"browser_major\": \"9\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 YaBrowser/14.8.1985.12084 Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"14\", \"browser_family\": \"Yandex Browser\"}"}, + {"Mozilla/5.0 (iPad; CPU OS 8_0 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like 
Gecko) Version/8.0 Mobile/12A365 Safari/600.1.4", "{\"os_minor\": \"0\", \"os_major\": \"8\", \"device_family\": \"iPad\", \"os_family\": \"iOS\", \"browser_major\": \"8\", \"browser_family\": \"Mobile Safari\"}"}, + {"MediaWiki/1.25wmf3", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Other\", \"browser_major\": \"-\", \"browser_family\": \"Other\"}"}, + {"Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; Touch; rv:11.0) like Gecko", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 8.1\", \"browser_major\": \"11\", \"browser_family\": \"IE\"}"}, + {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36", "{\"os_minor\": \"6\", \"os_major\": \"10\", \"device_family\": \"Other\", \"os_family\": \"Mac OS X\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"}, + {"Opera/9.80 (Windows NT 6.1; WOW64) Presto/2.12.388 Version/12.17", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"12\", \"browser_family\": \"Opera\"}"}, + {"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:33.0) Gecko/20100101 Firefox/33.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Ubuntu\", \"browser_major\": \"33\", \"browser_family\": \"Firefox\"}"}, + {"Mozilla/5.0 (iPad; CPU OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410 Safari/600.1.4", "{\"os_minor\": \"1\", \"os_major\": \"8\", \"device_family\": \"iPad\", \"os_family\": \"iOS\", \"browser_major\": \"8\", \"browser_family\": \"Mobile Safari\"}"}, + {"Mozilla/5.0 (iPad; CPU OS 7_1 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D167 Safari/9537.53", "{\"os_minor\": \"1\", \"os_major\": \"7\", \"device_family\": \"iPad\", \"os_family\": \"iOS\", \"browser_major\": \"7\", \"browser_family\": 
\"Mobile Safari\"}"}, + {"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"37\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.101 Safari/537.36 OPR/25.0.1614.50", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"25\", \"browser_family\": \"Opera\"}"}, + {"Mozilla/5.0 (iPhone; CPU iPhone OS 7_0_6 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B651 Safari/9537.53", "{\"os_minor\": \"0\", \"os_major\": \"7\", \"device_family\": \"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"7\", \"browser_family\": \"Mobile Safari\"}"}, + {"Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "{\"os_minor\": \"0\", \"os_major\": \"6\", \"device_family\": \"Spider\", \"os_family\": \"iOS\", \"browser_major\": \"2\", \"browser_family\": \"Googlebot\"}"}, + {"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20100101 Firefox/31.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"31\", \"browser_family\": \"Firefox\"}"}, + {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36", "{\"os_minor\": \"9\", \"os_major\": \"10\", \"device_family\": \"Other\", \"os_family\": \"Mac OS X\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.78.2 (KHTML, like Gecko) Version/6.1.6 Safari/537.78.2", "{\"os_minor\": \"7\", \"os_major\": \"10\", \"device_family\": 
\"Other\", \"os_family\": \"Mac OS X\", \"browser_major\": \"6\", \"browser_family\": \"Safari\"}"}, + {"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.101 Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (iPhone; CPU iPhone OS 6_1_3 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10B329 Safari/8536.25", "{\"os_minor\": \"1\", \"os_major\": \"6\", \"device_family\": \"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"6\", \"browser_family\": \"Mobile Safari\"}"}, + {"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; WOW64; Trident/6.0)", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 8\", \"browser_major\": \"10\", \"browser_family\": \"IE\"}"}, + {"Mozilla/5.0 (iPad; CPU OS 7_0_4 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B554a Safari/9537.53", "{\"os_minor\": \"0\", \"os_major\": \"7\", \"device_family\": \"iPad\", \"os_family\": \"iOS\", \"browser_major\": \"7\", \"browser_family\": \"Mobile Safari\"}"}, + {"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 YaBrowser/14.8.1985.12084 Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"14\", \"browser_family\": \"Yandex Browser\"}"}, + {"Mozilla/5.0 (Windows NT 6.0; rv:32.0) Gecko/20100101 Firefox/32.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows Vista\", \"browser_major\": \"32\", \"browser_family\": \"Firefox\"}"}, + {"Mozilla/5.0 (iPhone; CPU iPhone OS 8_0_2 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) GSA/4.2.2.38484 Mobile/12A405 Safari/9537.53", "{\"os_minor\": \"0\", \"os_major\": \"8\", \"device_family\": 
\"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"8\", \"browser_family\": \"Mobile Safari\"}"}, + {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.77.4 (KHTML, like Gecko) Version/7.0.5 Safari/537.77.4", "{\"os_minor\": \"9\", \"os_major\": \"10\", \"device_family\": \"Other\", \"os_family\": \"Mac OS X\", \"browser_major\": \"7\", \"browser_family\": \"Safari\"}"}, + {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:32.0) Gecko/20100101 Firefox/32.0", "{\"os_minor\": \"9\", \"os_major\": \"10\", \"device_family\": \"Other\", \"os_family\": \"Mac OS X\", \"browser_major\": \"32\", \"browser_family\": \"Firefox\"}"}, + {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.78.2 (KHTML, like Gecko) Version/7.0.6 Safari/537.78.2", "{\"os_minor\": \"9\", \"os_major\": \"10\", \"device_family\": \"Other\", \"os_family\": \"Mac OS X\", \"browser_major\": \"7\", \"browser_family\": \"Safari\"}"}, + {"Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_2 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) GSA/4.2.2.38484 Mobile/11D257 Safari/9537.53", "{\"os_minor\": \"1\", \"os_major\": \"7\", \"device_family\": \"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"7\", \"browser_family\": \"Mobile Safari\"}"}, + {"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows Vista\", \"browser_major\": \"9\", \"browser_family\": \"IE\"}"}, + {"Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 8\", \"browser_major\": \"32\", \"browser_family\": \"Firefox\"}"}, + {"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Spider\", \"os_family\": \"Other\", \"browser_major\": \"2\", \"browser_family\": \"Googlebot\"}"}, + {"NativeHost", 
"{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Other\", \"browser_major\": \"-\", \"browser_family\": \"Other\"}"}, + {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.78.2 (KHTML, like Gecko) Version/7.0.6 Safari/537.78.2", "{\"os_minor\": \"9\", \"os_major\": \"10\", \"device_family\": \"Other\", \"os_family\": \"Mac OS X\", \"browser_major\": \"7\", \"browser_family\": \"Safari\"}"}, + {"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.101 Safari/537.36 OPR/25.0.1614.50", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"25\", \"browser_family\": \"Opera\"}"}, + {"Mozilla/5.0 (Windows NT 6.3; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 8.1\", \"browser_major\": \"33\", \"browser_family\": \"Firefox\"}"}, + {"Mozilla/5.0 (iPad; CPU OS 7_1_1 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D201 Safari/9537.53", "{\"os_minor\": \"1\", \"os_major\": \"7\", \"device_family\": \"iPad\", \"os_family\": \"iOS\", \"browser_major\": \"7\", \"browser_family\": \"Mobile Safari\"}"}, + {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.59.10 (KHTML, like Gecko) Version/5.1.9 Safari/534.59.10", "{\"os_minor\": \"6\", \"os_major\": \"10\", \"device_family\": \"Other\", \"os_family\": \"Mac OS X\", \"browser_major\": \"5\", \"browser_family\": \"Safari\"}"}, + {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36", "{\"os_minor\": \"10\", \"os_major\": \"10\", \"device_family\": \"Other\", \"os_family\": \"Mac OS X\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36", 
"{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 8\", \"browser_major\": \"37\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"10\", \"browser_family\": \"IE\"}"}, + {"Mozilla/5.0 (iPhone; CPU iPhone OS 7_1 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D167 Safari/9537.53", "{\"os_minor\": \"1\", \"os_major\": \"7\", \"device_family\": \"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"7\", \"browser_family\": \"Mobile Safari\"}"}, + {"Mozilla/5.0 (Windows NT 5.1; rv:33.0) Gecko/20100101 Firefox/33.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows XP\", \"browser_major\": \"33\", \"browser_family\": \"Firefox\"}"}, + {"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows Vista\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"}, + {"facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Other\", \"browser_major\": \"1\", \"browser_family\": \"FacebookBot\"}"}, + {"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.101 Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (Windows NT 6.1; rv:33.0) Gecko/20100101 Firefox/33.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"33\", \"browser_family\": \"Firefox\"}"}, + 
{"Mozilla/5.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A365 Safari/600.1.4", "{\"os_minor\": \"0\", \"os_major\": \"8\", \"device_family\": \"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"8\", \"browser_family\": \"Mobile Safari\"}"}, + {"Mozilla/5.0 (iPhone; CPU iPhone OS 7_0_4 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B554a Safari/9537.53", "{\"os_minor\": \"0\", \"os_major\": \"7\", \"device_family\": \"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"7\", \"browser_family\": \"Mobile Safari\"}"}, + {"Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4", "{\"os_minor\": \"1\", \"os_major\": \"8\", \"device_family\": \"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"8\", \"browser_family\": \"Mobile Safari\"}"}, + {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36", "{\"os_minor\": \"9\", \"os_major\": \"10\", \"device_family\": \"Other\", \"os_family\": \"Mac OS X\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"10\", \"browser_family\": \"IE\"}"}, + {"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows XP\", \"browser_major\": \"37\", \"browser_family\": \"Chrome\"}"}, + {"-", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Other\", \"browser_major\": \"-\", \"browser_family\": \"Other\"}"}, + {"Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)", "{\"os_minor\": 
\"-\", \"os_major\": \"-\", \"device_family\": \"Spider\", \"os_family\": \"Other\", \"browser_major\": \"2\", \"browser_family\": \"bingbot\"}"}, + {"Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 8.1\", \"browser_major\": \"11\", \"browser_family\": \"IE\"}"}, + {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10) AppleWebKit/600.1.25 (KHTML, like Gecko) Version/8.0 Safari/600.1.25", "{\"os_minor\": \"10\", \"os_major\": \"10\", \"device_family\": \"Other\", \"os_family\": \"Mac OS X\", \"browser_major\": \"8\", \"browser_family\": \"Safari\"}"}, + {"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 8.1\", \"browser_major\": \"37\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (Windows NT 6.3; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 8.1\", \"browser_major\": \"32\", \"browser_family\": \"Firefox\"}"}, + {"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"9\", \"browser_family\": \"IE\"}"}, + {"Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_1 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D201 Safari/9537.53", "{\"os_minor\": \"1\", \"os_major\": \"7\", \"device_family\": \"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"7\", \"browser_family\": \"Mobile Safari\"}"}, + {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/600.1.17 (KHTML, like Gecko) Version/7.1 Safari/537.85.10", "{\"os_minor\": \"9\", \"os_major\": \"10\", \"device_family\": \"Other\", \"os_family\": \"Mac OS X\", \"browser_major\": \"7\", 
\"browser_family\": \"Safari\"}"}, + {"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"9\", \"browser_family\": \"IE\"}"}, + {"Mozilla/5.0 (iPad; CPU OS 8_0_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A405 Safari/600.1.4", "{\"os_minor\": \"0\", \"os_major\": \"8\", \"device_family\": \"iPad\", \"os_family\": \"iOS\", \"browser_major\": \"8\", \"browser_family\": \"Mobile Safari\"}"}, + {"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 8\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (iPad; CPU OS 7_1_2 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D257 Safari/9537.53", "{\"os_minor\": \"1\", \"os_major\": \"7\", \"device_family\": \"iPad\", \"os_family\": \"iOS\", \"browser_major\": \"7\", \"browser_family\": \"Mobile Safari\"}"}, + {"Mozilla/5.0 (Windows NT 5.1; rv:32.0) Gecko/20100101 Firefox/32.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows XP\", \"browser_major\": \"32\", \"browser_family\": \"Firefox\"}"}, + {"Mozilla/5.0 (Windows NT 6.1; rv:32.0) Gecko/20100101 Firefox/32.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"32\", \"browser_family\": \"Firefox\"}"}, + {"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"33\", \"browser_family\": \"Firefox\"}"}, + {"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36", 
"{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"37\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"11\", \"browser_family\": \"IE\"}"}, + {"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows XP\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"37\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (iPhone; CPU iPhone OS 8_0_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A405 Safari/600.1.4", "{\"os_minor\": \"0\", \"os_major\": \"8\", \"device_family\": \"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"8\", \"browser_family\": \"Mobile Safari\"}"}, + {"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 8.1\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_2 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D257 Safari/9537.53", "{\"os_minor\": \"1\", \"os_major\": \"7\", \"device_family\": \"iPhone\", \"os_family\": \"iOS\", \"browser_major\": \"7\", \"browser_family\": \"Mobile Safari\"}"}, + {"Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": 
\"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"11\", \"browser_family\": \"IE\"}"}, + {"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"32\", \"browser_family\": \"Firefox\"}"}, + {"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"}, + {"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36", "{\"os_minor\": \"-\", \"os_major\": \"-\", \"device_family\": \"Other\", \"os_family\": \"Windows 7\", \"browser_major\": \"38\", \"browser_family\": \"Chrome\"}"}, + + }); + } + + private String fInput; + + private String fExpected; + + + public TestUAParserUserAgentMostPopular(String input, String expected) { + fInput = input; + fExpected = expected; + } + + + @Test + public void testMatchingOfMostPopularUA() throws ParseException { + + + // decode expected output and turn it into an object + Object obj = jsonParser.parse(fExpected); + JSONObject expected_ua = (JSONObject) obj; + + // Get computed output + Map<String, String> computed_ua = uaParser.getUAMap(fInput); + + assertEquals("OS name check", expected_ua.get("os_family"), + computed_ua.get("os_family")); + + assertEquals("OS major version check", expected_ua.get("os_major"), + computed_ua.get("os_major")); + + assertEquals("OS minor version check", expected_ua.get("os_minor"), + computed_ua.get("os_minor")); + + assertEquals("browser check", expected_ua.get("browser_family"), + computed_ua.get("browser_family")); + + assertEquals("browser major version check", expected_ua.get("browser_major"), + computed_ua.get("browser_major")); + + assertEquals("device check", 
expected_ua.get("device_family"), + computed_ua.get("device_family")); + + } + +} \ No newline at end of file diff --git a/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestUAParserUserAgentRecognition.java b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestUAParserUserAgentRecognition.java new file mode 100644 index 0000000..4b28245 --- /dev/null +++ b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestUAParserUserAgentRecognition.java @@ -0,0 +1,135 @@ +/** + * Copyright (C) 2015 Wikimedia Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.wikimedia.analytics.refinery.core; + +import junit.framework.TestCase; +import org.junit.After; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.Assert.assertEquals; + +public class TestUAParserUserAgentRecognition extends TestCase { + + UAParser uaParser = null; + + @Before + public void setUp() { + uaParser = new UAParser(); + } + + @Test + public void testHappyCase() { + + String ua1 = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:19.0) Gecko/20100101 Firefox/19.0"; + String ua2 = "Mozilla/5.0 (iPad; U; CPU OS 3_2_1 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Mobile/7B405"; + String ua3 = "Mozilla/5.0 (iPad; CPU OS 7_0_3 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B511 Safari/9537.53"; + + Map<String, String> evaled = uaParser.getUAMap(ua1); + assertEquals("OS name check", (new String("Ubuntu")), + evaled.get("os_family").toString()); + assertEquals("Browser name check", (new String("Firefox")), + evaled.get("browser_family").toString()); + + + evaled = uaParser.getUAMap(ua2); + assertEquals("OS name check", (new String("iOS")), + evaled.get("os_family").toString()); + + assertEquals("Browser name check", (new String("Mobile Safari")), + evaled.get("browser_family").toString()); + + + evaled = uaParser.getUAMap(ua3); + assertEquals("OS name check", (new String("iOS")), + evaled.get("os_family").toString()); + assertEquals("Browser name check", (new String("Mobile Safari")), + evaled.get("browser_family").toString()); + + } + + /** + * Tests what we return when browser is empty. 
+ * + * UA parser will return this as "browser obj" for an empty user agent string: + * { + * user_agent: {family: "Other", major: null, minor: null, patch: null}, + * os: {family: "Other", major: null, minor: null, patch: null, patch_minor: null}, + * device: {family: "Other"} + * } + * UDFs returns something like the following: + * { + * "device_family":"Other", + * "browser_major":"-", + * "os_family":"Other", + * "os_major":"-", + * "browser_family":"Other", + * "os_minor":"-" + * } + **/ + @Test + public void testEmptyUA() { + + Map<String, String> evaled = uaParser.getUAMap(""); + + String resultOSName = evaled.get("os_family"); + String resultBrowserName = evaled.get("browser_family"); + String resultOsMinor = evaled.get("os_minor"); + assertEquals("OS name check", (new String("Other")), + resultOSName.toString()); + assertEquals("Browser name check", (new String("Other")), + resultBrowserName.toString()); + + assertEquals("OS minor", (new String("-")), + resultOsMinor.toString()); + } + + /** + * Tests what we return when browser is null + * + * { + * "device_family":"-", + * "browser_major":"-", + * "os_family":"-", + * "os_major":"-", + * "browser_family":"-", + * "os_minor":"-" + * } + **/ + public void testHandlingOfNulls() { + + Map<String, String> evaled = uaParser.getUAMap(null); + + String resultOSName = evaled.get("os_family"); + String resultBrowserName = evaled.get("browser_family"); + String resultOsMinor = evaled.get("os_minor"); + assertEquals("OS name check", (new String("-")), + resultOSName.toString()); + assertEquals("Browser name check", (new String("-")), + resultBrowserName.toString()); + + assertEquals("OS minor", (new String("-")), + resultOsMinor.toString()); + + } + +} \ No newline at end of file diff --git a/refinery-hive/pom.xml b/refinery-hive/pom.xml index e7833fe..f601c38 100644 --- a/refinery-hive/pom.xml +++ b/refinery-hive/pom.xml @@ -42,10 +42,6 @@ </dependency> <dependency> - <groupId>ua_parser</groupId> - 
<artifactId>ua-parser</artifactId> - </dependency> - <dependency> <groupId>com.googlecode.json-simple</groupId> <artifactId>json-simple</artifactId> </dependency> diff --git a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/UAParserUDF.java b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/UAParserUDF.java index 7cf97ae..27563ed 100644 --- a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/UAParserUDF.java +++ b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/UAParserUDF.java @@ -15,9 +15,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.log4j.Logger; -import ua_parser.*; +import org.wikimedia.analytics.refinery.core.UAParser; -import java.io.IOException; import java.util.HashMap; import java.util.Map; @@ -34,12 +33,6 @@ * } * <p/> * Records are processed one by one. - * - * NOTE: This UDF was original coded as part of the Kraken repository, and may have not - * received as high of qualitiy code review as we would like. However, it works as is, - * and we want to make this functionality available for use. Please also note that there - * is currently not a process for ensuring that the dependent ua_parser package is up to date - * with the latest user agent classification regexes. 
*/ @UDFType(deterministic = true) @@ -47,8 +40,8 @@ + "Returns a map with browser_name, browser_major, device, os_name, os_minor, os_major keys and " + "the appropriate values for each of them") public class UAParserUDF extends GenericUDF { - Map<String, String> result = new HashMap<String, String>(); - public CachingParser cachingParser; + private Map<String, String> empytMap = new HashMap<String,String>(); + private UAParser uaParser; private ObjectInspector argumentOI; // TODO figure out why not everything is logged to hive.log and some logging @@ -100,15 +93,8 @@ } - - try { - cachingParser = new CachingParser(); - } catch (IOException e) { - // no recovery should be possible, log and rethrow - // runtime exception will be logged to stdout by default - Log.error(e.getMessage(), e); - throw new RuntimeException("Failed to instantiate CachingParser"); - } + // Instantiate the UAParser + uaParser = new UAParser(); argumentOI = arg; return ObjectInspectorFactory.getStandardMapObjectInspector( @@ -116,17 +102,7 @@ PrimitiveObjectInspectorFactory.javaStringObjectInspector); } - private final String NA = "-"; - private String replaceNA(String str) { - final String ret; - if (str == null || str.isEmpty() || str.equals("-")) { - ret = NA; - } else { - ret = str; - } - return ret; - } /** * Takes the actual arguments and returns the result. 
@@ -148,57 +124,15 @@ @SuppressWarnings("unchecked") @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { - result.clear(); + assert uaParser != null: "Evaluate called without initializing 'uaParser'"; - UserAgent browser = null; - Device device = null; - OS os = null; - - try { - if (arguments.length == 1 - && argumentOI != null && arguments[0] != null) { - Client c; - String pattern = ((StringObjectInspector) argumentOI) - .getPrimitiveJavaObject(arguments[0].get()); - - c = cachingParser.parse(pattern); - if (c != null) { - browser = c.userAgent; - device = c.device; - os = c.os; - } - } - } catch (Exception e) { - // catch it all to make sure job does not halt if one record is faulty - // TODO find out why this gets logged to hadoop but not to hive.log - Log.error(e.getMessage(), e); + if (arguments.length == 1 && argumentOI != null && arguments[0] != null) { + String ua = ((StringObjectInspector) argumentOI).getPrimitiveJavaObject(arguments[0].get()); + return uaParser.getUAMap(ua); } - if (browser != null) { - result.put("browser_family", replaceNA(browser.family)); - result.put("browser_major", replaceNA(browser.major)); - } else { - result.put("browser_family", NA); - result.put("browser_major", NA); - } - - if (device != null) { - result.put("device_family", replaceNA(device.family)); - } else { - result.put("device_family", NA); - } - - if (os != null) { - result.put("os_family", replaceNA(os.family)); - result.put("os_major", replaceNA(os.major)); - result.put("os_minor", replaceNA(os.minor)); - } else { - result.put("os_family", NA); - result.put("os_major", NA); - result.put("os_minor", NA); - } - - return result; + // Return an empty map in case of arguments irregularity + return empytMap; } /** -- To view, visit https://gerrit.wikimedia.org/r/195952 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I77a3cf57ed96658b763370423fe79bb1b4aded4d Gerrit-PatchSet: 1 
Gerrit-Project: analytics/refinery/source Gerrit-Branch: master Gerrit-Owner: Joal <j...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits