Nuria has submitted this change and it was merged.

Change subject: De-static-everything
......................................................................
De-static-everything

Why else would I call the branch 'degaussing'? This is an architectural change to do something we've been talking about for a while - namely, moving the generic functions for string or regex comparisons out of specific classes, and then making the classes dynamic rather than static, because not everything needs to be static.

This patch:

1. Creates a "Utilities" class containing the regex and string checker methods that previously lived in the Pageview class. It doesn't have unit tests because if those break, everything above it also breaks.
2. Makes everything non-static (except for the Utilities, because they really should be, and the referer...thing, because I took one look at it and decided I probably shouldn't risk touching it).
3. Amends all the UDFs and unit tests so that they instantiate the classes to call their public methods, rather than assuming they're static methods that can just be called as-is.

Ackbar is back.

Change-Id: I7c5928a7f8a9bd6fe7eae0ecc546ff2122b862d0
---
M changelog.md
R refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/LegacyPageviewDefinition.java
R refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/PageviewDefinition.java
A refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Utilities.java
M refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java
R refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestLegacyPageviewDefinition.java
M refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestPageview.java
M refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestWebrequest.java
M refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GetAccessMethodUDF.java
M refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GetXAnalyticsValueUDF.java
M refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/IsCrawlerUDF.java
M 
refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/IsLegacyPageviewUDF.java M refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/IsPageviewUDF.java 13 files changed, 186 insertions(+), 91 deletions(-) Approvals: Nuria: Verified; Looks good to me, approved diff --git a/changelog.md b/changelog.md index c0958ae..86ed3c9 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,6 @@ ## v0.0.9-SNAPSHOT - +* Generic functions used in multiple classes now live in a single "utilities" class. +* Not everything is static. ## v0.0.8 * Stop counting edit attempts as pageviews diff --git a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/LegacyPageview.java b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/LegacyPageviewDefinition.java similarity index 61% rename from refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/LegacyPageview.java rename to refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/LegacyPageviewDefinition.java index b6edb2d..5fc4160 100644 --- a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/LegacyPageview.java +++ b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/LegacyPageviewDefinition.java @@ -19,7 +19,7 @@ import java.util.Arrays; /** - * Static functions to identify what requests constitute "pageviews", + * Functions to identify what requests constitute "pageviews", * according to the definition at * https://github.com/wikimedia/analytics-refinery/blob/master/oozie/pagecounts-all-sites/load/insert_hourly_pagecounts.hql * This is the "legacy" definition, in use by WebStatsCollector and the @@ -27,35 +27,56 @@ * from 2007 to early 2015, and is to be superseded by the "Pageview" class * and isPageview method. 
*/ -public class LegacyPageview { +public class LegacyPageviewDefinition { - private static final Pattern acceptedUriHostsPattern = Pattern.compile( + /* + * Meta-methods to enable eager instantiation in a singleton-based way. + * in non-Java terms: you get to only create one class instance, and only + * when you need it, instead of always having everything (static/eager instantiation) + * or always generating everything anew (!singletons). So we have: + * (1) an instance; + * (2) an empty constructor (to avoid people just calling the constructor); + * (3) an actual getInstance method to allow for instantiation. + */ + private static final LegacyPageviewDefinition instance = new LegacyPageviewDefinition(); + + private LegacyPageviewDefinition() { + } + + public static LegacyPageviewDefinition getInstance(){ + return instance; + } + + /* + * Now back to the good part. + */ + private final Pattern acceptedUriHostsPattern = Pattern.compile( "\\.(mediawiki|wik(ibooks|idata|imediafoundation|inews|ipedia|iquote|isource|tionary|iversity|ivoyage))\\.org$" ); - private static final Pattern acceptedMetaUriHostsPattern = Pattern.compile( + private final Pattern acceptedMetaUriHostsPattern = Pattern.compile( "(commons|incubator|meta|outreach|quality|species|strategy|usability)(\\.m)?\\.wikimedia\\.org$" ); - private static final Pattern acceptedUriPattern = Pattern.compile( + private final Pattern acceptedUriPattern = Pattern.compile( "^/wiki/" ); - private static final Pattern rejectedUriPattern = Pattern.compile( + private final Pattern rejectedUriPattern = Pattern.compile( "^/wiki/Special\\:CentralAutoLogin/" ); - private static final HashSet<String> rejectedUriPathPages = new HashSet<String>(Arrays.asList( + private final HashSet<String> rejectedUriPathPages = new HashSet<String>(Arrays.asList( "/wiki/undefined", "/wiki/Undefined" )); - private static final HashSet<String> rejectedStatusCodes = new HashSet<String>(Arrays.asList( + private final HashSet<String> 
rejectedStatusCodes = new HashSet<String>(Arrays.asList( "301", "302", "303" )); - private static final Pattern rejectedIPPattern = Pattern.compile( + private final Pattern rejectedIPPattern = Pattern.compile( "^(10\\.20\\.0|10\\.64\\.0|10\\.128\\.0|10\\.64\\.32|208\\.80\\.15[2-5]|91\\.198\\.174)\\..+" ); @@ -70,7 +91,7 @@ * @param uriQuery Query portion of the URI * @param httpStatus HTTP request status code */ - public static boolean isLegacyPageview( + public boolean isLegacyPageview( String ip, String xForwardedFor, String uriHost, @@ -84,19 +105,19 @@ //The host is a "recognised" project && ( - Pageview.patternIsFound(acceptedUriHostsPattern, uriHost) - || Pageview.patternIsFound(acceptedMetaUriHostsPattern, uriHost) + Utilities.patternIsFound(acceptedUriHostsPattern, uriHost) + || Utilities.patternIsFound(acceptedMetaUriHostsPattern, uriHost) ) //The URI path starts with /wiki/, and //isn't to undefined, Undefined or Special:CentralAutoLogin - && Pageview.patternIsFound(acceptedUriPattern, uriPath) - && !Pageview.patternIsFound(rejectedUriPattern, uriPath) + && Utilities.patternIsFound(acceptedUriPattern, uriPath) + && !Utilities.patternIsFound(rejectedUriPattern, uriPath) && !rejectedUriPathPages.contains(uriPath) //The source IP isn't in a specified range (or, //is, but the XFF field is not empty) && ( - !Pageview.patternIsFound(rejectedIPPattern, ip) + !Utilities.patternIsFound(rejectedIPPattern, ip) || !xForwardedFor.equals("-") ) ); diff --git a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Pageview.java b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/PageviewDefinition.java similarity index 63% rename from refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Pageview.java rename to refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/PageviewDefinition.java index c909e7a..803aba6 100644 --- a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Pageview.java +++ 
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/PageviewDefinition.java @@ -24,45 +24,66 @@ * Static functions to work with Wikimedia webrequest data. * This class was orignally created while reading https://gist.github.com/Ironholds/96558613fe38dd4d1961 */ -public class Pageview { +public class PageviewDefinition { - private static final Pattern uriHostWikimediaDomainPattern = Pattern.compile( + /* + * Meta-methods to enable eager instantiation in a singleton-based way. + * in non-Java terms: you get to only create one class instance, and only + * when you need it, instead of always having everything (static/eager instantiation) + * or always generating everything anew (!singletons). So we have: + * (1) an instance; + * (2) an empty constructor (to avoid people just calling the constructor); + * (3) an actual getInstance method to allow for instantiation. + */ + private static final PageviewDefinition instance = new PageviewDefinition(); + + private PageviewDefinition() { + } + + public static PageviewDefinition getInstance(){ + return instance; + } + + /* + * Now back to the good part. + */ + private final Pattern uriHostWikimediaDomainPattern = Pattern.compile( "(commons|meta|incubator|species)\\." // any of these domain names + "((m|mobile|wap|zero)\\.)?" // followed by an optional mobile or zero qualifier + "wikimedia\\.org$" // ending with wikimedia.org ); - private static final Pattern uriHostProjectDomainPattern = Pattern.compile( + private final Pattern uriHostProjectDomainPattern = Pattern.compile( "(?<!www)\\." 
// not starting with "www" + "(wik(ibooks|" // match project domains ending in .org + "inews|ipedia|iquote|isource|tionary|iversity|ivoyage))\\.org$" ); - private static final Pattern uriHostOtherProjectsPattern = Pattern.compile( + private final Pattern uriHostOtherProjectsPattern = Pattern.compile( "(wikidata|mediawiki)\\.org$" ); - private static final Pattern uriPathPattern = Pattern.compile( + private final Pattern uriPathPattern = Pattern.compile( "^(/sr(-(ec|el))?|/w(iki)?|/zh(-(cn|hans|hant|hk|mo|my|sg|tw))?)/" ); - private static final Pattern uriQueryPattern = Pattern.compile( + private final Pattern uriQueryPattern = Pattern.compile( "\\?((cur|old)id|title|search)=" ); - private static final Pattern uriPathUnwantedSpecialPagesPattern = Pattern.compile( + private final Pattern uriPathUnwantedSpecialPagesPattern = Pattern.compile( "BannerRandom|CentralAutoLogin|MobileEditor|Undefined|UserLogin|ZeroRatedMobileAccess" ); - private static final Pattern uriQueryUnwantedSpecialPagesPattern = Pattern.compile( + private final Pattern uriQueryUnwantedSpecialPagesPattern = Pattern.compile( "CentralAutoLogin|MobileEditor|UserLogin|ZeroRatedMobileAccess" ); - private static final Pattern uriQueryUnwantedActions = Pattern.compile( + private final Pattern uriQueryUnwantedActions = Pattern.compile( "action=edit" ); - private static final HashSet<String> contentTypesSet = new HashSet<String>(Arrays.asList( + private final HashSet<String> contentTypesSet = new HashSet<String>(Arrays.asList( "text/html", "text/html; charset=iso-8859-1", "text/html; charset=ISO-8859-1", @@ -70,7 +91,7 @@ "text/html; charset=UTF-8" )); - private static final HashSet<String> httpStatusesSet = new HashSet<String>(Arrays.asList( + private final HashSet<String> httpStatusesSet = new HashSet<String>(Arrays.asList( "200", "304" )); @@ -78,35 +99,7 @@ /** * All API request uriPaths will contain this */ - private static final String uriPathAPI = "api.php"; - - - /** - * Check if the target is 
contained within string. This is - * just a convenience method that also makes sure that arguments are not null. - * - * @param string String to search in - * @param target String to search for - * @return boolean - */ - private static boolean stringContains(String string, String target){ - return (target != null && string != null && string.contains(target)); - } - - - /** - * Convenience method for Using Matcher.find() to check if - * the given regex Pattern matches the target String. - * Also called in the LegacyPageview class. - * - * @param Pattern pattern - * @param String target - * - * @return boolean - */ - public static boolean patternIsFound(Pattern pattern, String target) { - return pattern.matcher(target).find(); - } + private final String uriPathAPI = "api.php"; /** @@ -119,7 +112,7 @@ * * @return boolean */ - private static boolean isAppPageRequest( + private boolean isAppPageRequest( String uriPath, String uriQuery, String contentType, @@ -131,10 +124,10 @@ final String appPageURIQuery = "sections=0"; return ( - stringContains(uriPath, uriPathAPI) - && stringContains(uriQuery, appPageURIQuery) - && stringContains(contentType, appContentType) - && stringContains(userAgent, appUserAgent) + Utilities.stringContains(uriPath, uriPathAPI) + && Utilities.stringContains(uriQuery, appPageURIQuery) + && Utilities.stringContains(contentType, appContentType) + && Utilities.stringContains(userAgent, appUserAgent) ); } @@ -155,7 +148,7 @@ * * @return boolean */ - public static boolean isPageview( + public boolean isPageview( String uriHost, String uriPath, String uriQuery, @@ -170,28 +163,28 @@ httpStatusesSet.contains(httpStatus) // check for a regular pageview contentType, or a an API contentType && ( - (contentTypesSet.contains(contentType) && !stringContains(uriPath, uriPathAPI)) + (contentTypesSet.contains(contentType) && !Utilities.stringContains(uriPath, uriPathAPI)) || isAppPageRequest(uriPath, uriQuery, contentType, userAgent) ) // A pageview must be from 
either a wikimedia.org domain, // or a 'project' domain, e.g. en.wikipedia.org && ( - patternIsFound(uriHostWikimediaDomainPattern, uriHost) - || patternIsFound(uriHostOtherProjectsPattern, uriHost) - || patternIsFound(uriHostProjectDomainPattern, uriHost) + Utilities.patternIsFound(uriHostWikimediaDomainPattern, uriHost) + || Utilities.patternIsFound(uriHostOtherProjectsPattern, uriHost) + || Utilities.patternIsFound(uriHostProjectDomainPattern, uriHost) ) // Either a pageview's uriPath will match the first pattern, // or its uriQuery will match the second && ( - patternIsFound(uriPathPattern, uriPath) - || patternIsFound(uriQueryPattern, uriQuery) + Utilities.patternIsFound(uriPathPattern, uriPath) + || Utilities.patternIsFound(uriQueryPattern, uriQuery) ) // A pageview will not have these Special: pages in the uriPath or uriQuery - && !patternIsFound(uriPathUnwantedSpecialPagesPattern, uriPath) - && !patternIsFound(uriQueryUnwantedSpecialPagesPattern, uriQuery) + && !Utilities.patternIsFound(uriPathUnwantedSpecialPagesPattern, uriPath) + && !Utilities.patternIsFound(uriQueryUnwantedSpecialPagesPattern, uriQuery) // Edits now come through as text/html. They should not be included. // Luckily the query parameter does not seem to be localised. - && !patternIsFound(uriQueryUnwantedActions, uriQuery) + && !Utilities.patternIsFound(uriQueryUnwantedActions, uriQuery) ); } } \ No newline at end of file diff --git a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Utilities.java b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Utilities.java new file mode 100644 index 0000000..e98e714 --- /dev/null +++ b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Utilities.java @@ -0,0 +1,53 @@ +/** + * Copyright (C) 2015 Wikimedia Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.wikimedia.analytics.refinery.core; + +import java.util.regex.Pattern; + +/** + * Static functions to work with Wikimedia data, broadly construed; + * this is where we put the functions generic enough to be reused + * over and over again in other classes. + */ +public class Utilities { + + /** + * Check if the target is contained within string. This is + * just a convenience method that also makes sure that arguments are not null. + * + * @param string String to search in + * @param target String to search for + * @return boolean + */ + public static boolean stringContains(String string, String target){ + return (target != null && string != null && string.contains(target)); + } + + /** + * Convenience method for Using Matcher.find() to check if + * the given regex Pattern matches the target String. + * Also called in the LegacyPageview class. 
+ * + * @param Pattern pattern + * @param String target + * + * @return boolean + */ + public static boolean patternIsFound(Pattern pattern, String target) { + return pattern.matcher(target).find(); + } +} \ No newline at end of file diff --git a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java index 3a7baf1..800d5f2 100644 --- a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java +++ b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java @@ -19,11 +19,30 @@ import org.apache.commons.lang3.StringUtils; /** - * Static functions to work withh Wikimedia webrequest data. + * Functions to work with Wikimedia webrequest data. */ public class Webrequest { - /** + /* + * Meta-methods to enable eager instantiation in a singleton-based way. + * in non-Java terms: you get to only create one class instance, and only + * when you need it, instead of always having everything (static/eager instantiation) + * or always generating everything anew (!singletons). So we have: + * (1) an instance; + * (2) an empty constructor (to avoid people just calling the constructor); + * (3) an actual getInstance method to allow for instantiation. + */ + private static final Webrequest instance = new Webrequest(); + + private Webrequest() { + } + + public static Webrequest getInstance(){ + return instance; + } + + /* + * Now back to the good part. * Wikimedia-specific crawlers */ private static final Pattern crawlerPattern = Pattern.compile( @@ -53,7 +72,7 @@ * @param userAgent the user agent associated with the request. * @return boolean */ - public static boolean isCrawler(String userAgent) { + public boolean isCrawler(String userAgent) { return crawlerPattern.matcher(userAgent).find(); } @@ -66,7 +85,7 @@ * @param key the key to search for the value of. 
* @return String */ - public static String getXAnalyticsValue(String xAnalytics, String key) { + public String getXAnalyticsValue(String xAnalytics, String key) { String value = ""; @@ -97,7 +116,7 @@ * * @return String */ - public static String getAccessMethod(String uriHost, String userAgent) { + public String getAccessMethod(String uriHost, String userAgent) { String accessMethod = ""; if(appAgentPattern.matcher(userAgent).find()){ diff --git a/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestLegacyPageview.java b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestLegacyPageviewDefinition.java similarity index 88% rename from refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestLegacyPageview.java rename to refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestLegacyPageviewDefinition.java index 4027cf1..8f957f3 100644 --- a/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestLegacyPageview.java +++ b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestLegacyPageviewDefinition.java @@ -23,7 +23,7 @@ import junitparams.mappers.CsvWithHeaderMapper; @RunWith(JUnitParamsRunner.class) -public class TestLegacyPageview { +public class TestLegacyPageviewDefinition { @Test @@ -44,10 +44,11 @@ String content_type, String user_agent ) { + LegacyPageviewDefinition legacyPageviewDefinitionInstance = LegacyPageviewDefinition.getInstance(); assertEquals( test_description, is_legacy_pageview, - LegacyPageview.isLegacyPageview( + legacyPageviewDefinitionInstance.isLegacyPageview( ip_address, x_forwarded_for, uri_host, diff --git a/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestPageview.java b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestPageview.java index 8fef768..0380a88 100644 --- a/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestPageview.java +++ 
b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestPageview.java @@ -25,7 +25,6 @@ @RunWith(JUnitParamsRunner.class) public class TestPageview { - @Test @FileParameters( value = "src/test/resources/pageview_test_data.csv", @@ -44,10 +43,11 @@ String content_type, String user_agent ) { + PageviewDefinition legacyPageviewDefinitionInstance = PageviewDefinition.getInstance(); assertEquals( test_description, is_pageview, - Pageview.isPageview( + legacyPageviewDefinitionInstance.isPageview( uri_host, uri_path, uri_query, diff --git a/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestWebrequest.java b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestWebrequest.java index 2ac9dcf..36b587a 100644 --- a/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestWebrequest.java +++ b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestWebrequest.java @@ -21,10 +21,11 @@ boolean is_crawler, String user_agent ) { + Webrequest webrequest_inst = Webrequest.getInstance(); assertEquals( test_description, is_crawler, - Webrequest.isCrawler( + webrequest_inst.isCrawler( user_agent ) ); @@ -41,10 +42,11 @@ String x_analytics, String param ) { + Webrequest instance = Webrequest.getInstance(); assertEquals( test_description, expected_output, - Webrequest.getXAnalyticsValue( + instance.getXAnalyticsValue( x_analytics, param ) @@ -62,11 +64,11 @@ String uri_host, String user_agent ) { - + Webrequest webrequest_inst = Webrequest.getInstance(); assertEquals( test_description, expected_method, - Webrequest.getAccessMethod( + webrequest_inst.getAccessMethod( uri_host, user_agent ) diff --git a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GetAccessMethodUDF.java b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GetAccessMethodUDF.java index 1a0f77b..7195af5 100644 --- a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GetAccessMethodUDF.java +++ 
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GetAccessMethodUDF.java @@ -28,7 +28,8 @@ String uri_host, String user_agent ) { - return Webrequest.getAccessMethod( + Webrequest webrequest_inst = Webrequest.getInstance(); + return webrequest_inst.getAccessMethod( uri_host, user_agent ); diff --git a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GetXAnalyticsValueUDF.java b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GetXAnalyticsValueUDF.java index ba2e254..6f66c9b 100644 --- a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GetXAnalyticsValueUDF.java +++ b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GetXAnalyticsValueUDF.java @@ -28,7 +28,8 @@ String x_analytics, String key ) { - return Webrequest.getXAnalyticsValue( + Webrequest webrequest_inst = Webrequest.getInstance(); + return webrequest_inst.getXAnalyticsValue( x_analytics, key ); diff --git a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/IsCrawlerUDF.java b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/IsCrawlerUDF.java index 73fce7a..51df7a4 100644 --- a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/IsCrawlerUDF.java +++ b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/IsCrawlerUDF.java @@ -27,7 +27,8 @@ public boolean evaluate( String user_agent ) { - return Webrequest.isCrawler( + Webrequest webrequest_inst = Webrequest.getInstance(); + return webrequest_inst.isCrawler( user_agent ); } diff --git a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/IsLegacyPageviewUDF.java b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/IsLegacyPageviewUDF.java index 9bd5905..81ce6e0 100644 --- a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/IsLegacyPageviewUDF.java +++ b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/IsLegacyPageviewUDF.java @@ -17,7 +17,7 @@ package 
org.wikimedia.analytics.refinery.hive; import org.apache.hadoop.hive.ql.exec.UDF; -import org.wikimedia.analytics.refinery.core.LegacyPageview; +import org.wikimedia.analytics.refinery.core.LegacyPageviewDefinition; /** @@ -59,7 +59,8 @@ String uriPath, String httpStatus ) { - return LegacyPageview.isLegacyPageview( + LegacyPageviewDefinition legacyPageviewDefinitionInstance = LegacyPageviewDefinition.getInstance(); + return legacyPageviewDefinitionInstance.isLegacyPageview( ip, xForwardedFor, uriHost, diff --git a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/IsPageviewUDF.java b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/IsPageviewUDF.java index 1cd8131..229d1ce 100644 --- a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/IsPageviewUDF.java +++ b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/IsPageviewUDF.java @@ -17,7 +17,7 @@ package org.wikimedia.analytics.refinery.hive; import org.apache.hadoop.hive.ql.exec.UDF; -import org.wikimedia.analytics.refinery.core.Pageview; +import org.wikimedia.analytics.refinery.core.PageviewDefinition; /** @@ -57,7 +57,8 @@ String contentType, String userAgent ) { - return Pageview.isPageview( + PageviewDefinition pageviewDefinitionInstance = PageviewDefinition.getInstance(); + return pageviewDefinitionInstance.isPageview( uriHost, uriPath, uriQuery, -- To view, visit https://gerrit.wikimedia.org/r/197296 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I7c5928a7f8a9bd6fe7eae0ecc546ff2122b862d0 Gerrit-PatchSet: 5 Gerrit-Project: analytics/refinery/source Gerrit-Branch: master Gerrit-Owner: OliverKeyes <[email protected]> Gerrit-Reviewer: Nuria <[email protected]> Gerrit-Reviewer: OliverKeyes <[email protected]> Gerrit-Reviewer: Ottomata <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] 
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
