Joal has uploaded a new change for review.
https://gerrit.wikimedia.org/r/264949
Change subject: Modify oozie job to use jars v0.0.25
......................................................................
Modify oozie job to use jars v0.0.25
Update the mobile_apps session metrics job to use the new jar.
Update the webrequest refine job to use the new jar
and the new referrer classification function introduced in that jar.
Bug: T122651
Change-Id: I1b8c4b8a4a69800a9013349ec2b62836adb15a84
---
M oozie/mobile_apps/session_metrics/coordinator.properties
M oozie/webrequest/refine/bundle.properties
M oozie/webrequest/refine/refine_webrequest.hql
3 files changed, 5 insertions(+), 5 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery refs/changes/49/264949/1
diff --git a/oozie/mobile_apps/session_metrics/coordinator.properties b/oozie/mobile_apps/session_metrics/coordinator.properties
index 4a43857..5bce0df 100644
--- a/oozie/mobile_apps/session_metrics/coordinator.properties
+++ b/oozie/mobile_apps/session_metrics/coordinator.properties
@@ -46,7 +46,7 @@
spark_master = yarn
spark_deploy = cluster
spark_assembly_jar =
${name_node}/user/spark/share/lib/spark-assembly.jar
-spark_job_jar =
${artifacts_directory}/org/wikimedia/analytics/refinery/refinery-job-0.0.14.jar
+spark_job_jar =
${artifacts_directory}/org/wikimedia/analytics/refinery/refinery-job-0.0.25.jar
spark_job_class =
org.wikimedia.analytics.refinery.job.AppSessionMetrics
spark_job_name = app_session_metrics
spark_number_executors = 32
diff --git a/oozie/webrequest/refine/bundle.properties b/oozie/webrequest/refine/bundle.properties
index c61b6c2..4c10853 100644
--- a/oozie/webrequest/refine/bundle.properties
+++ b/oozie/webrequest/refine/bundle.properties
@@ -51,14 +51,14 @@
hive_site_xml = ${oozie_directory}/util/hive/hive-site.xml
# Version of Hive UDF jar to import
-refinery_jar_version = 0.0.23
+refinery_jar_version = 0.0.25
# Fully qualified Hive table name.
source_table = wmf_raw.webrequest
destination_table = wmf.webrequest
# Record version to keep track of changes
-record_version = 0.0.11
+record_version = 0.0.12
# HDFS path to directory where webrequest data is time bucketed.
webrequest_raw_data_directory = ${name_node}/wmf/data/raw/webrequest
diff --git a/oozie/webrequest/refine/refine_webrequest.hql b/oozie/webrequest/refine/refine_webrequest.hql
index aa31056..9b51ac8 100644
--- a/oozie/webrequest/refine/refine_webrequest.hql
+++ b/oozie/webrequest/refine/refine_webrequest.hql
@@ -54,7 +54,7 @@
CREATE TEMPORARY FUNCTION get_access_method as
'org.wikimedia.analytics.refinery.hive.GetAccessMethodUDF';
CREATE TEMPORARY FUNCTION is_spider as
'org.wikimedia.analytics.refinery.hive.IsSpiderUDF';
CREATE TEMPORARY FUNCTION is_wikimedia_bot as
'org.wikimedia.analytics.refinery.hive.IsWikimediaBotUDF';
-CREATE TEMPORARY FUNCTION classify_referer AS
'org.wikimedia.analytics.refinery.hive.RefererClassifierUDF';
+CREATE TEMPORARY FUNCTION referer_classify AS
'org.wikimedia.analytics.refinery.hive.SmartReferrerClassifierUDF';
CREATE TEMPORARY FUNCTION get_pageview_info AS
'org.wikimedia.analytics.refinery.hive.GetPageviewInfoUDF';
CREATE TEMPORARY FUNCTION normalize_host AS
'org.wikimedia.analytics.refinery.hive.HostNormalizerUDF';
@@ -100,7 +100,7 @@
ELSE 'user'
END as agent_type,
(str_to_map(x_analytics, '\;', '=')['zero'] IS NOT NULL) as is_zero,
- classify_referer(referer) as referer_class,
+ referer_classify(referer) as referer_class,
normalize_host(uri_host) as normalized_host,
CASE
WHEN is_pageview(uri_host, uri_path, uri_query, http_status,
content_type, user_agent) THEN get_pageview_info(uri_host, uri_path, uri_query)
--
To view, visit https://gerrit.wikimedia.org/r/264949
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I1b8c4b8a4a69800a9013349ec2b62836adb15a84
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: Joal <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits