Joal has uploaded a new change for review.
https://gerrit.wikimedia.org/r/192891
Change subject: Add refinery_hive_jar_version as a parameter in oozie bundle
properties.
......................................................................
Add refinery_hive_jar_version as a parameter in oozie bundle properties.
Update oozie webrequest/refine config files to define and use
refinery_hive_jar_version.
Update refine HQL to use the field from oozie parameter.
Change-Id: I42185eae73af861b0b848add8dfc27b5b0ba0287
---
M oozie/webrequest/refine/bundle.properties
M oozie/webrequest/refine/bundle.xml
M oozie/webrequest/refine/coordinator.xml
M oozie/webrequest/refine/refine_webrequest.hql
M oozie/webrequest/refine/workflow.xml
5 files changed, 22 insertions(+), 1 deletion(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery
refs/changes/91/192891/1
diff --git a/oozie/webrequest/refine/bundle.properties
b/oozie/webrequest/refine/bundle.properties
index 439989c..128bd89 100644
--- a/oozie/webrequest/refine/bundle.properties
+++ b/oozie/webrequest/refine/bundle.properties
@@ -50,6 +50,9 @@
# HDFS path to hive-site.xml file. This is needed to run hive actions.
hive_site_xml = ${oozie_directory}/util/hive/hive-site.xml
+# Version of Hive UDF jar to import
+refinery_hive_jar_version = 0.0.7
+
# Fully qualified Hive table name.
source_table = wmf_raw.webrequest
destination_table = wmf.webrequest
diff --git a/oozie/webrequest/refine/bundle.xml
b/oozie/webrequest/refine/bundle.xml
index e78480e..7527b89 100644
--- a/oozie/webrequest/refine/bundle.xml
+++ b/oozie/webrequest/refine/bundle.xml
@@ -24,6 +24,7 @@
<property><name>datasets_file</name></property>
<property><name>hive_site_xml</name></property>
+ <property><name>refinery_hive_jar_version</name></property>
<property><name>artifacts_directory</name></property>
<property><name>source_table</name></property>
<property><name>destination_table</name></property>
diff --git a/oozie/webrequest/refine/coordinator.xml
b/oozie/webrequest/refine/coordinator.xml
index 1b50fd6..e042ee3 100644
--- a/oozie/webrequest/refine/coordinator.xml
+++ b/oozie/webrequest/refine/coordinator.xml
@@ -27,6 +27,7 @@
<property><name>datasets_file</name></property>
<property><name>hive_site_xml</name></property>
+ <property><name>refinery_hive_jar_version</name></property>
<property><name>artifacts_directory</name></property>
<property><name>source_table</name></property>
<property><name>destination_table</name></property>
@@ -114,6 +115,10 @@
<value>${hive_site_xml}</value>
</property>
<property>
+ <name>refinery_hive_jar_version</name>
+ <value>${refinery_hive_jar_version}</value>
+ </property>
+ <property>
<name>artifacts_directory</name>
<value>${artifacts_directory}</value>
</property>
diff --git a/oozie/webrequest/refine/refine_webrequest.hql
b/oozie/webrequest/refine/refine_webrequest.hql
index f97ce37..d5064b4 100644
--- a/oozie/webrequest/refine/refine_webrequest.hql
+++ b/oozie/webrequest/refine/refine_webrequest.hql
@@ -1,4 +1,9 @@
-- Parameters:
+-- refinery_hive_jar_version
+-- -- Version of the jar to import for UDFs
+-- artifacts_directory
+-- -- The artifact directory where to find
+-- jar files to import for UDFs
-- source_table -- Fully qualified table name to compute the
-- statistics for.
-- destination_table -- Fully qualified table name to store the
@@ -19,6 +24,8 @@
--
-- Usage:
-- hive -f refine_webrequest.hql \
+-- -d refinery_hive_jar_version=0.0.7 \
+-- -d artifacts_directory=/wmf/refinery/current/artifacts \
-- -d source_table=wmf_raw.webrequest \
-- -d destination_table=wmf.webrequest \
-- -d webrequest_source=text \
@@ -39,7 +46,7 @@
-- table is clustered by.
SET mapreduce.job.reduces = 64;
-ADD JAR ${artifacts_directory}/org/wikimedia/analytics/refinery/refinery-hive-0.0.6.jar;
+ADD JAR ${artifacts_directory}/org/wikimedia/analytics/refinery/refinery-hive-${refinery_hive_jar_version}.jar;
CREATE TEMPORARY FUNCTION is_pageview as
'org.wikimedia.analytics.refinery.hive.IsPageviewUDF';
INSERT OVERWRITE TABLE ${destination_table}
diff --git a/oozie/webrequest/refine/workflow.xml
b/oozie/webrequest/refine/workflow.xml
index e077658..0ce5653 100644
--- a/oozie/webrequest/refine/workflow.xml
+++ b/oozie/webrequest/refine/workflow.xml
@@ -24,6 +24,10 @@
<description>hive-site.xml file path in HDFS</description>
</property>
<property>
+ <name>refinery_hive_jar_version</name>
+ <description>Version of the refinery-hive jar file to import for
UDFs</description>
+ </property>
+ <property>
<name>artifacts_directory</name>
<description>Path in HDFS to artifacts. refinery-hive.jar should
be here.</description>
</property>
@@ -88,6 +92,7 @@
</configuration>
<script>${hive_script}</script>
+ <param>refinery_hive_jar_version=${refinery_hive_jar_version}</param>
<param>artifacts_directory=${artifacts_directory}</param>
<param>source_table=${source_table}</param>
<param>destination_table=${destination_table}</param>
--
To view, visit https://gerrit.wikimedia.org/r/192891
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I42185eae73af861b0b848add8dfc27b5b0ba0287
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: Joal <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits