Hello Ottomata,
I'd like you to do a code review. Please visit
https://gerrit.wikimedia.org/r/148648
to review the following change.
Change subject: Switch to fully qualified table names
......................................................................
Switch to fully qualified table names
Change-Id: If4d99d62f9ba89a560ad39af29a1b8ec19180b72
---
M hive/webrequest/create_webrequest_sequence_stats_table.hql
M oozie/util/hive/partition/add/add_partition.hql
M oozie/util/hive/partition/add/workflow.properties
M oozie/util/hive/partition/add/workflow.xml
M oozie/webrequest/partition/add/coordinator.properties
M oozie/webrequest/partition/add/coordinator.xml
6 files changed, 11 insertions(+), 24 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery
refs/changes/48/148648/1
diff --git a/hive/webrequest/create_webrequest_sequence_stats_table.hql
b/hive/webrequest/create_webrequest_sequence_stats_table.hql
index 480e21e..a4fb2c2 100644
--- a/hive/webrequest/create_webrequest_sequence_stats_table.hql
+++ b/hive/webrequest/create_webrequest_sequence_stats_table.hql
@@ -1,4 +1,4 @@
-CREATE TABLE webrequest_sequence_stats(
+CREATE TABLE wmf_raw.webrequest_sequence_stats(
hostname string COMMENT 'Source node hostname',
webrequest_source string COMMENT 'Source cluster',
year int,
diff --git a/oozie/util/hive/partition/add/add_partition.hql
b/oozie/util/hive/partition/add/add_partition.hql
index 618319e..4c1bdb6 100644
--- a/oozie/util/hive/partition/add/add_partition.hql
+++ b/oozie/util/hive/partition/add/add_partition.hql
@@ -1,3 +1,5 @@
+-- Since ALTER TABLE does not handle fully qualified table names, we
+-- have to require database and table as separate parameters.
USE ${database};
ALTER TABLE ${table}
ADD IF NOT EXISTS
diff --git a/oozie/util/hive/partition/add/workflow.properties
b/oozie/util/hive/partition/add/workflow.properties
index cc47aca..b93d416 100644
--- a/oozie/util/hive/partition/add/workflow.properties
+++ b/oozie/util/hive/partition/add/workflow.properties
@@ -19,11 +19,8 @@
# HDFS path to hive-site.xml file. This is needed to run hive actions.
hive_site_xml = ${oozie_directory}/util/hive/hive-site.xml
-# Hive database name.
-database = wmf_raw
-
-# Hive table name.
-table = webrequest
+# Fully qualified Hive table name.
+table = wmf_raw.webrequest
# Workflow app to run.
oozie.wf.application.path =
${oozie_directory}/util/hive/partition/add/workflow.xml
diff --git a/oozie/util/hive/partition/add/workflow.xml
b/oozie/util/hive/partition/add/workflow.xml
index 1f16b99..63bf68a 100644
--- a/oozie/util/hive/partition/add/workflow.xml
+++ b/oozie/util/hive/partition/add/workflow.xml
@@ -23,12 +23,8 @@
<description>hive-site.xml file path in HDFS</description>
</property>
<property>
- <name>database</name>
- <description>Hive database to use.</description>
- </property>
- <property>
<name>table</name>
- <description>Hive table to partition.</description>
+ <description>Fully qualified name of Hive table to
partition.</description>
</property>
<property>
<name>partition_spec</name>
@@ -55,8 +51,8 @@
</configuration>
<script>${hive_script}</script>
- <param>database=${database}</param>
- <param>table=${table}</param>
+ <param>database=${replaceAll(table, "\\..*", "")}</param>
+ <param>table=${replaceAll(table, "^.*\\.", "")}</param>
<param>location=${location}</param>
<param>partition_spec=${partition_spec}</param>
</hive>
diff --git a/oozie/webrequest/partition/add/coordinator.properties
b/oozie/webrequest/partition/add/coordinator.properties
index 924510f..6c10ab1 100644
--- a/oozie/webrequest/partition/add/coordinator.properties
+++ b/oozie/webrequest/partition/add/coordinator.properties
@@ -35,14 +35,11 @@
# HDFS path to hive-site.xml file. This is needed to run hive actions.
hive_site_xml = ${oozie_directory}/util/hive/hive-site.xml
-# Hive database name.
-database = wmf_raw
-
-# Hive table name.
-table = webrequest
+# Fully qualified Hive table name.
+table = wmf_raw.webrequest
# HDFS path to directory where webrequest data is time bucketed.
-data_directory =
${name_node}/wmf/data/raw/${table}/webrequest_${webrequest_source}/hourly
+data_directory =
${name_node}/wmf/data/raw/webrequest/webrequest_${webrequest_source}/hourly
# Coordintator to start.
oozie.coord.application.path =
${oozie_directory}/webrequest/partition/add/coordinator.xml
diff --git a/oozie/webrequest/partition/add/coordinator.xml
b/oozie/webrequest/partition/add/coordinator.xml
index 179f929..2ae0ae9 100644
--- a/oozie/webrequest/partition/add/coordinator.xml
+++ b/oozie/webrequest/partition/add/coordinator.xml
@@ -21,7 +21,6 @@
<property><name>data_directory</name></property>
<property><name>hive_site_xml</name></property>
- <property><name>database</name></property>
<property><name>table</name></property>
<property><name>webrequest_source</name></property>
</parameters>
@@ -70,10 +69,6 @@
<property>
<name>hive_site_xml</name>
<value>${hive_site_xml}</value>
- </property>
- <property>
- <name>database</name>
- <value>${database}</value>
</property>
<property>
<name>table</name>
--
To view, visit https://gerrit.wikimedia.org/r/148648
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: If4d99d62f9ba89a560ad39af29a1b8ec19180b72
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: QChris <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits