Hello Ottomata,
I'd like you to do a code review. Please visit
https://gerrit.wikimedia.org/r/182809
to review the following change.
Change subject: Rename webstats table to pagecounts_all_sites
......................................................................
Rename webstats table to pagecounts_all_sites
We'd like to name the table “pagecounts-all-sites” (with dashes
instead of underscores) to stay consistent with the dataset's public
location at
http://dumps.wikimedia.org/other/pagecounts-all-sites/
However, Hive does not allow dashes in table names, so we use
underscores instead: “pagecounts_all_sites”.
Change-Id: I3444c4246689cf18e45da107cf0aab12d9177531
---
M diagrams/oozie-overview.dia
R hive/pagecounts-all-sites/create_pagecounts_all_sites_table.hql
M oozie/pagecounts-all-sites/archive/archive_pagecounts.hql
M oozie/pagecounts-all-sites/archive/archive_projectcounts.hql
M oozie/pagecounts-all-sites/archive/bundle.properties
M oozie/pagecounts-all-sites/datasets.xml
M oozie/pagecounts-all-sites/load/coordinator.properties
M oozie/pagecounts-all-sites/load/insert_hourly_pagecounts.hql
8 files changed, 24 insertions(+), 24 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery
refs/changes/09/182809/1
diff --git a/diagrams/oozie-overview.dia b/diagrams/oozie-overview.dia
index 3390fef..0825af9 100644
--- a/diagrams/oozie-overview.dia
+++ b/diagrams/oozie-overview.dia
Binary files differ
diff --git a/hive/webstats/create_webstats_table.hql
b/hive/pagecounts-all-sites/create_pagecounts_all_sites_table.hql
similarity index 88%
rename from hive/webstats/create_webstats_table.hql
rename to hive/pagecounts-all-sites/create_pagecounts_all_sites_table.hql
index 0cdf0f4..887b0be 100644
--- a/hive/webstats/create_webstats_table.hql
+++ b/hive/pagecounts-all-sites/create_pagecounts_all_sites_table.hql
@@ -1,4 +1,4 @@
--- Creates table for hourly webstats output
+-- Creates table for hourly pagecounts-all-sites output
--
-- NOTE: When choosing partition field types,
-- one should take into consideration Hive's
@@ -27,11 +27,11 @@
-- <none>
--
-- Usage
--- hive -f create_webstats_table.hql \
+-- hive -f create_pagecounts_all_sites_table.hql \
-- --database wmf
--
-CREATE TABLE IF NOT EXISTS `webstats` (
+CREATE TABLE IF NOT EXISTS `pagecounts_all_sites` (
`qualifier` string COMMENT 'Language/site/project identifier',
`page_title` string COMMENT 'Title of the article',
`count_views` bigint COMMENT 'Summed up pageviews',
@@ -42,5 +42,5 @@
`day` int COMMENT 'Unpadded day of request',
`hour` int COMMENT 'Unpadded hour of request')
STORED AS TEXTFILE
-LOCATION '/wmf/data/wmf/webstats'
+LOCATION '/wmf/data/wmf/pagecounts-all-sites'
;
diff --git a/oozie/pagecounts-all-sites/archive/archive_pagecounts.hql
b/oozie/pagecounts-all-sites/archive/archive_pagecounts.hql
index 4448fad..ae6bdf9 100644
--- a/oozie/pagecounts-all-sites/archive/archive_pagecounts.hql
+++ b/oozie/pagecounts-all-sites/archive/archive_pagecounts.hql
@@ -16,12 +16,12 @@
--
--
-- Usage:
--- hive -f archive_pagecounts.hql \
--- -d destination_directory=/tmp/foo \
--- -d source_table=wmf.webstats \
--- -d year=2014 \
--- -d month=4 \
--- -d day=1 \
+-- hive -f archive_pagecounts.hql \
+-- -d destination_directory=/tmp/foo \
+-- -d source_table=wmf.pagecounts_all_sites \
+-- -d year=2014 \
+-- -d month=4 \
+-- -d day=1 \
-- -d hour=0
--
diff --git a/oozie/pagecounts-all-sites/archive/archive_projectcounts.hql
b/oozie/pagecounts-all-sites/archive/archive_projectcounts.hql
index 82cb00b..36f31c0 100644
--- a/oozie/pagecounts-all-sites/archive/archive_projectcounts.hql
+++ b/oozie/pagecounts-all-sites/archive/archive_projectcounts.hql
@@ -15,12 +15,12 @@
--
--
-- Usage:
--- hive -f archive_projectcounts.hql \
--- -d destination_directory=/tmp/foo \
--- -d source_table=wmf.webstats \
--- -d year=2014 \
--- -d month=4 \
--- -d day=1 \
+-- hive -f archive_projectcounts.hql \
+-- -d destination_directory=/tmp/foo \
+-- -d source_table=wmf.pagecounts_all_sites \
+-- -d year=2014 \
+-- -d month=4 \
+-- -d day=1 \
-- -d hour=0
--
diff --git a/oozie/pagecounts-all-sites/archive/bundle.properties
b/oozie/pagecounts-all-sites/archive/bundle.properties
index 16f58e2..7ef158f 100644
--- a/oozie/pagecounts-all-sites/archive/bundle.properties
+++ b/oozie/pagecounts-all-sites/archive/bundle.properties
@@ -1,5 +1,5 @@
# Configures a coordinator to generate an hourly pagecounts-all-sites files
from
-# the webstats table.
+# the pagecounts_all_sites table.
#
# Usage:
# oozie job -run \
@@ -41,10 +41,10 @@
hive_site_xml =
${oozie_directory}/util/hive/hive-site.xml
# Table to write hourly pagecounts to (fully qualified)
-pagecounts_all_sites_table = wmf.webstats
+pagecounts_all_sites_table = wmf.pagecounts_all_sites
# HDFS path to directory where pagecounts-all-sites data is time bucketed.
-pagecounts_all_sites_data_directory = ${name_node}/wmf/data/wmf/webstats
+pagecounts_all_sites_data_directory =
${name_node}/wmf/data/wmf/pagecounts-all-sites
# Temporary directory
temporary_directory = ${name_node}/tmp
diff --git a/oozie/pagecounts-all-sites/datasets.xml
b/oozie/pagecounts-all-sites/datasets.xml
index 3dc9768..6c72b25 100644
--- a/oozie/pagecounts-all-sites/datasets.xml
+++ b/oozie/pagecounts-all-sites/datasets.xml
@@ -7,7 +7,7 @@
Example: 2014-04-01T00:00Z
${pagecounts_all_sites_data_directory}
- Path to directory where data is time bucketed.
- Example: /wmf/data/wmf/webstats
+ Example: /wmf/data/wmf/pagecounts-all-sites
-->
<datasets>
diff --git a/oozie/pagecounts-all-sites/load/coordinator.properties
b/oozie/pagecounts-all-sites/load/coordinator.properties
index e89c81e..499c2d3 100644
--- a/oozie/pagecounts-all-sites/load/coordinator.properties
+++ b/oozie/pagecounts-all-sites/load/coordinator.properties
@@ -1,5 +1,5 @@
# Configures a coordinator to insert hourly pagecounts-all-sites data
-# from webrequests table into the webstats table.
+# from webrequests table into the pagecounts_all_sites table.
#
# Usage:
# oozie job -run \
@@ -42,13 +42,13 @@
webrequest_table = wmf_raw.webrequest
# Table to write hourly pagecounts to (fully qualified)
-pagecounts_all_sites_table = wmf.webstats
+pagecounts_all_sites_table = wmf.pagecounts_all_sites
# HDFS paths to directories where webrequest data is time bucketed.
webrequest_data_directory = ${name_node}/wmf/data/raw/webrequest
# HDFS path to directory where pagecounts-all-sites data is time bucketed.
-pagecounts_all_sites_data_directory = ${name_node}/wmf/data/wmf/webstats
+pagecounts_all_sites_data_directory =
${name_node}/wmf/data/wmf/pagecounts-all-sites
# Coordintator to start.
oozie.coord.application.path =
${oozie_directory}/pagecounts-all-sites/load/coordinator.xml
diff --git a/oozie/pagecounts-all-sites/load/insert_hourly_pagecounts.hql
b/oozie/pagecounts-all-sites/load/insert_hourly_pagecounts.hql
index e26a31e..cf41277 100644
--- a/oozie/pagecounts-all-sites/load/insert_hourly_pagecounts.hql
+++ b/oozie/pagecounts-all-sites/load/insert_hourly_pagecounts.hql
@@ -7,7 +7,7 @@
-- Usage:
-- hive -f insert_hourly_pagecounts.hql \
-- -d source_table=wmf_raw.webrequest \
--- -d destination_table=wmf.webstats \
+-- -d destination_table=wmf.pagecounts_all_sites \
-- -d year=2014 \
-- -d month=9 \
-- -d day=15 \
--
To view, visit https://gerrit.wikimedia.org/r/182809
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I3444c4246689cf18e45da107cf0aab12d9177531
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: QChris <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits