Hello Ottomata,

I'd like you to do a code review.  Please visit

    https://gerrit.wikimedia.org/r/182809

to review the following change.

Change subject: Rename webstats table to pagecounts_all_sites
......................................................................

Rename webstats table to pagecounts_all_sites

We'd like to name the table “pagecounts-all-sites” (with dashes
instead of underscores) to stay consistent with the dataset's public
location at

  http://dumps.wikimedia.org/other/pagecounts-all-sites/

However, Hive does not allow dashes in table names. Hence, we use
underscores in the table name instead.

Change-Id: I3444c4246689cf18e45da107cf0aab12d9177531
---
M diagrams/oozie-overview.dia
R hive/pagecounts-all-sites/create_pagecounts_all_sites_table.hql
M oozie/pagecounts-all-sites/archive/archive_pagecounts.hql
M oozie/pagecounts-all-sites/archive/archive_projectcounts.hql
M oozie/pagecounts-all-sites/archive/bundle.properties
M oozie/pagecounts-all-sites/datasets.xml
M oozie/pagecounts-all-sites/load/coordinator.properties
M oozie/pagecounts-all-sites/load/insert_hourly_pagecounts.hql
8 files changed, 24 insertions(+), 24 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery refs/changes/09/182809/1

diff --git a/diagrams/oozie-overview.dia b/diagrams/oozie-overview.dia
index 3390fef..0825af9 100644
--- a/diagrams/oozie-overview.dia
+++ b/diagrams/oozie-overview.dia
Binary files differ
diff --git a/hive/webstats/create_webstats_table.hql b/hive/pagecounts-all-sites/create_pagecounts_all_sites_table.hql
similarity index 88%
rename from hive/webstats/create_webstats_table.hql
rename to hive/pagecounts-all-sites/create_pagecounts_all_sites_table.hql
index 0cdf0f4..887b0be 100644
--- a/hive/webstats/create_webstats_table.hql
+++ b/hive/pagecounts-all-sites/create_pagecounts_all_sites_table.hql
@@ -1,4 +1,4 @@
--- Creates table for hourly webstats output
+-- Creates table for hourly pagecounts-all-sites output
 --
 -- NOTE:  When choosing partition field types,
 -- one should take into consideration Hive's
@@ -27,11 +27,11 @@
 --     <none>
 --
 -- Usage
---     hive -f create_webstats_table.hql \
+--     hive -f create_pagecounts_all_sites_table.hql \
 --         --database wmf
 --
 
-CREATE TABLE IF NOT EXISTS `webstats` (
+CREATE TABLE IF NOT EXISTS `pagecounts_all_sites` (
     `qualifier`           string COMMENT 'Language/site/project identifier',
     `page_title`          string COMMENT 'Title of the article',
     `count_views`         bigint COMMENT 'Summed up pageviews',
@@ -42,5 +42,5 @@
     `day`                 int    COMMENT 'Unpadded day of request',
     `hour`                int    COMMENT 'Unpadded hour of request')
 STORED AS TEXTFILE
-LOCATION '/wmf/data/wmf/webstats'
+LOCATION '/wmf/data/wmf/pagecounts-all-sites'
 ;
diff --git a/oozie/pagecounts-all-sites/archive/archive_pagecounts.hql b/oozie/pagecounts-all-sites/archive/archive_pagecounts.hql
index 4448fad..ae6bdf9 100644
--- a/oozie/pagecounts-all-sites/archive/archive_pagecounts.hql
+++ b/oozie/pagecounts-all-sites/archive/archive_pagecounts.hql
@@ -16,12 +16,12 @@
 --
 --
 -- Usage:
---     hive -f archive_pagecounts.hql  \
---         -d destination_directory=/tmp/foo      \
---         -d source_table=wmf.webstats           \
---         -d year=2014                           \
---         -d month=4                             \
---         -d day=1                               \
+--     hive -f archive_pagecounts.hql               \
+--         -d destination_directory=/tmp/foo        \
+--         -d source_table=wmf.pagecounts_all_sites \
+--         -d year=2014                             \
+--         -d month=4                               \
+--         -d day=1                                 \
 --         -d hour=0
 --
 
diff --git a/oozie/pagecounts-all-sites/archive/archive_projectcounts.hql b/oozie/pagecounts-all-sites/archive/archive_projectcounts.hql
index 82cb00b..36f31c0 100644
--- a/oozie/pagecounts-all-sites/archive/archive_projectcounts.hql
+++ b/oozie/pagecounts-all-sites/archive/archive_projectcounts.hql
@@ -15,12 +15,12 @@
 --
 --
 -- Usage:
---     hive -f archive_projectcounts.hql  \
---         -d destination_directory=/tmp/foo         \
---         -d source_table=wmf.webstats              \
---         -d year=2014                              \
---         -d month=4                                \
---         -d day=1                                  \
+--     hive -f archive_projectcounts.hql            \
+--         -d destination_directory=/tmp/foo        \
+--         -d source_table=wmf.pagecounts_all_sites \
+--         -d year=2014                             \
+--         -d month=4                               \
+--         -d day=1                                 \
 --         -d hour=0
 --
 
diff --git a/oozie/pagecounts-all-sites/archive/bundle.properties b/oozie/pagecounts-all-sites/archive/bundle.properties
index 16f58e2..7ef158f 100644
--- a/oozie/pagecounts-all-sites/archive/bundle.properties
+++ b/oozie/pagecounts-all-sites/archive/bundle.properties
@@ -1,5 +1,5 @@
 # Configures a coordinator to generate an hourly pagecounts-all-sites files from
-# the webstats table.
+# the pagecounts_all_sites table.
 #
 # Usage:
 #     oozie job -run \
@@ -41,10 +41,10 @@
 hive_site_xml                          = ${oozie_directory}/util/hive/hive-site.xml
 
 # Table to write hourly pagecounts to (fully qualified)
-pagecounts_all_sites_table             = wmf.webstats
+pagecounts_all_sites_table             = wmf.pagecounts_all_sites
 
 # HDFS path to directory where pagecounts-all-sites data is time bucketed.
-pagecounts_all_sites_data_directory    = ${name_node}/wmf/data/wmf/webstats
+pagecounts_all_sites_data_directory    = ${name_node}/wmf/data/wmf/pagecounts-all-sites
 
 # Temporary directory
 temporary_directory                    = ${name_node}/tmp
diff --git a/oozie/pagecounts-all-sites/datasets.xml b/oozie/pagecounts-all-sites/datasets.xml
index 3dc9768..6c72b25 100644
--- a/oozie/pagecounts-all-sites/datasets.xml
+++ b/oozie/pagecounts-all-sites/datasets.xml
@@ -7,7 +7,7 @@
                         Example: 2014-04-01T00:00Z
     ${pagecounts_all_sites_data_directory}
                       - Path to directory where data is time bucketed.
-                        Example: /wmf/data/wmf/webstats
+                        Example: /wmf/data/wmf/pagecounts-all-sites
 -->
 
 <datasets>
diff --git a/oozie/pagecounts-all-sites/load/coordinator.properties b/oozie/pagecounts-all-sites/load/coordinator.properties
index e89c81e..499c2d3 100644
--- a/oozie/pagecounts-all-sites/load/coordinator.properties
+++ b/oozie/pagecounts-all-sites/load/coordinator.properties
@@ -1,5 +1,5 @@
 # Configures a coordinator to insert hourly pagecounts-all-sites data
-# from webrequests table into the webstats table.
+# from webrequests table into the pagecounts_all_sites table.
 #
 # Usage:
 #     oozie job -run \
@@ -42,13 +42,13 @@
 webrequest_table                    = wmf_raw.webrequest
 
 # Table to write hourly pagecounts to (fully qualified)
-pagecounts_all_sites_table          = wmf.webstats
+pagecounts_all_sites_table          = wmf.pagecounts_all_sites
 
 # HDFS paths to directories where webrequest data is time bucketed.
 webrequest_data_directory           = ${name_node}/wmf/data/raw/webrequest
 
 # HDFS path to directory where pagecounts-all-sites data is time bucketed.
-pagecounts_all_sites_data_directory = ${name_node}/wmf/data/wmf/webstats
+pagecounts_all_sites_data_directory = ${name_node}/wmf/data/wmf/pagecounts-all-sites
 
 # Coordintator to start.
 oozie.coord.application.path        = ${oozie_directory}/pagecounts-all-sites/load/coordinator.xml
diff --git a/oozie/pagecounts-all-sites/load/insert_hourly_pagecounts.hql b/oozie/pagecounts-all-sites/load/insert_hourly_pagecounts.hql
index e26a31e..cf41277 100644
--- a/oozie/pagecounts-all-sites/load/insert_hourly_pagecounts.hql
+++ b/oozie/pagecounts-all-sites/load/insert_hourly_pagecounts.hql
@@ -7,7 +7,7 @@
 -- Usage:
 --     hive -f insert_hourly_pagecounts.hql \
 --         -d source_table=wmf_raw.webrequest \
---         -d destination_table=wmf.webstats \
+--         -d destination_table=wmf.pagecounts_all_sites \
 --         -d year=2014 \
 --         -d month=9 \
 --         -d day=15 \

-- 
To view, visit https://gerrit.wikimedia.org/r/182809
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I3444c4246689cf18e45da107cf0aab12d9177531
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: QChris <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to