Ottomata has submitted this change and it was merged. Change subject: Rename webstats table to pagecounts_all_sites ......................................................................
Rename webstats table to pagecounts_all_sites We'd like to make the table name “pagecounts-all-sites” (dashes instead of underscores) to stay consistent with the dataset's public location at http://dumps.wikimedia.org/other/pagecounts-all-sites/ . However, Hive does not allow dashes in table names. Hence, we use underscores in the table name. Change-Id: I3444c4246689cf18e45da107cf0aab12d9177531 --- M diagrams/oozie-overview.dia R hive/pagecounts-all-sites/create_pagecounts_all_sites_table.hql M oozie/pagecounts-all-sites/archive/archive_pagecounts.hql M oozie/pagecounts-all-sites/archive/archive_projectcounts.hql M oozie/pagecounts-all-sites/archive/bundle.properties M oozie/pagecounts-all-sites/datasets.xml M oozie/pagecounts-all-sites/load/coordinator.properties M oozie/pagecounts-all-sites/load/insert_hourly_pagecounts.hql 8 files changed, 24 insertions(+), 24 deletions(-) Approvals: Ottomata: Verified; Looks good to me, approved diff --git a/diagrams/oozie-overview.dia b/diagrams/oozie-overview.dia index 3390fef..0825af9 100644 --- a/diagrams/oozie-overview.dia +++ b/diagrams/oozie-overview.dia Binary files differ diff --git a/hive/webstats/create_webstats_table.hql b/hive/pagecounts-all-sites/create_pagecounts_all_sites_table.hql similarity index 88% rename from hive/webstats/create_webstats_table.hql rename to hive/pagecounts-all-sites/create_pagecounts_all_sites_table.hql index 0cdf0f4..887b0be 100644 --- a/hive/webstats/create_webstats_table.hql +++ b/hive/pagecounts-all-sites/create_pagecounts_all_sites_table.hql @@ -1,4 +1,4 @@ --- Creates table for hourly webstats output +-- Creates table for hourly pagecounts-all-sites output -- -- NOTE: When choosing partition field types, -- one should take into consideration Hive's @@ -27,11 +27,11 @@ -- <none> -- -- Usage --- hive -f create_webstats_table.hql \ +-- hive -f create_pagecounts_all_sites_table.hql \ -- --database wmf -- -CREATE TABLE IF NOT EXISTS `webstats` ( +CREATE TABLE IF NOT EXISTS `pagecounts_all_sites` ( `qualifier` string COMMENT 'Language/site/project identifier', `page_title` string COMMENT 'Title of the article', `count_views` bigint COMMENT 'Summed up pageviews', @@ -42,5 +42,5 @@ `day` int COMMENT 'Unpadded day of request', `hour` int COMMENT 'Unpadded hour of request') STORED AS TEXTFILE -LOCATION '/wmf/data/wmf/webstats' +LOCATION '/wmf/data/wmf/pagecounts-all-sites' ; diff --git a/oozie/pagecounts-all-sites/archive/archive_pagecounts.hql b/oozie/pagecounts-all-sites/archive/archive_pagecounts.hql index 4448fad..ae6bdf9 100644 --- a/oozie/pagecounts-all-sites/archive/archive_pagecounts.hql +++ b/oozie/pagecounts-all-sites/archive/archive_pagecounts.hql @@ -16,12 +16,12 @@ -- -- -- Usage: --- hive -f archive_pagecounts.hql \ --- -d destination_directory=/tmp/foo \ --- -d source_table=wmf.webstats \ --- -d year=2014 \ --- -d month=4 \ --- -d day=1 \ +-- hive -f archive_pagecounts.hql \ +-- -d destination_directory=/tmp/foo \ +-- -d source_table=wmf.pagecounts_all_sites \ +-- -d year=2014 \ +-- -d month=4 \ +-- -d day=1 \ -- -d hour=0 -- diff --git a/oozie/pagecounts-all-sites/archive/archive_projectcounts.hql b/oozie/pagecounts-all-sites/archive/archive_projectcounts.hql index 82cb00b..36f31c0 100644 --- a/oozie/pagecounts-all-sites/archive/archive_projectcounts.hql +++ b/oozie/pagecounts-all-sites/archive/archive_projectcounts.hql @@ -15,12 +15,12 @@ -- -- -- Usage: --- hive -f archive_projectcounts.hql \ --- -d destination_directory=/tmp/foo \ --- -d source_table=wmf.webstats \ --- -d year=2014 \ --- -d month=4 \ --- -d day=1 \ +-- hive -f archive_projectcounts.hql \ +-- -d destination_directory=/tmp/foo \ +-- -d source_table=wmf.pagecounts_all_sites \ +-- -d year=2014 \ +-- -d month=4 \ +-- -d day=1 \ -- -d hour=0 -- diff --git a/oozie/pagecounts-all-sites/archive/bundle.properties b/oozie/pagecounts-all-sites/archive/bundle.properties index 16f58e2..7ef158f 100644 --- a/oozie/pagecounts-all-sites/archive/bundle.properties +++ b/oozie/pagecounts-all-sites/archive/bundle.properties @@ -1,5 +1,5 @@ # Configures a coordinator to generate an hourly pagecounts-all-sites files from -# the webstats table. +# the pagecounts_all_sites table. # # Usage: # oozie job -run \ @@ -41,10 +41,10 @@ hive_site_xml = ${oozie_directory}/util/hive/hive-site.xml # Table to write hourly pagecounts to (fully qualified) -pagecounts_all_sites_table = wmf.webstats +pagecounts_all_sites_table = wmf.pagecounts_all_sites # HDFS path to directory where pagecounts-all-sites data is time bucketed. -pagecounts_all_sites_data_directory = ${name_node}/wmf/data/wmf/webstats +pagecounts_all_sites_data_directory = ${name_node}/wmf/data/wmf/pagecounts-all-sites # Temporary directory temporary_directory = ${name_node}/tmp diff --git a/oozie/pagecounts-all-sites/datasets.xml b/oozie/pagecounts-all-sites/datasets.xml index 3dc9768..6c72b25 100644 --- a/oozie/pagecounts-all-sites/datasets.xml +++ b/oozie/pagecounts-all-sites/datasets.xml @@ -7,7 +7,7 @@ Example: 2014-04-01T00:00Z ${pagecounts_all_sites_data_directory} - Path to directory where data is time bucketed. - Example: /wmf/data/wmf/webstats + Example: /wmf/data/wmf/pagecounts-all-sites --> <datasets> diff --git a/oozie/pagecounts-all-sites/load/coordinator.properties b/oozie/pagecounts-all-sites/load/coordinator.properties index e89c81e..499c2d3 100644 --- a/oozie/pagecounts-all-sites/load/coordinator.properties +++ b/oozie/pagecounts-all-sites/load/coordinator.properties @@ -1,5 +1,5 @@ # Configures a coordinator to insert hourly pagecounts-all-sites data -# from webrequests table into the webstats table. +# from webrequests table into the pagecounts_all_sites table. # # Usage: # oozie job -run \ @@ -42,13 +42,13 @@ webrequest_table = wmf_raw.webrequest # Table to write hourly pagecounts to (fully qualified) -pagecounts_all_sites_table = wmf.webstats +pagecounts_all_sites_table = wmf.pagecounts_all_sites # HDFS paths to directories where webrequest data is time bucketed. webrequest_data_directory = ${name_node}/wmf/data/raw/webrequest # HDFS path to directory where pagecounts-all-sites data is time bucketed. -pagecounts_all_sites_data_directory = ${name_node}/wmf/data/wmf/webstats +pagecounts_all_sites_data_directory = ${name_node}/wmf/data/wmf/pagecounts-all-sites # Coordintator to start. oozie.coord.application.path = ${oozie_directory}/pagecounts-all-sites/load/coordinator.xml diff --git a/oozie/pagecounts-all-sites/load/insert_hourly_pagecounts.hql b/oozie/pagecounts-all-sites/load/insert_hourly_pagecounts.hql index e26a31e..cf41277 100644 --- a/oozie/pagecounts-all-sites/load/insert_hourly_pagecounts.hql +++ b/oozie/pagecounts-all-sites/load/insert_hourly_pagecounts.hql @@ -7,7 +7,7 @@ -- Usage: -- hive -f insert_hourly_pagecounts.hql \ -- -d source_table=wmf_raw.webrequest \ --- -d destination_table=wmf.webstats \ +-- -d destination_table=wmf.pagecounts_all_sites \ -- -d year=2014 \ -- -d month=9 \ -- -d day=15 \ -- To view, visit https://gerrit.wikimedia.org/r/182809 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I3444c4246689cf18e45da107cf0aab12d9177531 Gerrit-PatchSet: 2 Gerrit-Project: analytics/refinery Gerrit-Branch: master Gerrit-Owner: QChris <[email protected]> Gerrit-Reviewer: Ottomata <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
