Ottomata has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/342250 )
Change subject: Create new refinery/job directory and move refinery cron job classes there
......................................................................
Create new refinery/job directory and move refinery cron job classes there
This just organizes the class hierarchy a bit to make doing
T160083 a little cleaner.
Change-Id: Ib960a390a57894331b9a6e0f5f5f8f4c275c490c
---
M manifests/site.pp
D modules/role/manifests/analytics_cluster/refinery/data/drop.pp
R modules/role/manifests/analytics_cluster/refinery/job/camus.pp
R modules/role/manifests/analytics_cluster/refinery/job/data_check.pp
A modules/role/manifests/analytics_cluster/refinery/job/data_drop.pp
R modules/role/manifests/analytics_cluster/refinery/job/guard.pp
6 files changed, 60 insertions(+), 57 deletions(-)
Approvals:
Ottomata: Verified; Looks good to me, approved
diff --git a/manifests/site.pp b/manifests/site.pp
index 0f52ccb..cb9184f 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -109,10 +109,10 @@
# Add cron jobs to run Camus to import data into
# HDFS from Kafka.
- analytics_cluster::refinery::camus,
+ analytics_cluster::refinery::job::camus,
# Add cron job to delete old data in HDFS
- analytics_cluster::refinery::data::drop,
+ analytics_cluster::refinery::job::data_drop,
# We need hive-site.xml in HDFS. This can be included
# on any node with a Hive client, but we really only
@@ -2633,9 +2633,9 @@
analytics_cluster::refinery,
# Include analytics/refinery checks that send email about
# webrequest partitions faultyness.
- analytics_cluster::refinery::data::check,
+ analytics_cluster::refinery::job::data_check,
# Include analytics/refinery/source guard checks
- analytics_cluster::refinery::guard,
+ analytics_cluster::refinery::job::guard,
# Set up a read only rsync module to allow access
# to public data generated by the Analytics Cluster.
diff --git a/modules/role/manifests/analytics_cluster/refinery/data/drop.pp b/modules/role/manifests/analytics_cluster/refinery/data/drop.pp
deleted file mode 100644
index c784bbb..0000000
--- a/modules/role/manifests/analytics_cluster/refinery/data/drop.pp
+++ /dev/null
@@ -1,47 +0,0 @@
-# == Class role::analytics_cluster::refinery::data::drop
-# Installs cron job to drop old hive partitions
-# and delete old data from HDFS.
-#
-class role::analytics_cluster::refinery::data::drop {
- require ::role::analytics_cluster::refinery
-
- $webrequest_log_file = "${role::analytics_cluster::refinery::log_dir}/drop-webrequest-partitions.log"
- $eventlogging_log_file = "${role::analytics_cluster::refinery::log_dir}/drop-eventlogging-partitions.log"
- $wdqs_extract_log_file = "${role::analytics_cluster::refinery::log_dir}/drop-wdqs-extract-partitions.log"
-
- # Keep this many days of raw webrequest data.
- $raw_retention_days = 31
- cron { 'refinery-drop-webrequest-raw-partitions':
- command => "export PYTHONPATH=\${PYTHONPATH}:${role::analytics_cluster::refinery::path}/python && ${role::analytics_cluster::refinery::path}/bin/refinery-drop-webrequest-partitions -d ${raw_retention_days} -D wmf_raw -l /wmf/data/raw/webrequest -w raw >> ${webrequest_log_file} 2>&1",
- user => 'hdfs',
- minute => '15',
- hour => '*/4',
- }
-
- # Keep this many days of refined webrequest data.
- $refined_retention_days = 62
- cron { 'refinery-drop-webrequest-refined-partitions':
- command => "export PYTHONPATH=\${PYTHONPATH}:${role::analytics_cluster::refinery::path}/python && ${role::analytics_cluster::refinery::path}/bin/refinery-drop-webrequest-partitions -d ${refined_retention_days} -D wmf -l /wmf/data/wmf/webrequest -w refined >> ${webrequest_log_file} 2>&1",
- user => 'hdfs',
- minute => '45',
- hour => '*/4',
- }
-
- # Keep this many days of eventlogging data.
- $eventlogging_retention_days = 90
- cron {'refinery-drop-eventlogging-partitions':
- command => "export PYTHONPATH=\${PYTHONPATH}:${role::analytics_cluster::refinery::path}/python && ${role::analytics_cluster::refinery::path}/bin/refinery-drop-eventlogging-partitions -d ${eventlogging_retention_days} -l /wmf/data/raw/eventlogging >> ${eventlogging_log_file} 2>&1",
- user => 'hdfs',
- minute => '15',
- hour => '*/4',
- }
-
- # keep this many days of wdqs_extract data
- $wdqs_extract_retention_days = 90
- cron {'refinery-drop-wdqs-extract-partitions':
- command => "export PYTHONPATH=\${PYTHONPATH}:${role::analytics_cluster::refinery::path}/python && ${role::analytics_cluster::refinery::path}/bin/refinery-drop-hourly-partitions -d ${wdqs_extract_retention_days} -p hive -D wmf -t wdqs_extract -l /wmf/data/wmf/wdqs_extract >> ${wdqs_extract_log_file} 2>&1",
- user => 'hdfs',
- minute => '0',
- hour => '1',
- }
-}
diff --git a/modules/role/manifests/analytics_cluster/refinery/camus.pp b/modules/role/manifests/analytics_cluster/refinery/job/camus.pp
similarity index 94%
rename from modules/role/manifests/analytics_cluster/refinery/camus.pp
rename to modules/role/manifests/analytics_cluster/refinery/job/camus.pp
index 320b98a..a47527c 100644
--- a/modules/role/manifests/analytics_cluster/refinery/camus.pp
+++ b/modules/role/manifests/analytics_cluster/refinery/job/camus.pp
@@ -1,8 +1,8 @@
-# == Class role::analytics_cluster::refinery::camus
+# == Class role::analytics_cluster::refinery::job::camus
# Uses camus::job to set up cron jobs to
# import data from Kafka into Hadoop.
#
-class role::analytics_cluster::refinery::camus {
+class role::analytics_cluster::refinery::job::camus {
require ::role::analytics_cluster::refinery
$kafka_config = kafka_config('analytics')
diff --git a/modules/role/manifests/analytics_cluster/refinery/data/check.pp b/modules/role/manifests/analytics_cluster/refinery/job/data_check.pp
similarity index 92%
rename from modules/role/manifests/analytics_cluster/refinery/data/check.pp
rename to modules/role/manifests/analytics_cluster/refinery/job/data_check.pp
index d5277c9..04b9f03 100644
--- a/modules/role/manifests/analytics_cluster/refinery/data/check.pp
+++ b/modules/role/manifests/analytics_cluster/refinery/job/data_check.pp
@@ -1,9 +1,9 @@
-# == Class role::analytics_cluster::refinery::data::check
+# == Class role::analytics_cluster::refinery::job::data_check
# Configures cron jobs that send email about the faultyness of webrequest data
#
# These checks walk HDFS through the plain file system.
#
-class role::analytics_cluster::refinery::data::check {
+class role::analytics_cluster::refinery::job::data_check {
require ::role::analytics_cluster::refinery
# This should not be hardcoded. Instead, one should be able to use
diff --git a/modules/role/manifests/analytics_cluster/refinery/job/data_drop.pp b/modules/role/manifests/analytics_cluster/refinery/job/data_drop.pp
new file mode 100644
index 0000000..c414642
--- /dev/null
+++ b/modules/role/manifests/analytics_cluster/refinery/job/data_drop.pp
@@ -0,0 +1,50 @@
+# == Class role::analytics_cluster::refinery::job::data_drop
+# Installs cron job to drop old hive partitions
+# and delete old data from HDFS.
+#
+class role::analytics_cluster::refinery::job::data_drop {
+ require ::role::analytics_cluster::refinery
+
+ $webrequest_log_file = "${role::analytics_cluster::refinery::log_dir}/drop-webrequest-partitions.log"
+ $eventlogging_log_file = "${role::analytics_cluster::refinery::log_dir}/drop-eventlogging-partitions.log"
+ $wdqs_extract_log_file = "${role::analytics_cluster::refinery::log_dir}/drop-wdqs-extract-partitions.log"
+
+ # Shortcut var to DRY up cron commands.
+ $env = "export PYTHONPATH=\${PYTHONPATH}:${role::analytics_cluster::refinery::path}/python"
+
+ # Keep this many days of raw webrequest data.
+ $raw_retention_days = 31
+ cron { 'refinery-drop-webrequest-raw-partitions':
+ command => "${env} && ${role::analytics_cluster::refinery::path}/bin/refinery-drop-webrequest-partitions -d ${raw_retention_days} -D wmf_raw -l /wmf/data/raw/webrequest -w raw >> ${webrequest_log_file} 2>&1",
+ user => 'hdfs',
+ minute => '15',
+ hour => '*/4',
+ }
+
+ # Keep this many days of refined webrequest data.
+ $refined_retention_days = 62
+ cron { 'refinery-drop-webrequest-refined-partitions':
+ command => "${env} && ${role::analytics_cluster::refinery::path}/bin/refinery-drop-webrequest-partitions -d ${refined_retention_days} -D wmf -l /wmf/data/wmf/webrequest -w refined >> ${webrequest_log_file} 2>&1",
+ user => 'hdfs',
+ minute => '45',
+ hour => '*/4',
+ }
+
+ # Keep this many days of eventlogging data.
+ $eventlogging_retention_days = 90
+ cron {'refinery-drop-eventlogging-partitions':
+ command => "${env} && ${role::analytics_cluster::refinery::path}/bin/refinery-drop-eventlogging-partitions -d ${eventlogging_retention_days} -l /wmf/data/raw/eventlogging >> ${eventlogging_log_file} 2>&1",
+ user => 'hdfs',
+ minute => '15',
+ hour => '*/4',
+ }
+
+ # keep this many days of wdqs_extract data
+ $wdqs_extract_retention_days = 90
+ cron {'refinery-drop-wdqs-extract-partitions':
+ command => "${env} && ${role::analytics_cluster::refinery::path}/bin/refinery-drop-hourly-partitions -d ${wdqs_extract_retention_days} -p hive -D wmf -t wdqs_extract -l /wmf/data/wmf/wdqs_extract >> ${wdqs_extract_log_file} 2>&1",
+ user => 'hdfs',
+ minute => '0',
+ hour => '1',
+ }
+}
diff --git a/modules/role/manifests/analytics_cluster/refinery/guard.pp b/modules/role/manifests/analytics_cluster/refinery/job/guard.pp
similarity index 82%
rename from modules/role/manifests/analytics_cluster/refinery/guard.pp
rename to modules/role/manifests/analytics_cluster/refinery/job/guard.pp
index 046936b..bf52787 100644
--- a/modules/role/manifests/analytics_cluster/refinery/guard.pp
+++ b/modules/role/manifests/analytics_cluster/refinery/job/guard.pp
@@ -1,8 +1,8 @@
-# == Class role::analytics_cluster::refinery::guard
+# == Class role::analytics_cluster::refinery::job::guard
# Configures a cron job that runs analytics/refinery/source guards daily and
# sends out an email upon issues
#
-class role::analytics_cluster::refinery::guard {
+class role::analytics_cluster::refinery::job::guard {
require ::role::analytics_cluster::refinery::source
include ::maven
--
To view, visit https://gerrit.wikimedia.org/r/342250
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ib960a390a57894331b9a6e0f5f5f8f4c275c490c
Gerrit-PatchSet: 3
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <[email protected]>
Gerrit-Reviewer: Elukey <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits