Ottomata has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/342250 )

Change subject: Create new refinery/job directory and move refinery cron job classes there
......................................................................


Create new refinery/job directory and move refinery cron job classes there

This just organizes the class hierarchy a bit to make doing
T160083 a little cleaner.

Change-Id: Ib960a390a57894331b9a6e0f5f5f8f4c275c490c
---
M manifests/site.pp
D modules/role/manifests/analytics_cluster/refinery/data/drop.pp
R modules/role/manifests/analytics_cluster/refinery/job/camus.pp
R modules/role/manifests/analytics_cluster/refinery/job/data_check.pp
A modules/role/manifests/analytics_cluster/refinery/job/data_drop.pp
R modules/role/manifests/analytics_cluster/refinery/job/guard.pp
6 files changed, 60 insertions(+), 57 deletions(-)

Approvals:
  Ottomata: Verified; Looks good to me, approved



diff --git a/manifests/site.pp b/manifests/site.pp
index 0f52ccb..cb9184f 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -109,10 +109,10 @@
 
         # Add cron jobs to run Camus to import data into
         # HDFS from Kafka.
-        analytics_cluster::refinery::camus,
+        analytics_cluster::refinery::job::camus,
 
         # Add cron job to delete old data in HDFS
-        analytics_cluster::refinery::data::drop,
+        analytics_cluster::refinery::job::data_drop,
 
         # We need hive-site.xml in HDFS.  This can be included
         # on any node with a Hive client, but we really only
@@ -2633,9 +2633,9 @@
         analytics_cluster::refinery,
         # Include analytics/refinery checks that send email about
         # webrequest partitions faultyness.
-        analytics_cluster::refinery::data::check,
+        analytics_cluster::refinery::job::data_check,
         # Include analytics/refinery/source guard checks
-        analytics_cluster::refinery::guard,
+        analytics_cluster::refinery::job::guard,
 
         # Set up a read only rsync module to allow access
         # to public data generated by the Analytics Cluster.
diff --git a/modules/role/manifests/analytics_cluster/refinery/data/drop.pp b/modules/role/manifests/analytics_cluster/refinery/data/drop.pp
deleted file mode 100644
index c784bbb..0000000
--- a/modules/role/manifests/analytics_cluster/refinery/data/drop.pp
+++ /dev/null
@@ -1,47 +0,0 @@
-# == Class role::analytics_cluster::refinery::data::drop
-# Installs cron job to drop old hive partitions
-# and delete old data from HDFS.
-#
-class role::analytics_cluster::refinery::data::drop {
-    require ::role::analytics_cluster::refinery
-
-    $webrequest_log_file     = "${role::analytics_cluster::refinery::log_dir}/drop-webrequest-partitions.log"
-    $eventlogging_log_file   = "${role::analytics_cluster::refinery::log_dir}/drop-eventlogging-partitions.log"
-    $wdqs_extract_log_file   = "${role::analytics_cluster::refinery::log_dir}/drop-wdqs-extract-partitions.log"
-
-    # Keep this many days of raw webrequest data.
-    $raw_retention_days = 31
-    cron { 'refinery-drop-webrequest-raw-partitions':
-        command => "export PYTHONPATH=\${PYTHONPATH}:${role::analytics_cluster::refinery::path}/python && ${role::analytics_cluster::refinery::path}/bin/refinery-drop-webrequest-partitions -d ${raw_retention_days} -D wmf_raw -l /wmf/data/raw/webrequest -w raw >> ${webrequest_log_file} 2>&1",
-        user    => 'hdfs',
-        minute  => '15',
-        hour    => '*/4',
-    }
-
-    # Keep this many days of refined webrequest data.
-    $refined_retention_days = 62
-    cron { 'refinery-drop-webrequest-refined-partitions':
-        command => "export PYTHONPATH=\${PYTHONPATH}:${role::analytics_cluster::refinery::path}/python && ${role::analytics_cluster::refinery::path}/bin/refinery-drop-webrequest-partitions -d ${refined_retention_days} -D wmf -l /wmf/data/wmf/webrequest -w refined >> ${webrequest_log_file} 2>&1",
-        user    => 'hdfs',
-        minute  => '45',
-        hour    => '*/4',
-    }
-
-    # Keep this many days of eventlogging data.
-    $eventlogging_retention_days = 90
-    cron {'refinery-drop-eventlogging-partitions':
-        command => "export PYTHONPATH=\${PYTHONPATH}:${role::analytics_cluster::refinery::path}/python && ${role::analytics_cluster::refinery::path}/bin/refinery-drop-eventlogging-partitions -d ${eventlogging_retention_days} -l /wmf/data/raw/eventlogging >> ${eventlogging_log_file} 2>&1",
-        user    => 'hdfs',
-        minute  => '15',
-        hour    => '*/4',
-    }
-
-    # keep this many days of wdqs_extract data
-    $wdqs_extract_retention_days = 90
-    cron {'refinery-drop-wdqs-extract-partitions':
-        command => "export PYTHONPATH=\${PYTHONPATH}:${role::analytics_cluster::refinery::path}/python && ${role::analytics_cluster::refinery::path}/bin/refinery-drop-hourly-partitions -d ${wdqs_extract_retention_days} -p hive -D wmf -t wdqs_extract -l /wmf/data/wmf/wdqs_extract >> ${wdqs_extract_log_file} 2>&1",
-        user    => 'hdfs',
-        minute  => '0',
-        hour    => '1',
-    }
-}
diff --git a/modules/role/manifests/analytics_cluster/refinery/camus.pp b/modules/role/manifests/analytics_cluster/refinery/job/camus.pp
similarity index 94%
rename from modules/role/manifests/analytics_cluster/refinery/camus.pp
rename to modules/role/manifests/analytics_cluster/refinery/job/camus.pp
index 320b98a..a47527c 100644
--- a/modules/role/manifests/analytics_cluster/refinery/camus.pp
+++ b/modules/role/manifests/analytics_cluster/refinery/job/camus.pp
@@ -1,8 +1,8 @@
-# == Class role::analytics_cluster::refinery::camus
+# == Class role::analytics_cluster::refinery::job::camus
 # Uses camus::job to set up cron jobs to
 # import data from Kafka into Hadoop.
 #
-class role::analytics_cluster::refinery::camus {
+class role::analytics_cluster::refinery::job::camus {
     require ::role::analytics_cluster::refinery
 
     $kafka_config = kafka_config('analytics')
diff --git a/modules/role/manifests/analytics_cluster/refinery/data/check.pp b/modules/role/manifests/analytics_cluster/refinery/job/data_check.pp
similarity index 92%
rename from modules/role/manifests/analytics_cluster/refinery/data/check.pp
rename to modules/role/manifests/analytics_cluster/refinery/job/data_check.pp
index d5277c9..04b9f03 100644
--- a/modules/role/manifests/analytics_cluster/refinery/data/check.pp
+++ b/modules/role/manifests/analytics_cluster/refinery/job/data_check.pp
@@ -1,9 +1,9 @@
-# == Class role::analytics_cluster::refinery::data::check
+# == Class role::analytics_cluster::refinery::job::data_check
 # Configures cron jobs that send email about the faultyness of webrequest data
 #
 # These checks walk HDFS through the plain file system.
 #
-class role::analytics_cluster::refinery::data::check {
+class role::analytics_cluster::refinery::job::data_check {
     require ::role::analytics_cluster::refinery
 
     # This should not be hardcoded.  Instead, one should be able to use
diff --git a/modules/role/manifests/analytics_cluster/refinery/job/data_drop.pp b/modules/role/manifests/analytics_cluster/refinery/job/data_drop.pp
new file mode 100644
index 0000000..c414642
--- /dev/null
+++ b/modules/role/manifests/analytics_cluster/refinery/job/data_drop.pp
@@ -0,0 +1,50 @@
+# == Class role::analytics_cluster::refinery::job::data_drop
+# Installs cron job to drop old hive partitions
+# and delete old data from HDFS.
+#
+class role::analytics_cluster::refinery::job::data_drop {
+    require ::role::analytics_cluster::refinery
+
+    $webrequest_log_file     = "${role::analytics_cluster::refinery::log_dir}/drop-webrequest-partitions.log"
+    $eventlogging_log_file   = "${role::analytics_cluster::refinery::log_dir}/drop-eventlogging-partitions.log"
+    $wdqs_extract_log_file   = "${role::analytics_cluster::refinery::log_dir}/drop-wdqs-extract-partitions.log"
+
+    # Shortcut var to DRY up cron commands.
+    $env = "export PYTHONPATH=\${PYTHONPATH}:${role::analytics_cluster::refinery::path}/python"
+
+    # Keep this many days of raw webrequest data.
+    $raw_retention_days = 31
+    cron { 'refinery-drop-webrequest-raw-partitions':
+        command => "${env} && ${role::analytics_cluster::refinery::path}/bin/refinery-drop-webrequest-partitions -d ${raw_retention_days} -D wmf_raw -l /wmf/data/raw/webrequest -w raw >> ${webrequest_log_file} 2>&1",
+        user    => 'hdfs',
+        minute  => '15',
+        hour    => '*/4',
+    }
+
+    # Keep this many days of refined webrequest data.
+    $refined_retention_days = 62
+    cron { 'refinery-drop-webrequest-refined-partitions':
+        command => "${env} && ${role::analytics_cluster::refinery::path}/bin/refinery-drop-webrequest-partitions -d ${refined_retention_days} -D wmf -l /wmf/data/wmf/webrequest -w refined >> ${webrequest_log_file} 2>&1",
+        user    => 'hdfs',
+        minute  => '45',
+        hour    => '*/4',
+    }
+
+    # Keep this many days of eventlogging data.
+    $eventlogging_retention_days = 90
+    cron {'refinery-drop-eventlogging-partitions':
+        command => "${env} && ${role::analytics_cluster::refinery::path}/bin/refinery-drop-eventlogging-partitions -d ${eventlogging_retention_days} -l /wmf/data/raw/eventlogging >> ${eventlogging_log_file} 2>&1",
+        user    => 'hdfs',
+        minute  => '15',
+        hour    => '*/4',
+    }
+
+    # keep this many days of wdqs_extract data
+    $wdqs_extract_retention_days = 90
+    cron {'refinery-drop-wdqs-extract-partitions':
+        command => "${env} && ${role::analytics_cluster::refinery::path}/bin/refinery-drop-hourly-partitions -d ${wdqs_extract_retention_days} -p hive -D wmf -t wdqs_extract -l /wmf/data/wmf/wdqs_extract >> ${wdqs_extract_log_file} 2>&1",
+        user    => 'hdfs',
+        minute  => '0',
+        hour    => '1',
+    }
+}
diff --git a/modules/role/manifests/analytics_cluster/refinery/guard.pp b/modules/role/manifests/analytics_cluster/refinery/job/guard.pp
similarity index 82%
rename from modules/role/manifests/analytics_cluster/refinery/guard.pp
rename to modules/role/manifests/analytics_cluster/refinery/job/guard.pp
index 046936b..bf52787 100644
--- a/modules/role/manifests/analytics_cluster/refinery/guard.pp
+++ b/modules/role/manifests/analytics_cluster/refinery/job/guard.pp
@@ -1,8 +1,8 @@
-# == Class role::analytics_cluster::refinery::guard
+# == Class role::analytics_cluster::refinery::job::guard
 # Configures a cron job that runs analytics/refinery/source guards daily and
 # sends out an email upon issues
 #
-class role::analytics_cluster::refinery::guard {
+class role::analytics_cluster::refinery::job::guard {
     require ::role::analytics_cluster::refinery::source
 
     include ::maven

-- 
To view, visit https://gerrit.wikimedia.org/r/342250
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ib960a390a57894331b9a6e0f5f5f8f4c275c490c
Gerrit-PatchSet: 3
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <[email protected]>
Gerrit-Reviewer: Elukey <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to