QChris has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/172201

Change subject: Add jobs for aggregating hourly projectcount files to daily per 
wiki csvs
......................................................................

Add jobs for aggregating hourly projectcount files to daily per wiki csvs

Bug: 72740
Change-Id: Id1a4a9cd2d6a401636ac844ce102d6ee61771e55
---
M manifests/misc/statistics.pp
M manifests/role/statistics.pp
M manifests/site.pp
3 files changed, 92 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet 
refs/changes/01/172201/1

diff --git a/manifests/misc/statistics.pp b/manifests/misc/statistics.pp
index 33c4db8..410d0ad 100644
--- a/manifests/misc/statistics.pp
+++ b/manifests/misc/statistics.pp
@@ -1115,3 +1115,81 @@
         mode  => '0440',
     }
 }
+
+# == Class misc::statistics::aggregator::projectcounts::common
+# Shared paths and owner/group for the aggregator jobs; declares git::clone resources for the code and data repos
+class misc::statistics::aggregator::projectcounts::common {
+    include misc::statistics::base
+    include misc::statistics::user
+
+    $user           = $misc::statistics::user::username
+    $group          = $misc::statistics::user::username
+
+    $working_path   = "${misc::statistics::base::working_path}/aggregator"
+    $script_path    = "${working_path}/scripts"
+    $data_repo_path = "${working_path}/data"
+    $data_path      = "${data_repo_path}/projectcounts/daily"
+
+    git::clone { 'aggregator_code':
+        ensure    => 'latest',
+        origin    => 'https://gerrit.wikimedia.org/r/p/analytics/aggregator.git',
+        directory => $script_path,
+        mode      => '0750',
+        owner     => $user,
+        group     => $group,
+    }
+
+    git::clone { 'aggregator_data':
+        ensure    => 'latest',
+        origin    => 'https://gerrit.wikimedia.org/r/p/analytics/aggregator/data.git',
+        directory => $data_repo_path,
+        mode      => '0750',
+        owner     => $user,
+        group     => $group,
+    }
+}
+
+# == Class misc::statistics::aggregator::projectcounts::jobs::aggregate
+# Aggregates hourly projectcounts files into daily per project csvs (paths and owner/group are read from ::common)
+class misc::statistics::aggregator::projectcounts::jobs::aggregate {
+    include misc::statistics::aggregator::projectcounts::common
+
+    $working_path = $misc::statistics::aggregator::projectcounts::common::working_path
+    $script_path  = $misc::statistics::aggregator::projectcounts::common::script_path
+    $data_path    = $misc::statistics::aggregator::projectcounts::common::data_path
+    $user         = $misc::statistics::aggregator::projectcounts::common::user
+    $group        = $misc::statistics::aggregator::projectcounts::common::group
+    $log_path     = "${working_path}/log"
+    # Directory for the per-run, timestamped log file that the cron job below passes via --log
+    file { $log_path:
+        ensure => directory,
+        owner  => $user,
+        group  => $group,
+        mode   => '0750',
+    }
+    # Daily at 13:00, covering 8 days ago through yesterday; % is escaped because crontab treats a bare % as a newline
+    cron { 'aggregator projectcounts aggregate':
+        command => "${script_path}/bin/aggregate_projectcounts --source /mnt/hdfs/wmf/data/archive/webstats --target ${data_path} --first-date=`date --date='-8 day' +\\%Y-\\%m-\\%d` --last-date=`date --date='-1 day' +\\%Y-\\%m-\\%d` --push-target --log ${log_path}/`date +\\%Y-\\%m-\\%d--\\%H-\\%M-\\%S`.log",
+        require => File[$log_path],
+        user    => $user,
+        hour    => '13',
+        minute  => '0',
+    }
+}
+
+# == Class misc::statistics::aggregator::projectcounts::jobs::monitor
+# Basic monitoring of the aggregated daily per project csvs
+class misc::statistics::aggregator::projectcounts::jobs::monitor {
+    include misc::statistics::aggregator::projectcounts::common
+
+    $script_path  = $misc::statistics::aggregator::projectcounts::common::script_path
+    $data_path    = $misc::statistics::aggregator::projectcounts::common::data_path
+    $user         = $misc::statistics::aggregator::projectcounts::common::user
+    # Runs the validity check daily at 13:45, as the user defined in ::common, against the daily csv directory
+    cron { 'aggregator projectcounts monitor':
+        command => "${script_path}/bin/check_validity_aggregated_projectcounts --data ${data_path}",
+        user    => $user,
+        hour    => '13',
+        minute  => '45',
+    }
+}
diff --git a/manifests/role/statistics.pp b/manifests/role/statistics.pp
index d3ab51c..7d78e7c 100644
--- a/manifests/role/statistics.pp
+++ b/manifests/role/statistics.pp
@@ -81,3 +81,14 @@
     # backup eventlogging logs
     backup::set { 'a-eventlogging' : }
 }
+
+# Role that includes the projectcounts aggregation job and the monitoring job for its output
+class role::statistics::aggregator inherits role::statistics {
+    # Titled after this class; a generic 'role::statistics' title risks a duplicate declaration with other roles
+    system::role { 'role::statistics::aggregator':
+        description => 'statistics aggregator',
+    }
+
+    include misc::statistics::aggregator::projectcounts::jobs::aggregate
+    include misc::statistics::aggregator::projectcounts::jobs::monitor
+}
diff --git a/manifests/site.pp b/manifests/site.pp
index 2be86fc..71ace6e 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -2595,6 +2595,9 @@
     # and only readable by users in the
     # analytics-privatedata-users group.
     include role::analytics::password::research
+
+    # Run aggregation jobs
+    include role::statistics::aggregator
 }
 
 # stat1003 is a general purpose number cruncher for

-- 
To view, visit https://gerrit.wikimedia.org/r/172201
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id1a4a9cd2d6a401636ac844ce102d6ee61771e55
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: QChris <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to