QChris has uploaded a new change for review.
https://gerrit.wikimedia.org/r/172201
Change subject: Add jobs for aggregating hourly projectcount files to daily per
wiki csvs
......................................................................
Add jobs for aggregating hourly projectcount files to daily per wiki csvs
Bug: 72740
Change-Id: Id1a4a9cd2d6a401636ac844ce102d6ee61771e55
---
M manifests/misc/statistics.pp
M manifests/role/statistics.pp
M manifests/site.pp
3 files changed, 92 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet
refs/changes/01/172201/1
diff --git a/manifests/misc/statistics.pp b/manifests/misc/statistics.pp
index 33c4db8..410d0ad 100644
--- a/manifests/misc/statistics.pp
+++ b/manifests/misc/statistics.pp
@@ -1115,3 +1115,81 @@
mode => '0440',
}
}
+
+# == Class misc::statistics::aggregator::projectcounts::common
+#
+# Shared setup for the projectcounts aggregator jobs. Derives the paths and
+# the user/group the jobs run with, and declares git clones (ensure => latest)
+# of the aggregator code and aggregator data repositories.
+#
+class misc::statistics::aggregator::projectcounts::common {
+    include misc::statistics::base
+    include misc::statistics::user
+
+    # Everything aggregator-related lives below this directory.
+    $working_path   = "${misc::statistics::base::working_path}/aggregator"
+    $script_path    = "${working_path}/scripts"
+    $data_repo_path = "${working_path}/data"
+    $data_path      = "${data_repo_path}/projectcounts/daily"
+
+    # Owner and group are both set to the statistics user's name.
+    $user  = $misc::statistics::user::username
+    $group = $misc::statistics::user::username
+
+    # Code repository, checked out into $script_path.
+    git::clone { 'aggregator_code':
+        ensure    => 'latest',
+        origin    => 'https://gerrit.wikimedia.org/r/p/analytics/aggregator.git',
+        directory => $script_path,
+        mode      => '0750',
+        owner     => $user,
+        group     => $group,
+    }
+
+    # Data repository, checked out into $data_repo_path.
+    git::clone { 'aggregator_data':
+        ensure    => 'latest',
+        origin    => 'https://gerrit.wikimedia.org/r/p/analytics/aggregator/data.git',
+        directory => $data_repo_path,
+        mode      => '0750',
+        owner     => $user,
+        group     => $group,
+    }
+}
+
+# == Class misc::statistics::aggregator::projectcounts::jobs::aggregate
+#
+# Aggregates hourly projectcounts files into daily per project csvs.
+#
+# Declares the log directory and a cron job (daily at 13:00) that runs
+# bin/aggregate_projectcounts from the aggregator code checkout for the date
+# range "8 days ago" through "yesterday", writing a timestamped log file.
+#
+class misc::statistics::aggregator::projectcounts::jobs::aggregate {
+    include misc::statistics::aggregator::projectcounts::common
+
+    $working_path = $misc::statistics::aggregator::projectcounts::common::working_path
+    $script_path = $misc::statistics::aggregator::projectcounts::common::script_path
+    $data_path = $misc::statistics::aggregator::projectcounts::common::data_path
+    # $user and $group are variables of the common class itself; they are not
+    # nested below its $working_path variable.
+    $user = $misc::statistics::aggregator::projectcounts::common::user
+    $group = $misc::statistics::aggregator::projectcounts::common::group
+    $log_path = "${working_path}/log"
+
+    file { $log_path:
+        ensure => directory,
+        owner  => $user,
+        group  => $group,
+        mode   => '0750',
+    }
+
+    cron { 'aggregator projectcounts aggregate':
+        # The command must stay on one line. '%' is written as '\\%' so that
+        # the crontab entry contains '\%' (cron treats a bare '%' specially).
+        command => "${script_path}/bin/aggregate_projectcounts --source /mnt/hdfs/wmf/data/archive/webstats --target ${data_path} --first-date=`date --date='-8 day' +\\%Y-\\%m-\\%d` --last-date=`date --date='-1 day' +\\%Y-\\%m-\\%d` --push-target --log ${log_path}/`date +\\%Y-\\%m-\\%d--\\%H-\\%M-\\%S`.log",
+        require => File[$log_path],
+        user    => $user,
+        hour    => '13',
+        minute  => '0',
+    }
+}
+
+# == Class misc::statistics::aggregator::projectcounts::jobs::monitor
+#
+# Basic monitoring of the aggregated daily per project csvs.
+#
+# Declares a cron job (daily at 13:45) that runs
+# bin/check_validity_aggregated_projectcounts from the aggregator code
+# checkout against the aggregated data directory.
+#
+class misc::statistics::aggregator::projectcounts::jobs::monitor {
+    include misc::statistics::aggregator::projectcounts::common
+
+    $script_path = $misc::statistics::aggregator::projectcounts::common::script_path
+    $data_path = $misc::statistics::aggregator::projectcounts::common::data_path
+    # $user is a variable of the common class itself; it is not nested below
+    # its $working_path variable.
+    $user = $misc::statistics::aggregator::projectcounts::common::user
+
+    cron { 'aggregator projectcounts monitor':
+        # 'command' is the cron attribute that holds the command line to run.
+        command => "${script_path}/bin/check_validity_aggregated_projectcounts --data ${data_path}",
+        user    => $user,
+        hour    => '13',
+        minute  => '45',
+    }
+}
diff --git a/manifests/role/statistics.pp b/manifests/role/statistics.pp
index d3ab51c..7d78e7c 100644
--- a/manifests/role/statistics.pp
+++ b/manifests/role/statistics.pp
@@ -81,3 +81,14 @@
# backup eventlogging logs
backup::set { 'a-eventlogging' : }
}
+
+class role::statistics::aggregator inherits role::statistics {
+    # NOTE(review): the title is 'role::statistics', not this class's own
+    # name -- confirm it cannot collide with a system::role of the same title
+    # declared by another role on the same node.
+    system::role { 'role::statistics':
+        description => 'statistics aggregator',
+    }
+
+    # aggregator jobs: aggregate the hourly projectcount files, then monitor
+    # the aggregated result
+    include misc::statistics::aggregator::projectcounts::jobs::aggregate
+    include misc::statistics::aggregator::projectcounts::jobs::monitor
+}
diff --git a/manifests/site.pp b/manifests/site.pp
index 2be86fc..71ace6e 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -2595,6 +2595,9 @@
# and only readable by users in the
# analytics-privatedata-users group.
include role::analytics::password::research
+
+ # Run aggregation jobs
+ include role::statistics::aggregator
}
# stat1003 is a general purpose number cruncher for
--
To view, visit https://gerrit.wikimedia.org/r/172201
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Id1a4a9cd2d6a401636ac844ce102d6ee61771e55
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: QChris <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits