Mforns has uploaded a new change for review.
https://gerrit.wikimedia.org/r/273487
Change subject: Replace limn::data::generate by reportupdater
......................................................................
Replace limn::data::generate by reportupdater
Moves the generation of reports out of the statistics module and
makes it more generic: it now supports both report generators,
generate.py and reportupdater. The latter now has a dedicated
repository.
In the future we might want to change all report jobs to use
reportupdater and completely remove the generate.py option.
Bug: T127327
Change-Id: I79cafe41b1e6002ee3e6c9087809ed9515424ad6
---
M manifests/role/statistics.pp
A modules/reportupdater/manifests/init.pp
A modules/reportupdater/manifests/job.pp
D modules/statistics/manifests/limn/data.pp
D modules/statistics/manifests/limn/data/generate.pp
5 files changed, 158 insertions(+), 137 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/87/273487/1
diff --git a/manifests/role/statistics.pp b/manifests/role/statistics.pp
index 72fe14a..a55d12b 100644
--- a/manifests/role/statistics.pp
+++ b/manifests/role/statistics.pp
@@ -66,17 +66,23 @@
include geowiki::job::monitoring
- # Use the statistics::limn::data::generate define
- # to set up cron jobs to generate and generate limn files
- # from research db and push them
- statistics::limn::data::generate { 'mobile': }
- statistics::limn::data::generate { 'flow': }
- statistics::limn::data::generate { 'edit': }
- statistics::limn::data::generate { 'language': }
- statistics::limn::data::generate { 'extdist': }
- statistics::limn::data::generate { 'ee': }
- statistics::limn::data::generate { 'multimedia': }
-
+ # Set up reportupdater to create several reports on mysql research db.
+ include statistics::user
+ class { 'reportupdater':
+ user => $statistics::user::username,
+ working_path => $::statistics::working_path,
+ log_path => '/var/log/limn-data',
+ output_path => "${::statistics::working_path}/limn-public-data",
+ rsync_to => 'stat1001.eqiad.wmnet::www/limn-public-data/',
+ generator => 'generate',
+ }
+ reportupdater::job { 'mobile': }
+ reportupdater::job { 'flow': }
+ reportupdater::job { 'edit': }
+ reportupdater::job { 'language': }
+ reportupdater::job { 'extdist': }
+ reportupdater::job { 'ee': }
+ reportupdater::job { 'multimedia': }
}
@@ -128,6 +134,16 @@
group => 'statistics-privatedata-users',
mode => '0440',
}
+
+ # Set up reportupdater to create browser reports on hive db.
+ include statistics::user
+ class { 'reportupdater':
+ user => $statistics::user::username,
+ working_path => $::statistics::working_path,
+ }
+ reportupdater::job { 'browser':
+ repository => 'reportupdater-queries',
+ }
}
diff --git a/modules/reportupdater/manifests/init.pp b/modules/reportupdater/manifests/init.pp
new file mode 100644
index 0000000..87f5a73
--- /dev/null
+++ b/modules/reportupdater/manifests/init.pp
@@ -0,0 +1,86 @@
+# == Class reportupdater
+#
+# Sets up base directories and repositories for using reportupdater.
+# See: https://wikitech.wikimedia.org/wiki/Analytics/Reportupdater
+#
+# == Parameters
+#   $user         - string. User to clone the repositories and attribute
+#                   the permits to.
+#   $working_path - string. Base path where to put the necessary repositories.
+#   $log_path     - string. [optional] Path where to write the generator logs.
+#                   Default: '/var/log/reportupdater'.
+#   $output_path  - string. [optional] Path to output the generated reports.
+#                   Default: "${working_path}/reportupdater-output".
+#   $rsync_to     - string. [optional] If defined, everything in the output
+#                   path will be rsync'd to $rsync_to.
+#   $generator    - string. [optional] The generator that will manage the
+#                   reports. Either 'generate' or 'reportupdater'; any other
+#                   value fails catalog compilation. Default: 'reportupdater'.
+#
+class reportupdater(
+    $user,
+    $working_path,
+    $log_path    = '/var/log/reportupdater',
+    $output_path = "${working_path}/reportupdater-output",
+    $rsync_to    = undef,
+    $generator   = 'reportupdater',
+) {
+
+    # There are 2 generator scripts for now. Each one has its own:
+    #   $repository - Name of the generator's repository under analytics/.
+    #   $script     - Script within that repository that runs the generator.
+    #
+    case $generator {
+        'generate': {
+            $repository = 'limn-mobile-data'
+            $script     = 'generate.py'
+        }
+        'reportupdater': {
+            $repository = 'reportupdater'
+            $script     = 'update_reports.py'
+        }
+        default: {
+            fail("Unknown generator '${generator}': expected 'generate' or 'reportupdater'.")
+        }
+    }
+
+    # Derived from the generator chosen above:
+    #   $git_remote  - Repository where to pull the generator from.
+    #   $source_path - Directory where to clone the repository to.
+    #   $command     - Command to execute the generator.
+    #
+    $git_remote  = "https://gerrit.wikimedia.org/r/p/analytics/${repository}.git"
+    $source_path = "${working_path}/${repository}"
+    $command     = "python ${source_path}/${script}"
+
+    # Ensure the generator is cloned and latest version. The resource is titled
+    # after the repository that is actually cloned, so that this guard and the
+    # one in reportupdater::job recognise each other when they refer to the
+    # same repository (e.g. analytics/limn-mobile-data with 'generate').
+    if !defined(Git::Clone["analytics/${repository}"]) {
+        git::clone { "analytics/${repository}":
+            ensure    => 'latest',
+            directory => $source_path,
+            origin    => $git_remote,
+            owner     => $user,
+            require   => [User[$user]],
+        }
+    }
+
+    # Make sure these are writeable by $user.
+    file { [$log_path, $output_path]:
+        ensure => 'directory',
+        owner  => $user,
+        group  => 'wikidev',
+        mode   => '0775',
+    }
+
+    # If specified, rsync anything generated in $output_path to $rsync_to.
+    if $rsync_to != undef {
+        cron { 'rsync_reportupdater_output':
+            command => "/usr/bin/rsync -rt ${output_path}/* ${rsync_to}",
+            user    => $user,
+            minute  => 15,
+        }
+    }
+}
diff --git a/modules/reportupdater/manifests/job.pp b/modules/reportupdater/manifests/job.pp
new file mode 100644
index 0000000..e960e5f
--- /dev/null
+++ b/modules/reportupdater/manifests/job.pp
@@ -0,0 +1,63 @@
+# == Define reportupdater::job
+#
+# Sets up hourly cron jobs to run a script which generates and updates
+# tsv datafiles for a set of given queries.
+#
+# This requires that a repository with config and queries for the script
+# exists at https://gerrit.wikimedia.org/r/p/analytics/${repository}.git.
+#
+# The user, generator command, working path and log path are taken from
+# the reportupdater class, which therefore has to be declared as well.
+#
+# == Parameters
+#   repository - string. [optional] Name of the repository holding the
+#                queries and the config. Default: "limn-${title}-data".
+#   query_dir  - string. [optional] Path of the directory holding the
+#                queries and the config within the mentioned repository.
+#                Default: "${title}".
+#
+# == Usage
+#   reportupdater::job { 'mobile': }
+#   reportupdater::job { 'browser':
+#       repository => 'reportupdater-queries',
+#   }
+#
+define reportupdater::job(
+    $repository = "limn-${title}-data",
+    $query_dir  = $title,
+) {
+    Class['::reportupdater'] -> Reportupdater::Job[$title]
+
+    $user    = $::reportupdater::user
+    $command = $::reportupdater::command
+
+    # A repo at analytics/${repository}.git had better exist!
+    $git_remote = "https://gerrit.wikimedia.org/r/p/analytics/${repository}.git"
+
+    # Directory at which to clone $git_remote.
+    $source_path = "${::reportupdater::working_path}/${repository}"
+
+    # Config directory for this report generating job.
+    $query_path = "${source_path}/${query_dir}"
+
+    # Log file for the generate/reportupdater cron job.
+    $log_file = "${::reportupdater::log_path}/${repository}_${title}.log"
+
+    # Several jobs may share one repository; only declare the clone once.
+    if !defined(Git::Clone["analytics/${repository}"]) {
+        git::clone { "analytics/${repository}":
+            ensure    => 'latest',
+            directory => $source_path,
+            origin    => $git_remote,
+            owner     => $user,
+            require   => [User[$user]],
+        }
+    }
+
+    # Runs at minute 0 of every hour, appending all output to $log_file.
+    cron { "generate_${repository}_${title}":
+        command => "${command} ${query_path} >> ${log_file} 2>&1",
+        user    => $user,
+        minute  => 0,
+    }
+}
diff --git a/modules/statistics/manifests/limn/data.pp b/modules/statistics/manifests/limn/data.pp
deleted file mode 100644
index 1613083..0000000
--- a/modules/statistics/manifests/limn/data.pp
+++ /dev/null
@@ -1,76 +0,0 @@
-
-# == Class statistics::limn::data
-# Sets up base directories and repositories
-# for using the statistics::limn::data::generate() define.
-#
-class statistics::limn::data {
- Class['::statistics::compute'] -> Class['::statistics::limn::data']
- Class['::statistics::user'] -> Class['::statistics::limn::data']
-
- $working_path = '/srv'
-
- # Directory where the repository of the generate.py will be cloned.
- $source_dir = "${working_path}/limn-mobile-data"
-
- # generate.py command to run in a cron.
- $command = "${source_dir}/generate.py"
-
- # my.cnf credentials file. This is the file rendered by
- # mysql::config::client { 'stats-research': } defined in statistics::compute
- $mysql_credentials = '/etc/mysql/conf.d/stats-research-client.cnf'
-
- # cron job logs will be kept here
- $log_dir = '/var/log/limn-data'
-
- # generate.py's repository
- $git_remote = 'https://gerrit.wikimedia.org/r/p/analytics/limn-mobile-data.git'
-
- # public data directory. Data will be synced from here to a public web host.
- $public_dir = "${working_path}/limn-public-data"
-
- # Rsync generated data to stat1001 at http://datasets.wikimedia.org/limn-public-data/
- $rsync_to = 'stat1001.eqiad.wmnet::www/limn-public-data/'
-
- # user to own files and run cron job as (stats).
- $user = $::statistics::user::username
-
- # This path is used in the limn-mobile-data config.
- # Symlink this until they change it.
- # https://github.com/wikimedia/analytics-limn-mobile-data/blob/2321a6a0976b1805e79fecd495cf12ed7c6565a0/mobile/config.yaml#L5
- file { "${working_path}/.my.cnf.research":
- ensure => 'link',
- target => $mysql_credentials,
- require => Mysql::Config::Client['stats-research'],
- }
-
- # TODO: This repository contains the generate.py script.
- # Other limn data repositories only have config and data
- # directories. generate.py should be abstracted out into
- # a general purupose limn data generator.
- # For now, all limn data classes rely on this repository
- # and generate.py script to be present.
- if !defined(Git::Clone['analytics/limn-mobile-data']) {
- git::clone { 'analytics/limn-mobile-data':
- ensure => 'latest',
- directory => $source_dir,
- origin => $git_remote,
- owner => $user,
- require => [User[$user]],
- }
- }
-
- # Make sure these are writeable by $user.
- file { [$log_dir, $public_dir]:
- ensure => 'directory',
- owner => $user,
- group => wikidev,
- mode => '0775',
- }
-
- # Rsync anything generated in $public_dir to $rsync_to
- cron { 'rsync_limn_public_data':
- command => "/usr/bin/rsync -rt ${public_dir}/* ${rsync_to}",
- user => $user,
- minute => 15,
- }
-}
diff --git a/modules/statistics/manifests/limn/data/generate.pp b/modules/statistics/manifests/limn/data/generate.pp
deleted file mode 100644
index a3132fc..0000000
--- a/modules/statistics/manifests/limn/data/generate.pp
+++ /dev/null
@@ -1,50 +0,0 @@
-
-# == Define statistics::limn::data::generate
-#
-# Sets up daily cron jobs to run a script which
-# generates csv datafiles and rsyncs those files
-# to stat1001 so they can be served publicly.
-#
-# This requires that a repository with config to pass to generate.py
-# exists at https://gerrit.wikimedia.org/r/p/analytics/limn-${title}-data.git.
-#
-# == Usage
-# statistics::limn::data::generate { 'mobile': }
-# statistics::limn::data::generate { 'flow': }
-# ...
-#
-define statistics::limn::data::generate() {
- require ::statistics::limn::data
-
- $user = $::statistics::limn::data::user
- $command = $::statistics::limn::data::command
-
- # A repo at analytics/limn-${title}-data.git had better exist!
- $git_remote = "https://gerrit.wikimedia.org/r/p/analytics/limn-${title}-data.git"
-
- # Directory at which to clone $git_remote
- $source_dir = "${::statistics::limn::data::working_path}/limn-${title}-data"
-
- # config directory for this limn data generate job
- $config_dir = "${$source_dir}/${title}/"
-
- # log file for the generate cron job
- $log = "${::statistics::limn::data::log_dir}/limn-${title}-data.log"
-
- if !defined(Git::Clone["analytics/limn-${title}-data"]) {
- git::clone { "analytics/limn-${title}-data":
- ensure => 'latest',
- directory => $source_dir,
- origin => $git_remote,
- owner => $user,
- require => [User[$user]],
- }
- }
-
- # This will generate data into $public_dir/${title} (if configured correctly)
- cron { "generate_${title}_limn_public_data":
- command => "python ${command} ${config_dir} >> ${log} 2>&1",
- user => $user,
- minute => 0,
- }
-}
--
To view, visit https://gerrit.wikimedia.org/r/273487
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I79cafe41b1e6002ee3e6c9087809ed9515424ad6
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Mforns <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits