Ottomata has submitted this change and it was merged.

Change subject: Replace limn::data::generate by reportupdater
......................................................................


Replace limn::data::generate by reportupdater

Moves the generation of reports out of the statistics module.
Reportupdater now has a dedicated repository.

Bug: T127327
Change-Id: I79cafe41b1e6002ee3e6c9087809ed9515424ad6
---
M manifests/role/statistics.pp
A modules/reportupdater/manifests/init.pp
A modules/reportupdater/manifests/job.pp
D modules/statistics/manifests/limn/data.pp
D modules/statistics/manifests/limn/data/generate.pp
5 files changed, 181 insertions(+), 136 deletions(-)

Approvals:
  Ottomata: Verified; Looks good to me, approved



diff --git a/manifests/role/statistics.pp b/manifests/role/statistics.pp
index c080e21..9368912 100644
--- a/manifests/role/statistics.pp
+++ b/manifests/role/statistics.pp
@@ -74,17 +74,44 @@
     include geowiki::job::monitoring
 
 
-    # Use the statistics::limn::data::generate define
-    # to set up cron jobs to generate and generate limn files
-    # from research db and push them
-    statistics::limn::data::generate { 'mobile':     }
-    statistics::limn::data::generate { 'flow':       }
-    statistics::limn::data::generate { 'edit':       }
-    statistics::limn::data::generate { 'language':   }
-    statistics::limn::data::generate { 'extdist':    }
-    statistics::limn::data::generate { 'ee':         }
-    statistics::limn::data::generate { 'multimedia': }
+    # Set up reportupdater to be executed on this machine
+    # and rsync the output base path to stat1001.
+    class { 'reportupdater':
+        base_path => "${::statistics::working_path}/reportupdater",
+        user      => $::statistics::user::username,
+        rsync_to  => 'stat1001.eqiad.wmnet::www/limn-public-data/',
+    }
 
+    # Set up various jobs to be executed by reportupdater
+    # creating several reports on mysql research db.
+    reportupdater::job { 'mobile':
+        repository => 'limn-mobile-data',
+        output_dir => 'mobile/datafiles',
+    }
+    reportupdater::job { 'flow':
+        repository => 'limn-flow-data',
+        output_dir =>  'flow/datafiles',
+    }
+    reportupdater::job { 'edit':
+        repository => 'limn-edit-data',
+        output_dir => 'metrics',
+    }
+    reportupdater::job { 'language':
+        repository => 'limn-language-data',
+        output_dir => 'metrics/beta-feature-enables',
+    }
+    reportupdater::job { 'extdist':
+        repository => 'limn-extdist-data',
+        output_dir => 'extdist/datafiles',
+    }
+    reportupdater::job { 'ee':
+        repository => 'limn-ee-data',
+        output_dir => 'metrics/echo',
+    }
+    reportupdater::job { 'multimedia':
+        repository => 'limn-multimedia-data',
+        output_dir => 'metrics/beta-feature-enables',
+    }
 }
 
 
@@ -136,6 +163,16 @@
         group => 'statistics-privatedata-users',
         mode  => '0440',
     }
+
+    # Set up reportupdater to be executed on this machine.
+    class { 'reportupdater':
+        base_path => "${::statistics::working_path}/reportupdater",
+        user      => $::statistics::user::username,
+    }
+    # Set up a job to create browser reports on hive db.
+    reportupdater::job { 'browser':
+        repository  => 'reportupdater-queries',
+    }
 }
 
 
diff --git a/modules/reportupdater/manifests/init.pp b/modules/reportupdater/manifests/init.pp
new file mode 100644
index 0000000..bb81f67
--- /dev/null
+++ b/modules/reportupdater/manifests/init.pp
@@ -0,0 +1,63 @@
+# == Class reportupdater
+#
+# Sets up repositories and rsync for using reportupdater.
+# See: https://wikitech.wikimedia.org/wiki/Analytics/Reportupdater
+#
+# == Parameters
+#   $user             - string. User for cloning repositories and
+#                       folder permits.
+#
+#   $base_path        - string. Base path where to put reportupdater's
+#                       repository, job query repositories, and data output.
+#                       Default: /srv/reportupdater
+#
+#   $rsync_to         - string. [optional] If defined, everything in
+#                       $base_path/output will be rsynced to $rsync_to.
+#
+class reportupdater(
+    $user,
+    $base_path = '/srv/reportupdater',
+    $rsync_to  = undef,
+) {
+    # Path at which reportupdater source will be cloned.
+    $path = "${base_path}/reportupdater"
+
+    # Path in which all reportupdater output will be stored.
+    $output_path = "${base_path}/output"
+
+    # Path in which all reportupdater jobs will log.
+    $log_path = "${base_path}/log"
+
+    # Path in which individual reportupdater job repositories
+    # will be cloned.
+    $job_repositories_path = "${::reportupdater::base_path}/jobs"
+
+    # Ensure these directories exist and are writeable by $user.
+    file { [$base_path, $output_path, $log_path, $job_repositories_path]:
+        ensure => 'directory',
+        owner  => $user,
+        group  => 'wikidev',
+        mode   => '0775',
+    }
+
+    # Ensure reportupdater is cloned and latest version.
+    git::clone { 'analytics/reportupdater':
+        ensure    => 'latest',
+        directory => $path,
+        origin    => 'https://gerrit.wikimedia.org/r/p/analytics/reportupdater.git',
+        owner     => $user,
+        require   => File[$base_path],
+    }
+
+    # If specified, rsync anything generated in $output_path to $rsync_to.
+    $rsync_cron_ensure = $rsync_to ? {
+        undef   => 'absent',
+        default => 'present',
+    }
+    cron { 'reportupdater_rsync_to':
+        ensure  => $rsync_cron_ensure,
+        command => "/usr/bin/rsync -rt ${output_path}/* ${rsync_to}",
+        user    => $user,
+        minute  => 15,
+    }
+}
diff --git a/modules/reportupdater/manifests/job.pp b/modules/reportupdater/manifests/job.pp
new file mode 100644
index 0000000..c334abd
--- /dev/null
+++ b/modules/reportupdater/manifests/job.pp
@@ -0,0 +1,71 @@
+# == Define reportupdater::job
+#
+# Sets up hourly cron jobs to run reportupdater, which generates
+# and updates tsv reports for a set of given queries.
+#
+# This requires that a repository with config and queries for the script
+# exists at https://gerrit.wikimedia.org/r/p/analytics/${repository}.git.
+#
+# == Parameters
+#   title        - string. Name of query dir inside of $repository.
+#                  a $title directory with reportupdater query config
+#                  must exist inside of $repository.
+#
+#   repository   - string. Name of the query repository in gerrit in the
+#                  analytics/ namespace.  All reportupdater job
+#                  repositories must be in analytics/
+#                  E.g. analytics/reportupdater-queries
+#
+#   output_dir   - string. [optional] Relative path where to write the reports.
+#                  This will be relative to $::reportupdater::base_path/output
+#                  Default: $title
+#
+# == Usage
+#   reportupdater::job { 'browser': }
+#
+#   reportupdater::job { 'mobile':
+#       repository  => 'limn-mobile-data',
+#       output_dir  => "mobile/datafiles",
+#   }
+#
+define reportupdater::job(
+    $repository,
+    $output_dir  = $title,
+)
+{
+    Class['::reportupdater'] -> Reportupdater::Job[$title]
+
+    # Name of the repository in gerrit.
+    # All reportupdater job repositories are in the analytics/ namespace.
+    $repository_name = "analytics/${repository}"
+
+    # Path at which this reportupdater job repository will be cloned.
+    $path            = "${::reportupdater::job_repositories_path}/${repository}"
+
+    # Path of the query configuration directory inside of $repository_name.
+    $query_path      = "${path}/${title}"
+
+    # Path at which the job will store logs.
+    $log_file        = "${$::reportupdater::log_path}/${repository}-${title}.log"
+
+    # Path at which the job will store its report output.
+    $output_path     = "${::reportupdater::output_path}/${output_dir}"
+
+    # Ensure the query repository is cloned and latest version.
+    # It is possible that multiple jobs will use the same repository,
+    # so wrap this in an if !defined.
+    if !defined(Git::Clone[$repository_name]) {
+        git::clone { $repository_name:
+            ensure    => 'latest',
+            directory => $path,
+            origin    => "https://gerrit.wikimedia.org/r/p/${repository_name}.git",
+            owner     => $::reportupdater::user
+        }
+    }
+
+    cron { "reportupdater_${repository}-${title}":
+        command => "python ${::reportupdater::path}/update_reports.py ${query_path} ${output_path} >> ${log_file} 2>&1",
+        user    => $user,
+        minute  => 0,
+    }
+}
diff --git a/modules/statistics/manifests/limn/data.pp b/modules/statistics/manifests/limn/data.pp
deleted file mode 100644
index 1613083..0000000
--- a/modules/statistics/manifests/limn/data.pp
+++ /dev/null
@@ -1,76 +0,0 @@
-
-# == Class statistics::limn::data
-# Sets up base directories and repositories
-# for using the statistics::limn::data::generate() define.
-#
-class statistics::limn::data {
-    Class['::statistics::compute'] -> Class['::statistics::limn::data']
-    Class['::statistics::user']    -> Class['::statistics::limn::data']
-
-    $working_path      = '/srv'
-
-    # Directory where the repository of the generate.py will be cloned.
-    $source_dir        = "${working_path}/limn-mobile-data"
-
-    # generate.py command to run in a cron.
-    $command           = "${source_dir}/generate.py"
-
-    # my.cnf credentials file. This is the file rendered by
-    # mysql::config::client { 'stats-research': } defined in statistics::compute
-    $mysql_credentials = '/etc/mysql/conf.d/stats-research-client.cnf'
-
-    # cron job logs will be kept here
-    $log_dir           = '/var/log/limn-data'
-
-    # generate.py's repository
-    $git_remote        = 'https://gerrit.wikimedia.org/r/p/analytics/limn-mobile-data.git'
-
-    # public data directory.  Data will be synced from here to a public web host.
-    $public_dir        = "${working_path}/limn-public-data"
-
-    # Rsync generated data to stat1001 at http://datasets.wikimedia.org/limn-public-data/
-    $rsync_to          = 'stat1001.eqiad.wmnet::www/limn-public-data/'
-
-    # user to own files and run cron job as (stats).
-    $user              = $::statistics::user::username
-
-    # This path is used in the limn-mobile-data config.
-    # Symlink this until they change it.
-    # https://github.com/wikimedia/analytics-limn-mobile-data/blob/2321a6a0976b1805e79fecd495cf12ed7c6565a0/mobile/config.yaml#L5
-    file { "${working_path}/.my.cnf.research":
-        ensure  => 'link',
-        target  => $mysql_credentials,
-        require => Mysql::Config::Client['stats-research'],
-    }
-
-    # TODO:  This repository contains the generate.py script.
-    # Other limn data repositories only have config and data
-    # directories.  generate.py should be abstracted out into
-    # a general purupose limn data generator.
-    # For now, all limn data classes rely on this repository
-    # and generate.py script to be present.
-    if !defined(Git::Clone['analytics/limn-mobile-data']) {
-        git::clone { 'analytics/limn-mobile-data':
-            ensure    => 'latest',
-            directory => $source_dir,
-            origin    => $git_remote,
-            owner     => $user,
-            require   => [User[$user]],
-        }
-    }
-
-    # Make sure these are writeable by $user.
-    file { [$log_dir, $public_dir]:
-        ensure => 'directory',
-        owner  => $user,
-        group  => wikidev,
-        mode   => '0775',
-    }
-
-    # Rsync anything generated in $public_dir to $rsync_to
-    cron { 'rsync_limn_public_data':
-        command => "/usr/bin/rsync -rt ${public_dir}/* ${rsync_to}",
-        user    => $user,
-        minute  => 15,
-    }
-}
diff --git a/modules/statistics/manifests/limn/data/generate.pp b/modules/statistics/manifests/limn/data/generate.pp
deleted file mode 100644
index a3132fc..0000000
--- a/modules/statistics/manifests/limn/data/generate.pp
+++ /dev/null
@@ -1,50 +0,0 @@
-
-# == Define statistics::limn::data::generate
-#
-# Sets up daily cron jobs to run a script which
-# generates csv datafiles and rsyncs those files
-# to stat1001 so they can be served publicly.
-#
-# This requires that a repository with config to pass to generate.py
-# exists at https://gerrit.wikimedia.org/r/p/analytics/limn-${title}-data.git.
-#
-# == Usage
-#   statistics::limn::data::generate { 'mobile': }
-#   statistics::limn::data::generate { 'flow': }
-#   ...
-#
-define statistics::limn::data::generate() {
-    require ::statistics::limn::data
-
-    $user    = $::statistics::limn::data::user
-    $command = $::statistics::limn::data::command
-
-    # A repo at analytics/limn-${title}-data.git had better exist!
-    $git_remote        = "https://gerrit.wikimedia.org/r/p/analytics/limn-${title}-data.git"
-
-    # Directory at which to clone $git_remote
-    $source_dir        = "${::statistics::limn::data::working_path}/limn-${title}-data"
-
-    # config directory for this limn data generate job
-    $config_dir        = "${$source_dir}/${title}/"
-
-    # log file for the generate cron job
-    $log               = "${::statistics::limn::data::log_dir}/limn-${title}-data.log"
-
-    if !defined(Git::Clone["analytics/limn-${title}-data"]) {
-        git::clone { "analytics/limn-${title}-data":
-            ensure    => 'latest',
-            directory => $source_dir,
-            origin    => $git_remote,
-            owner     => $user,
-            require   => [User[$user]],
-        }
-    }
-
-    # This will generate data into $public_dir/${title} (if configured correctly)
-    cron { "generate_${title}_limn_public_data":
-        command => "python ${command} ${config_dir} >> ${log} 2>&1",
-        user    => $user,
-        minute  => 0,
-    }
-}

-- 
To view, visit https://gerrit.wikimedia.org/r/273487
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I79cafe41b1e6002ee3e6c9087809ed9515424ad6
Gerrit-PatchSet: 14
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Mforns <[email protected]>
Gerrit-Reviewer: Elukey <[email protected]>
Gerrit-Reviewer: Mforns <[email protected]>
Gerrit-Reviewer: Milimetric <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to