Mforns has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/273487

Change subject: Replace limn::data::generate by reportupdater
......................................................................

Replace limn::data::generate by reportupdater

Moves the generation of reports out of the statistics module
and makes it more generic, now supporting both report generators:
generate.py and reportupdater. The latter now has a dedicated
repository.

In the future we might want to change all report jobs to use
reportupdater and completely remove the generate.py option.

Bug: T127327
Change-Id: I79cafe41b1e6002ee3e6c9087809ed9515424ad6
---
M manifests/role/statistics.pp
A modules/reportupdater/manifests/init.pp
A modules/reportupdater/manifests/job.pp
D modules/statistics/manifests/limn/data.pp
D modules/statistics/manifests/limn/data/generate.pp
5 files changed, 158 insertions(+), 137 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/87/273487/1

diff --git a/manifests/role/statistics.pp b/manifests/role/statistics.pp
index 72fe14a..a55d12b 100644
--- a/manifests/role/statistics.pp
+++ b/manifests/role/statistics.pp
@@ -66,17 +66,23 @@
     include geowiki::job::monitoring
 
 
-    # Use the statistics::limn::data::generate define
-    # to set up cron jobs to generate and generate limn files
-    # from research db and push them
-    statistics::limn::data::generate { 'mobile':     }
-    statistics::limn::data::generate { 'flow':       }
-    statistics::limn::data::generate { 'edit':       }
-    statistics::limn::data::generate { 'language':   }
-    statistics::limn::data::generate { 'extdist':    }
-    statistics::limn::data::generate { 'ee':         }
-    statistics::limn::data::generate { 'multimedia': }
-
+    # Set up reportupdater to create several reports on mysql research db.
+    include statistics::user
+    class { 'reportupdater':
+        user               => $statistics::user::username,
+        working_path       => $::statistics::working_path,
+        log_path           => '/var/log/limn-data',
+        output_path        => "${::statistics::working_path}/limn-public-data",
+        rsync_to           => 'stat1001.eqiad.wmnet::www/limn-public-data/',
+        generator          => 'generate',
+    }
+    reportupdater::job { 'mobile':     }
+    reportupdater::job { 'flow':       }
+    reportupdater::job { 'edit':       }
+    reportupdater::job { 'language':   }
+    reportupdater::job { 'extdist':    }
+    reportupdater::job { 'ee':         }
+    reportupdater::job { 'multimedia': }
 }
 
 
@@ -128,6 +134,16 @@
         group => 'statistics-privatedata-users',
         mode  => '0440',
     }
+
+    # Set up reportupdater to create browser reports on hive db.
+    include statistics::user
+    class { 'reportupdater':
+        user               => $statistics::user::username,
+        working_path       => $::statistics::working_path,
+    }
+    reportupdater::job { 'browser':
+        repository => 'reportupdater-queries',
+    }
 }
 
 
diff --git a/modules/reportupdater/manifests/init.pp b/modules/reportupdater/manifests/init.pp
new file mode 100644
index 0000000..87f5a73
--- /dev/null
+++ b/modules/reportupdater/manifests/init.pp
@@ -0,0 +1,73 @@
+# == Class reportupdater
+#
+# Sets up base directories and repositories for using reportupdater.
+# See: https://wikitech.wikimedia.org/wiki/Analytics/Reportupdater
+#
+# == Parameters
+#   $user           - string. User that owns the cloned repositories and
+#                     the log and output directories.
+#   $working_path   - string. Base path where to put the necessary repositories.
+#   $log_path       - string. [optional] Path where to write the generator logs.
+#                     Default: '/var/log/reportupdater'.
+#   $output_path    - string. [optional] Path to output the generated reports.
+#                     Default: "${working_path}/reportupdater-output".
+#   $rsync_to       - string. [optional] If defined, everything in the output
+#                     path will be rsync'd to $rsync_to.
+#   $generator      - string. The generator that will manage the reports.
+#                     Either 'generate' or 'reportupdater'.
+#
+class reportupdater(
+    $user,
+    $working_path,
+    $log_path          = '/var/log/reportupdater',
+    $output_path       = "${working_path}/reportupdater-output",
+    $rsync_to          = undef,
+    $generator         = 'reportupdater',
+) {
+
+    # There are 2 generator scripts for now. Each one has its own:
+    #   $git_remote   - Repository where to pull the generator from.
+    #   $source_path  - Directory where to clone the repository to.
+    #   $command      - Command to execute the generator.
+    #
+    case $generator {
+        generate: {
+            $git_remote  = 'https://gerrit.wikimedia.org/r/p/analytics/limn-mobile-data.git'
+            $source_path = "${working_path}/limn-mobile-data"
+            $command     = "python ${source_path}/generate.py"
+        }
+        reportupdater: {
+            $git_remote  = 'https://gerrit.wikimedia.org/r/p/analytics/reportupdater.git'
+            $source_path = "${working_path}/reportupdater"
+            $command     = "python ${source_path}/update_reports.py"
+        }
+    }
+
+    # Ensure the generator is cloned and latest version.
+    if !defined(Git::Clone['analytics/reportupdater']) {
+        git::clone { 'analytics/reportupdater':
+            ensure    => 'latest',
+            directory => $source_path,
+            origin    => $git_remote,
+            owner     => $user,
+            require   => [User[$user]],
+        }
+    }
+
+    # Make sure these are writeable by $user.
+    file { [$log_path, $output_path]:
+        ensure => 'directory',
+        owner  => $user,
+        group  => wikidev,
+        mode   => '0775',
+    }
+
+    # If specified, rsync anything generated in $output_path to $rsync_to.
+    if $rsync_to != undef {
+        cron { 'rsync_reportupdater_output':
+            command => "/usr/bin/rsync -rt ${output_path}/* ${rsync_to}",
+            user    => $user,
+            minute  => 15,
+        }
+    }
+}
diff --git a/modules/reportupdater/manifests/job.pp b/modules/reportupdater/manifests/job.pp
new file mode 100644
index 0000000..e960e5f
--- /dev/null
+++ b/modules/reportupdater/manifests/job.pp
@@ -0,0 +1,58 @@
+# == Define reportupdater::job
+#
+# Sets up hourly cron jobs to run a script which generates and updates
+# tsv datafiles for a set of given queries.
+#
+# This requires that a repository with config and queries for the script
+# exists at https://gerrit.wikimedia.org/r/p/analytics/${repository}.git.
+#
+# == Parameters
+#   repository   - string. [optional] Name of the repository holding the
+#                  queries and the config. Default: "limn-${title}-data".
+#   query_dir    - string. [optional] Path of the directory holding the
+#                  queries and the config within the mentioned repository.
+#                  Default: "${title}".
+#
+# == Usage
+#   reportupdater::job { 'mobile': }
+#   reportupdater::job { 'browser':
+#       repository => 'reportupdater-queries',
+#   }
+#
+define reportupdater::job(
+    $repository = "limn-${title}-data",
+    $query_dir  = "${title}",
+) {
+    Class['::reportupdater'] -> Reportupdater::Job[$title]
+
+    $user    = $::reportupdater::user
+    $command = $::reportupdater::command
+
+    # A repo at analytics/${repository}.git had better exist!
+    $git_remote = "https://gerrit.wikimedia.org/r/p/analytics/${repository}.git";
+
+    # Directory at which to clone $git_remote.
+    $source_path = "${::reportupdater::working_path}/${repository}"
+
+    # Config directory for this report generating job.
+    $query_path = "${$source_path}/${query_dir}"
+
+    # Log file for the generate/reportupdater cron job.
+    $log_file = "${::reportupdater::log_path}/${repository}_${title}.log"
+
+    if !defined(Git::Clone["analytics/${repository}"]) {
+        git::clone { "analytics/${repository}":
+            ensure    => 'latest',
+            directory => $source_path,
+            origin    => $git_remote,
+            owner     => $user,
+            require   => [User[$user]],
+        }
+    }
+
+    cron { "generate_${repository}_${title}":
+        command => "${command} ${query_path} >> ${log_file} 2>&1",
+        user    => $user,
+        minute  => 0,
+    }
+}
diff --git a/modules/statistics/manifests/limn/data.pp b/modules/statistics/manifests/limn/data.pp
deleted file mode 100644
index 1613083..0000000
--- a/modules/statistics/manifests/limn/data.pp
+++ /dev/null
@@ -1,76 +0,0 @@
-
-# == Class statistics::limn::data
-# Sets up base directories and repositories
-# for using the statistics::limn::data::generate() define.
-#
-class statistics::limn::data {
-    Class['::statistics::compute'] -> Class['::statistics::limn::data']
-    Class['::statistics::user']    -> Class['::statistics::limn::data']
-
-    $working_path      = '/srv'
-
-    # Directory where the repository of the generate.py will be cloned.
-    $source_dir        = "${working_path}/limn-mobile-data"
-
-    # generate.py command to run in a cron.
-    $command           = "${source_dir}/generate.py"
-
-    # my.cnf credentials file. This is the file rendered by
-    # mysql::config::client { 'stats-research': } defined in statistics::compute
-    $mysql_credentials = '/etc/mysql/conf.d/stats-research-client.cnf'
-
-    # cron job logs will be kept here
-    $log_dir           = '/var/log/limn-data'
-
-    # generate.py's repository
-    $git_remote        = 'https://gerrit.wikimedia.org/r/p/analytics/limn-mobile-data.git'
-
-    # public data directory.  Data will be synced from here to a public web host.
-    $public_dir        = "${working_path}/limn-public-data"
-
-    # Rsync generated data to stat1001 at http://datasets.wikimedia.org/limn-public-data/
-    $rsync_to          = 'stat1001.eqiad.wmnet::www/limn-public-data/'
-
-    # user to own files and run cron job as (stats).
-    $user              = $::statistics::user::username
-
-    # This path is used in the limn-mobile-data config.
-    # Symlink this until they change it.
-    # https://github.com/wikimedia/analytics-limn-mobile-data/blob/2321a6a0976b1805e79fecd495cf12ed7c6565a0/mobile/config.yaml#L5
-    file { "${working_path}/.my.cnf.research":
-        ensure  => 'link',
-        target  => $mysql_credentials,
-        require => Mysql::Config::Client['stats-research'],
-    }
-
-    # TODO:  This repository contains the generate.py script.
-    # Other limn data repositories only have config and data
-    # directories.  generate.py should be abstracted out into
-    # a general purupose limn data generator.
-    # For now, all limn data classes rely on this repository
-    # and generate.py script to be present.
-    if !defined(Git::Clone['analytics/limn-mobile-data']) {
-        git::clone { 'analytics/limn-mobile-data':
-            ensure    => 'latest',
-            directory => $source_dir,
-            origin    => $git_remote,
-            owner     => $user,
-            require   => [User[$user]],
-        }
-    }
-
-    # Make sure these are writeable by $user.
-    file { [$log_dir, $public_dir]:
-        ensure => 'directory',
-        owner  => $user,
-        group  => wikidev,
-        mode   => '0775',
-    }
-
-    # Rsync anything generated in $public_dir to $rsync_to
-    cron { 'rsync_limn_public_data':
-        command => "/usr/bin/rsync -rt ${public_dir}/* ${rsync_to}",
-        user    => $user,
-        minute  => 15,
-    }
-}
diff --git a/modules/statistics/manifests/limn/data/generate.pp b/modules/statistics/manifests/limn/data/generate.pp
deleted file mode 100644
index a3132fc..0000000
--- a/modules/statistics/manifests/limn/data/generate.pp
+++ /dev/null
@@ -1,50 +0,0 @@
-
-# == Define statistics::limn::data::generate
-#
-# Sets up daily cron jobs to run a script which
-# generates csv datafiles and rsyncs those files
-# to stat1001 so they can be served publicly.
-#
-# This requires that a repository with config to pass to generate.py
-# exists at https://gerrit.wikimedia.org/r/p/analytics/limn-${title}-data.git.
-#
-# == Usage
-#   statistics::limn::data::generate { 'mobile': }
-#   statistics::limn::data::generate { 'flow': }
-#   ...
-#
-define statistics::limn::data::generate() {
-    require ::statistics::limn::data
-
-    $user    = $::statistics::limn::data::user
-    $command = $::statistics::limn::data::command
-
-    # A repo at analytics/limn-${title}-data.git had better exist!
-    $git_remote        = "https://gerrit.wikimedia.org/r/p/analytics/limn-${title}-data.git";
-
-    # Directory at which to clone $git_remote
-    $source_dir        = "${::statistics::limn::data::working_path}/limn-${title}-data"
-
-    # config directory for this limn data generate job
-    $config_dir        = "${$source_dir}/${title}/"
-
-    # log file for the generate cron job
-    $log               = "${::statistics::limn::data::log_dir}/limn-${title}-data.log"
-
-    if !defined(Git::Clone["analytics/limn-${title}-data"]) {
-        git::clone { "analytics/limn-${title}-data":
-            ensure    => 'latest',
-            directory => $source_dir,
-            origin    => $git_remote,
-            owner     => $user,
-            require   => [User[$user]],
-        }
-    }
-
-    # This will generate data into $public_dir/${title} (if configured correctly)
-    cron { "generate_${title}_limn_public_data":
-        command => "python ${command} ${config_dir} >> ${log} 2>&1",
-        user    => $user,
-        minute  => 0,
-    }
-}

-- 
To view, visit https://gerrit.wikimedia.org/r/273487
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I79cafe41b1e6002ee3e6c9087809ed9515424ad6
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Mforns <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to