Ottomata has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/171553

Change subject: Slightly refactor  misc::statistics::limn::mobile_data_sync
......................................................................

Slightly refactor  misc::statistics::limn::mobile_data_sync

This will allow for multiple teams to more easily run generate.py
and sync data over to stat1001 using different limn dataset sources.

TODO: put generate.py into a generic limn generator repository,
rather than requiring that limn-mobile-data repo is cloned
for all limn::data::generate jobs.

Change-Id: Iec0f862ca3e4ee63570f1c495260807071b1e2a4
---
M manifests/misc/statistics.pp
M manifests/site.pp
2 files changed, 113 insertions(+), 33 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet 
refs/changes/53/171553/1

diff --git a/manifests/misc/statistics.pp b/manifests/misc/statistics.pp
index acb8e0d..3751ad1 100644
--- a/manifests/misc/statistics.pp
+++ b/manifests/misc/statistics.pp
@@ -701,45 +701,41 @@
     }
 }
 
-
-# Class: misc::statistics::limn::mobile_data_sync
+# == Class misc::statistics::limn::data
+# Sets up base directories and repositories
+# for using the misc::statistics::limn::data::generate() define.
 #
-# Sets up daily cron jobs to run a script which
-# generates csv datafiles from mobile apps statistics
-# then rsyncs those files to stat1001 so they can be served publicly
-class misc::statistics::limn::mobile_data_sync {
+class misc::statistics::limn::data {
     include misc::statistics::base
     include misc::statistics::stats_researchdb_password
 
+    # Either '/a' or '/srv', depending on the server. :/
     $working_path      = $misc::statistics::base::working_path
 
+    # Directory where the repository of the generate.py will be cloned.
     $source_dir        = "${working_path}/limn-mobile-data"
+
+    # generate.py command to run in a cron.
     $command           = "${source_dir}/generate.py"
-    $config            = "${source_dir}/mobile/"
+
+    # my.cnf credentials file. This is the file rendered by
+    # misc::statistics::stats_researchdb_password.
     $mysql_credentials = '/etc/mysql/conf.d/stats-research-client.cnf'
-    $rsync_from        = "${working_path}/limn-public-data"
-    $output            = "${rsync_from}/mobile/datafiles"
-    $log               = '/var/log/limn-mobile-data.log'
-    $gerrit_repo       = 
'https://gerrit.wikimedia.org/r/p/analytics/limn-mobile-data.git'
+
+    # cron job logs will be kept here
+    $log_dir           = '/var/log/limn-data'
+
+    # generate.py's repository
+    $git_remote        = 
'https://gerrit.wikimedia.org/r/p/analytics/limn-mobile-data.git'
+
+    # public data directory.  Data will be synced from here to a public web 
host.
+    $public_dir        = "${working_path}/limn-public-data"
+
+    # Rsync generated data to stat1001 at 
http://datasets.wikimedia.org/limn-public-data/
+    $rsync_to          = "stat1001.wikimedia.org::www/limn-public-data/"
+
+    # user to own files and run cron job as (stats).
     $user              = $misc::statistics::user::username
-
-    $db_user           = $passwords::mysql::research::user
-    $db_pass           = $passwords::mysql::research::pass
-
-    git::clone { 'analytics/limn-mobile-data':
-        ensure    => 'latest',
-        directory => $source_dir,
-        origin    => $gerrit_repo,
-        owner     => $user,
-        require   => [User[$user]],
-    }
-
-    file { $log:
-        ensure  => 'present',
-        owner   => $user,
-        group   => $user,
-        mode    => '0660',
-    }
 
     # This path is used in the limn-mobile-data config.
     # Symlink this until they change it.
@@ -749,20 +745,104 @@
         target => $mysql_credentials,
     }
 
-    file { [$source_dir, $rsync_from, $output]:
+    # TODO:  This repository contains the generate.py script.
+    # Other limn data repositories only have config and data
+    # directories.  generate.py should be abstracted out into
+    # a general purpose limn data generator.
+    # For now, all limn data classes rely on this repository
+    # and generate.py script to be present.
+    if !defined(Git::Clone['analytics/limn-mobile-data']) {
+        git::clone { 'analytics/limn-mobile-data':
+            ensure    => 'latest',
+            directory => $source_dir,
+            origin    => $git_remote,
+            owner     => $user,
+            require   => [User[$user]],
+        }
+    }
+
+    # Make sure these are writeable by $user.
+    file { [$log_dir, $source_dir, $public_data_dir]:
         ensure => 'directory',
         owner  => $user,
         group  => wikidev,
         mode   => '0775',
     }
+}
 
-    cron { 'rsync_mobile_apps_stats':
-        command => "python ${command} ${config} >> ${log} 2>&1 && 
/usr/bin/rsync -rt ${rsync_from}/* 
stat1001.wikimedia.org::www/limn-public-data/",
+
+
+# == Define: misc::statistics::limn::data::generate
+#
+# Sets up daily cron jobs to run a script which
+# generates csv datafiles and rsyncs those files
+# to stat1001 so they can be served publicly.
+#
+# This requires that a repository with generate.py config
+# exists at https://gerrit.wikimedia.org/r/p/analytics/limn-${title}-data.git.
+#
+# == Usage
+#   misc::statistics::limn::data::generate { 'mobile': }
+#   misc::statistics::limn::data::generate { 'flow': }
+#   ...
+#
+define misc::statistics::limn::data::generate() {
+    require misc::statistics::limn::data
+
+    $user    = $misc::statistics::limn::data::user
+    $command = $misc::statistics::limn::data::command
+
+    # A repo at analytics/limn-${title}-data.git had better exist!
+    $git_remote        = 
"https://gerrit.wikimedia.org/r/p/analytics/limn-${title}-data.git";
+
+    # Directory at which to clone $git_remote
+    $source_dir        = 
"${misc::statistics::base::limn::data::working_path}/limn-${title}-data"
+
+    # config directory for this limn data generate job
+    $config_dir        = "${$source_dir}/${title}/"
+
+    # log file for the generate cron job
+    $log               = 
"${misc::statistics::limn::data::log_dir}/limn-${title}-data.log"
+
+    # Rsync from $public_dir/${title}
+    $rsync_from        = "${misc::statistics::limn::data::public_dir}/${title}"
+    $rsync_to          = $misc::statistics::limn::data::rsync_to
+
+    # I'm not totally sure what this is...
+    $output            = "${rsync_from}/mobile/datafiles"
+
+    if !defined(Git::Clone["analytics/limn-${title}-data"]) {
+        git::clone { "analytics/limn-${title}-data":
+            ensure    => 'latest',
+            directory => $source_dir,
+            origin    => $git_remote,
+            owner     => $user,
+            require   => [User[$user]],
+        }
+    }
+
+    file { [$source_dir, $rsync_from, $output]:
+        ensure => 'directory',
+        owner  => $misc::statistics::limn::data::user,
+        group  => wikidev,
+        mode   => '0775',
+    }
+
+    cron { "rsync_${title}_apps_stats":
+        command => "python ${command} ${config} >> ${log} 2>&1 && 
/usr/bin/rsync -rt ${rsync_from} ${rsync_to}/",
         user    => $user,
         minute  => 0,
     }
 }
 
+# == Class misc::statistics::limn::data::jobs
+# Uses the misc::statistics::limn::data::generate define
+# to set up cron jobs to generate and sync particular data.
+#
+class misc::statistics::limn::data::jobs {
+    misc::statistics::limn::data::generate { 'mobile': }
+}
+
 # == Class misc::statistics::geowiki::params
 # Parameters for geowiki that get used outside this file
 class misc::statistics::geowiki::params {
diff --git a/manifests/site.pp b/manifests/site.pp
index f3d67f3..8255a98 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -2590,7 +2590,7 @@
     include role::statistics::cruncher
 
     include misc::statistics::cron_blog_pageviews
-    include misc::statistics::limn::mobile_data_sync
+    include misc::statistics::limn::data::jobs
     include misc::statistics::researchdb_password
 
     class { 'admin':

-- 
To view, visit https://gerrit.wikimedia.org/r/171553
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iec0f862ca3e4ee63570f1c495260807071b1e2a4
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <o...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to