Ottomata has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/337672 )
Change subject: Symlink reportupdater output to published-datasets
......................................................................
Symlink reportupdater output to published-datasets
This must be taken care of before the corresponding dashiki dashboards
can be updated to link to analytics.wikimedia.org/datasets instead of
datasets.wikimedia.org. This change ensures reportupdater will put its
output in the correct place considering the new rsync structure with
/srv/published-datasets being joined from stat1002 and stat1003 to
thorium. It removes the manual reportupdater-controlled rsync and
symlinks reportupdater output to published datasets.
Bug: T125854
Change-Id: If3d3412ea643e9536dcf67ab994b92624cc149d8
---
M modules/role/manifests/statistics/cruncher.pp
M modules/role/manifests/statistics/private.pp
M modules/statistics/manifests/compute.pp
3 files changed, 35 insertions(+), 7 deletions(-)
Approvals:
Ottomata: Looks good to me, approved
jenkins-bot: Verified
diff --git a/modules/role/manifests/statistics/cruncher.pp b/modules/role/manifests/statistics/cruncher.pp
index 1a070f0..0c39b01 100644
--- a/modules/role/manifests/statistics/cruncher.pp
+++ b/modules/role/manifests/statistics/cruncher.pp
@@ -29,11 +29,24 @@
# Set up reportupdater to be executed on this machine
- # and rsync the output base path to thorium.
class { 'reportupdater':
base_path => "${::statistics::working_path}/reportupdater",
user => $::statistics::user::username,
- rsync_to => 'thorium.eqiad.wmnet::srv/limn-public-data/',
+ }
+
+ # And set up a link for periodic jobs to be included in published reports.
+ # Because periodic is in published_datasets_path, files will be synced to
+ # analytics.wikimedia.org/datasets/periodic/reports
+ file { "${::statistics::compute::published_datasets_path}/periodic":
+ ensure => 'directory',
+ owner => 'root',
+ group => 'wikidev',
+ mode => '0775',
+ }
+ file { "${::statistics::compute::published_datasets_path}/periodic/reports":
+ ensure => 'link',
+ target => "${::statistics::working_path}/reportupdater/output",
+ require => Class['reportupdater'],
}
# Set up various jobs to be executed by reportupdater
diff --git a/modules/role/manifests/statistics/private.pp b/modules/role/manifests/statistics/private.pp
index f0c4014..f507472 100644
--- a/modules/role/manifests/statistics/private.pp
+++ b/modules/role/manifests/statistics/private.pp
@@ -55,13 +55,27 @@
# access to required files in Hadoop.
class { 'reportupdater':
base_path => "${::statistics::working_path}/reportupdater",
- rsync_to => 'thorium.eqiad.wmnet::srv/limn-public-data/metrics/',
user => 'hdfs',
# We know that this is included on stat1002, but unfortunetly
# it is done so outside of this role. Perhaps
# reportupdater should have its own role!
require => Class['cdh::hadoop'],
}
+ # And set up a link for periodic jobs to be included in published reports.
+ # Because periodic is in published_datasets_path, files will be synced to
+ # analytics.wikimedia.org/datasets/periodic/reports
+ file { "${::statistics::compute::published_datasets_path}/periodic":
+ ensure => 'directory',
+ owner => 'root',
+ group => 'wikidev',
+ mode => '0775',
+ }
+ file { "${::statistics::compute::published_datasets_path}/periodic/reports":
+ ensure => 'link',
+ target => "${::statistics::working_path}/reportupdater/output",
+ require => Class['reportupdater'],
+ }
+
# Set up a job to create browser reports on hive db.
reportupdater::job { 'browser':
repository => 'reportupdater-queries',
diff --git a/modules/statistics/manifests/compute.pp b/modules/statistics/manifests/compute.pp
index 85c7808..b7d0467 100644
--- a/modules/statistics/manifests/compute.pp
+++ b/modules/statistics/manifests/compute.pp
@@ -11,16 +11,17 @@
require_package('udp-filter')
$working_path = $::statistics::working_path
+ $published_datasets_path = "${working_path}/published-datasets"
# Create $working_path/published-datasets. Anything in this directory
# will be available at analytics.wikimedia.org/datasets.
# See: class statistics::sites::analytics.
- file { "${working_path}/published-datasets":
+ file { $published_datasets_path:
ensure => 'directory',
owner => 'root',
group => 'wikidev',
mode => '0775',
}
- file { "${working_path}/published-datasets/README":
+ file { "${published_datasets_path}/README":
ensure => 'present',
source => 'puppet:///modules/statistics/published-datasets-readme.txt',
owner => 'root',
@@ -34,8 +35,8 @@
# will sync them into /srv/analytics.wikimedia.org/datasets.
# See: statistics::sites::analytics.
cron { 'rsync-published-datasets':
- command => "/usr/bin/rsync -rt --delete ${working_path}/published-datasets/ thorium.eqiad.wmnet::srv/published-datasets-rsynced/${::hostname}/",
- require => File["${working_path}/published-datasets"],
+ command => "/usr/bin/rsync -rtL --delete ${published_datasets_path}/ thorium.eqiad.wmnet::srv/published-datasets-rsynced/${::hostname}/",
+ require => File[$published_datasets_path],
user => 'root',
minute => '*/30',
}
--
To view, visit https://gerrit.wikimedia.org/r/337672
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: If3d3412ea643e9536dcf67ab994b92624cc149d8
Gerrit-PatchSet: 6
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Milimetric <[email protected]>
Gerrit-Reviewer: Milimetric <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits