Ottomata has uploaded a new change for review.
https://gerrit.wikimedia.org/r/187432
Change subject: Decom webstatscollector step 1
......................................................................
Decom webstatscollector step 1
Step two will remove the remaining puppetization once the services are stopped
and the packages are removed.
Bug: T87868
Change-Id: I25ff1880cb9ff4d5f643bf1c0588ec1d6125cdda
---
M manifests/role/analytics/kafkatee.pp
M manifests/role/logging.pp
M manifests/role/statistics.pp
M templates/udp2log/filters.oxygen.erb
4 files changed, 31 insertions(+), 161 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet
refs/changes/32/187432/1
diff --git a/manifests/role/analytics/kafkatee.pp
b/manifests/role/analytics/kafkatee.pp
index 005d5eb..0db257e 100644
--- a/manifests/role/analytics/kafkatee.pp
+++ b/manifests/role/analytics/kafkatee.pp
@@ -161,130 +161,6 @@
-
-# == role::analytics::kafkatee::webstatscollector
-# We want to run webstatscollector via kafkatee for testing.
-# Some of the production (role::logging::webstatscollector)
-# configs are not relevant here, so we copy the class
-# and edit it.
-#
-# webstatscollector needs the mobile and text webrequest logs,
-# so this class makes sure that these topics are consumed by kafkaee
-# by including their kafkatee::input::* roles.
-#
-class role::analytics::kafkatee::webrequest::webstatscollector {
- include role::analytics::kafkatee::input::webrequest::mobile
- include role::analytics::kafkatee::input::webrequest::text
-
- # webstatscollector package creates this directory.
- # webstats-collector process writes dump files here.
- $webstats_dumps_directory = '/srv/log/webstats/dumps'
- # collector creates temporary Berkeley DB files that have
- # very high write IO. Upstart will chdir into
- # this temp directory before starting collector.
- $webstats_temp_directory = '/run/webstats'
-
- $collector_host = $::fqdn
- $collector_port = 3815
-
- file { $webstats_temp_directory:
- ensure => 'directory',
- owner => 'nobody',
- group => 'nogroup',
- }
- # Mount the temp directory as a tmpfs in /run
- mount { $webstats_temp_directory:
- ensure => 'mounted',
- device => 'tmpfs',
- fstype => 'tmpfs',
- options =>
'uid=nobody,gid=nogroup,mode=0755,noatime,defaults,size=2000m',
- pass => 0,
- dump => 0,
- require => File[$webstats_temp_directory],
- }
-
- # Create the dumps/ directory in which
- # we want collector to output hourly dump files.
- file { $webstats_dumps_directory:
- ensure => 'directory',
- owner => 'nobody',
- group => 'nogroup',
- require => Mount[$webstats_temp_directory],
- }
- # collector writes dumps to $cwd/dumps. We are going
- # run collector in $webstats_temp_directory, but we want dumps to be
- # on the normal filesystem. Symlink $webstats_temp_directory/dumps
- # to the dumps directory.
- file { "${webstats_temp_directory}/dumps":
- ensure => 'link',
- target => $webstats_dumps_directory,
- require => File[$webstats_dumps_directory],
- }
-
- package { 'webstatscollector':
- ensure => 'installed',
- }
- # Install a custom webstats-collector init script to use
- # custom temp directory.
- file { '/etc/init/webstats-collector.conf':
- content => template('webstatscollector/webstats-collector.init.erb'),
- owner => 'root',
- group => 'root',
- mode => '0444',
- require => Package['webstatscollector'],
- }
-
- service { 'webstats-collector':
- # 2014-10-30 turning of webstatscollector here while we
- # troubleshoot some kafkatee missing lines. - otto
- ensure => 'stopped',
- hasstatus => 'false',
- hasrestart => 'true',
- require => Package['webstatscollector'],
- }
-
- ferm::service { 'webstats-collector':
- proto => 'tcp',
- port => $collector_port,
- srange => '$ALL_NETWORKS',
- }
-
- # Gzip pagecounts files hourly.
- cron { 'webstats-dumps-gzip':
- # 2014-10-30 turning of webstatscollector here while we
- # troubleshoot some kafkatee missing lines. - otto
- ensure => 'absent',
- command => "/bin/gzip
${webstats_dumps_directory}/pagecounts-????????-?????? 2> /dev/null",
- minute => 2,
- user => 'nobody',
- require => Service['webstats-collector'],
- }
-
- # Delete webstats dumps that are older than 10 days daily.
- cron { 'webstats-dumps-delete':
- # 2014-10-30 turning of webstatscollector here while we
- # troubleshoot some kafkatee missing lines. - otto
- ensure => 'absent',
- command => "/usr/bin/find ${webstats_dumps_directory} -maxdepth 1
-type f -mtime +10 -delete",
- minute => 28,
- hour => 1,
- user => 'nobody',
- require => Service['webstats-collector'],
- }
-
- # kafkatee outputs into webstats filter and forwards to webstats collector
via log2udp
- ::kafkatee::output { 'webstatscollector':
- # 2014-10-30 turning of webstatscollector here while we
- # troubleshoot some kafkatee missing lines. - otto
- ensure => 'absent',
- destination => "/usr/local/bin/filter | /usr/bin/log2udp -h
${collector_host} -p ${collector_port}",
- type => 'pipe',
- require => Service['webstats-collector'],
- }
-}
-
-
-
# == Class role::analytics::kafkatee::input::webrequest
# Includes each of the 4 webrequest topics as input
# You can use this class, or if you want to consume
diff --git a/manifests/role/logging.pp b/manifests/role/logging.pp
index 366b232..6567154 100644
--- a/manifests/role/logging.pp
+++ b/manifests/role/logging.pp
@@ -230,20 +230,23 @@
# this temp directory before starting collector.
$webstats_temp_directory = '/run/webstats'
- file { $webstats_temp_directory:
- ensure => 'directory',
- owner => 'nobody',
- group => 'nogroup',
- }
+
# Mount the temp directory as a tmpfs in /run
mount { $webstats_temp_directory:
- ensure => 'mounted',
+ ensure => 'unmounted',
device => 'tmpfs',
fstype => 'tmpfs',
options =>
'uid=nobody,gid=nogroup,mode=0755,noatime,defaults,size=2000m',
pass => 0,
dump => 0,
- require => File[$webstats_temp_directory],
+ before => File[$webstats_temp_directory],
+ }
+
+
+ file { $webstats_temp_directory:
+ ensure => 'absent',
+ owner => 'nobody',
+ group => 'nogroup',
}
# Create the dumps/ directory in which
@@ -252,21 +255,13 @@
ensure => 'directory',
owner => 'nobody',
group => 'nogroup',
- require => Mount[$webstats_temp_directory],
+ # require => Mount[$webstats_temp_directory],
}
- # collector writes dumps to $cwd/dumps. We are going
- # run collector in $webstats_temp_directory, but we want dumps to be
- # on the normal filesystem. Symlink $webstats_temp_directory/dumps
- # to the dumps directory.
- file { "${webstats_temp_directory}/dumps":
- ensure => 'link',
- target => $webstats_dumps_directory,
- require => File[$webstats_dumps_directory],
- }
+
package { 'webstatscollector':
- ensure => 'latest',
+ ensure => 'absent',
}
# Install a custom webstats-collector init script to use
@@ -276,29 +271,32 @@
owner => 'root',
group => 'root',
mode => '0444',
- require => Package['webstatscollector'],
+ # require => Package['webstatscollector'],
+ ensure => 'absent',
}
- service { 'webstats-collector':
- ensure => 'running',
- hasstatus => false,
- hasrestart => true,
- require => [
- File['/etc/init/webstats-collector.conf'],
- File["${webstats_temp_directory}/dumps"],
- ],
- }
+ # service { 'webstats-collector':
+ # ensure => 'running',
+ # hasstatus => false,
+ # hasrestart => true,
+ # require => [
+ # File['/etc/init/webstats-collector.conf'],
+ # File["${webstats_temp_directory}/dumps"],
+ # ],
+ # }
ferm::service { 'webstats-collector':
proto => 'tcp',
port => '3815',
srange => '$ALL_NETWORKS',
+ ensure => 'absent',
}
# dataset1001 needs to be able to use ssh
# to rsync webstatscollector pagecount files from gadolinium.
ferm::rule { 'dataset-ssh':
rule => 'proto tcp dport ssh saddr 208.80.154.11/32 ACCEPT;',
+ ensure => 'absent',
}
# install a nrpe check for the webstatscollector collector process
@@ -307,7 +305,8 @@
nrpe_command => '/usr/lib/nagios/plugins/check_procs --argument-array
/usr/local/bin/collector -c 1:2',
contact_group => 'analytics',
retries => 10,
- require => Service['webstats-collector']
+ require => Service['webstats-collector'],
+ ensure => 'absent',
}
# Gzip pagecounts files hourly.
@@ -316,6 +315,7 @@
minute => 2,
user => 'nobody',
require => Service['webstats-collector'],
+ ensure => 'absent',
}
# Delete webstats dumps that are older than 10 days daily.
@@ -325,6 +325,7 @@
hour => 1,
user => 'nobody',
require => Service['webstats-collector'],
+ ensure => 'absent',
}
}
diff --git a/manifests/role/statistics.pp b/manifests/role/statistics.pp
index be005ce..589305f 100644
--- a/manifests/role/statistics.pp
+++ b/manifests/role/statistics.pp
@@ -92,7 +92,7 @@
# kafkatee is useful here for adhoc processing of kafkadata
require_package('kafkatee')
- # aggregating hourly webstatscollector project count files into
+ # aggregating hourly pagecount-all-sites project count files into
# daily per site csvs.
# Although it is in the “private” role, the dataset actually isn't
# private. We just keep it here to spare adding a separate role.
diff --git a/templates/udp2log/filters.oxygen.erb
b/templates/udp2log/filters.oxygen.erb
index 12523b5..a0be6f1 100644
--- a/templates/udp2log/filters.oxygen.erb
+++ b/templates/udp2log/filters.oxygen.erb
@@ -26,10 +26,3 @@
### All 5xx error responses -- domas (now using udp-filter instead of
5xx-filter).
# pipe 1 <%= @template_variables['webrequest_filter_directory'] %>/5xx-filter
| awk -W interactive '$9 !~ "upload.wikimedia.org"' >> <%= log_directory
%>/5xx.tsv.log
pipe 1 /usr/bin/udp-filter -F '\t' -r -s '^5' | awk -W interactive '$9 !~
"upload.wikimedia.org"' >> <%= log_directory %>/5xx.tsv.log
-
-### webstatscollector -- domas
-# NOTE! The files that filter and collector (on protactiniuym) generates
-# are rsynced over to dataset1001 by a cron maintained by
-# apergos running on snapshot1.
-# protactinium.wikimedia.org:3815 should be the webstats collector process.
-pipe 1 /usr/local/bin/filter | /usr/bin/log2udp -h protactinium.wikimedia.org
-p 3815
--
To view, visit https://gerrit.wikimedia.org/r/187432
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I25ff1880cb9ff4d5f643bf1c0588ec1d6125cdda
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits