Ottomata has submitted this change and it was merged.

Change subject: Removing code that generates pageviews using legacy definition
......................................................................


Removing code that generates pageviews using legacy definition

We no longer use the legacy pageview definition, so it
makes no sense for these files to be generated.

The new pageview definition has been in effect since May 2015:
https://meta.wikimedia.org/wiki/Research:Page_view
and legacy counts are available up until the merge of this change.

The consumer of this code is Vital Signs.

Bug: T124244
Change-Id: I7c6869da0fdc18d8edc6e67cbd688abac39676f5
---
M manifests/role/statistics.pp
D modules/statistics/manifests/aggregator/projectcounts.pp
M modules/statistics/manifests/aggregator/projectview.pp
3 files changed, 2 insertions(+), 83 deletions(-)

Approvals:
  Ottomata: Verified; Looks good to me, approved



diff --git a/manifests/role/statistics.pp b/manifests/role/statistics.pp
index 7a33714..72fe14a 100644
--- a/manifests/role/statistics.pp
+++ b/manifests/role/statistics.pp
@@ -111,11 +111,8 @@
     # kafkatee is useful here for adhoc processing of kafkadata
     require_package('kafkatee')
 
-    # aggregating hourly pagecount-all-sites project count files into
-    # daily per site csvs.
     # Although it is in the "private" role, the dataset actually isn't
     # private. We just keep it here to spare adding a separate role.
-    include statistics::aggregator::projectcounts
     include statistics::aggregator::projectview
 
     include passwords::mysql::research
diff --git a/modules/statistics/manifests/aggregator/projectcounts.pp 
b/modules/statistics/manifests/aggregator/projectcounts.pp
deleted file mode 100644
index 8812adc..0000000
--- a/modules/statistics/manifests/aggregator/projectcounts.pp
+++ /dev/null
@@ -1,78 +0,0 @@
-# == Class statistics::aggregator::projectcounts
-# Handles aggregation of pagecounts-all-sites projectcounts files
-#
-# WARNING - Files aggregated by this instance are legacy ones
-# A new pageview definition has been provided and aggregation
-# for it can be found in the same folder: projectview.pp
-#
-class statistics::aggregator::projectcounts {
-    require statistics::aggregator
-
-    # This class uses the cdh::hadoop::mount in order to get
-    # data files out of HDFS.
-    Class['cdh::hadoop::mount'] -> 
Class['::statistics::aggregator::projectcounts']
-
-    $script_path      = $::statistics::aggregator::script_path
-    $working_path     = 
"${::statistics::aggregator::working_path}/projectcounts"
-    $data_repo_path   = "${working_path}/data"
-    $data_path        = "${data_repo_path}/projectcounts"
-    $log_path         = "${working_path}/log"
-    # This should not be hardcoded.  Instead, one should be able to use
-    # $::cdh::hadoop::mount::mount_point to reference the user supplied
-    # parameter when the cdh::hadoop::mount class is evaluated.
-    # I am not sure why this is not working.
-    $hdfs_mount_point = '/mnt/hdfs'
-    $hdfs_source_path = 
"${hdfs_mount_point}/wmf/data/archive/pagecounts-all-sites"
-    $user             = $::statistics::user::username
-    $group            = $::statistics::user::username
-
-    file { $working_path:
-        ensure => 'directory',
-        owner  => $user,
-        group  => $group,
-        mode   => '0755'
-    }
-
-    git::clone { 'aggregator_projectcounts_data':
-        ensure    => 'latest',
-        directory => $data_repo_path,
-        # This repo should be /analytics/aggregator/projectcounts/data to
-        # be differenciated easily with /analytics/aggregator/projectview/data.
-        # But for legacy reasons we keep it as is.
-        origin    => 
'https://gerrit.wikimedia.org/r/p/analytics/aggregator/data.git',
-        owner     => $user,
-        group     => $group,
-        mode      => '0755',
-        require   => File[$working_path],
-    }
-
-    file { $log_path:
-        ensure  => 'directory',
-        owner   => $user,
-        group   => $group,
-        mode    => '0755',
-        require => File[$working_path],
-
-    }
-
-    # Cron for doing the basic aggregation step itself
-    cron { 'aggregator projectcounts aggregate':
-        command => "log_file=\"${log_path}/`date 
+\\%Y-\\%m-\\%d--\\%H-\\%M-\\%S`.log\" && 
${script_path}/bin/aggregate_projectcounts --source ${hdfs_source_path} 
--target ${data_path} --first-date=`date --date='-8 day' +\\%Y-\\%m-\\%d` 
--last-date=`date --date='-1 day' +\\%Y-\\%m-\\%d` --push-target --log 
\${log_file} 2>> \${log_file}",
-        user    => $user,
-        hour    => '13',
-        minute  => '0',
-        require => [
-            Git::Clone['aggregator_projectcounts_data'],
-            File[$log_path],
-        ],
-    }
-
-    # Cron for basing monitoring of the aggregated data
-    cron { 'aggregator projectcounts monitor':
-        command => "${script_path}/bin/check_validity_aggregated_projectcounts 
--data ${data_path}",
-        user    => $user,
-        hour    => '13',
-        minute  => '45',
-        require => Cron['aggregator projectcounts aggregate'],
-    }
-}
diff --git a/modules/statistics/manifests/aggregator/projectview.pp 
b/modules/statistics/manifests/aggregator/projectview.pp
index b490019..f05b3de 100644
--- a/modules/statistics/manifests/aggregator/projectview.pp
+++ b/modules/statistics/manifests/aggregator/projectview.pp
@@ -2,8 +2,8 @@
 # Handles aggregation of projectview_hourly files
 #
 # WARNING - Files aggregated by this instance are using the
-# new pageview definition. The legacy ones are managed by
-# projectcounts.pp in the same folder.
+# new pageview definition. The legacy ones are no longer 
+# being calculated
 #
 class statistics::aggregator::projectview {
     require statistics::aggregator

-- 
To view, visit https://gerrit.wikimedia.org/r/265656
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I7c6869da0fdc18d8edc6e67cbd688abac39676f5
Gerrit-PatchSet: 5
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Nuria <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to