Ottomata has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/402064 )

Change subject: Refine mediawiki job queue events into Hive event database
......................................................................

Refine mediawiki job queue events into Hive event database

Change-Id: I279aa9046d4a632183894d9d21893307962d4621
---
M modules/role/manifests/analytics_cluster/refinery/job/json_refine.pp
1 file changed, 42 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/64/402064/1

diff --git a/modules/role/manifests/analytics_cluster/refinery/job/json_refine.pp b/modules/role/manifests/analytics_cluster/refinery/job/json_refine.pp
index d1c7a3f..f50189b 100644
--- a/modules/role/manifests/analytics_cluster/refinery/job/json_refine.pp
+++ b/modules/role/manifests/analytics_cluster/refinery/job/json_refine.pp
@@ -30,4 +30,46 @@
         table_blacklist  => 
'^mediawiki_page_properties_change|mediawiki_recentchange$',
         minute           => 20,
     }
+
+    # Refine Mediawiki job queue events (from EventBus).
+    # This could be combined into the same EventBus refine job above, but it is nice to
+    # have them separated, as the job queue schemas are legacy and can be problematic.
+
+    # $problematic_jobs will not be refined.
+    # These have inconsistent schemas that cause refinement to fail.
+    $problematic_jobs = [
+        'EchoNotificationJob',
+        'EchoNotificationDeleteJob',
+        'TranslationsUpdateJob',
+        'MessageGroupStatesUpdaterJob',
+        'InjectRCRecords',
+        'cirrusSearchDeleteArchive',
+        'enqueue',
+        'htmlCacheUpdate',
+        'LocalRenameUserJob',
+        'RecordLintJob',
+        'wikibase_addUsagesForPage',
+        'refreshLinks',
+        'cirrusSearchCheckerJob',
+        'MassMessageSubmitJob',
+        'refreshLinksPrioritized',
+        'TranslatablePageMoveJob',
+        'ORESFetchScoreJob',
+        'PublishStashedFile',
+        'CentralAuthCreateLocalAccountJob',
+        'gwtoolsetUploadMediafileJob',
+    ]
+    $table_blacklist = sprintf('.*(%s)$', join($problematic_jobs, '|'))
+
+    role::analytics_cluster::refinery::job::json_refine_job { 
'eventlogging_eventbus_job_queue':
+        # This is imported by camus_job { 'mediawiki_job': }
+        input_base_path  => '/wmf/data/raw/mediawiki_job',
+        # 'datacenter' is extracted from the input path into a Hive table partition
+        input_regex      => 
'.*(eqiad|codfw)_(.+)/hourly/(\\d+)/(\\d+)/(\\d+)/(\\d+)',
+        input_capture    => 'datacenter,table,year,month,day,hour',
+        output_base_path => '/wmf/data/event',
+        output_database  => 'event',
+        table_blacklist  => $table_blacklist,
+        minute           => 25,
+    }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/402064
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I279aa9046d4a632183894d9d21893307962d4621
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to