Elukey has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/395504 )

Change subject: role::analytics_cluster::coordinator: add a profile to restart streaming jobs
......................................................................

role::analytics_cluster::coordinator: add a profile to restart streaming jobs

This patch adds a cron job checking and possibly relaunching banner spark streaming job in Yarn.

Bug: T176983
Change-Id: Icdf583cce4dd6b6b69a145f57c8355bafc62aa08
---
A modules/profile/manifests/analytics/refinery/job/streams_check.pp
M modules/role/manifests/analytics_cluster/coordinator.pp
2 files changed, 35 insertions(+), 0 deletions(-)

Approvals:
  Elukey: Looks good to me, approved
  jenkins-bot: Verified

diff --git a/modules/profile/manifests/analytics/refinery/job/streams_check.pp b/modules/profile/manifests/analytics/refinery/job/streams_check.pp
new file mode 100644
index 0000000..8afea43
--- /dev/null
+++ b/modules/profile/manifests/analytics/refinery/job/streams_check.pp
@@ -0,0 +1,34 @@
+# == Class profile::analytics::refinery::job::streams_check
+#
+# Deploy cron scripts able to check and restart (if needed) streaming jobs
+# running on the Hadoop cluster that might have failed. This profile does not
+# take care of alarming, that needs to be done separately.
+#
+
+class profile::analytics::refinery::job::streams_check {
+    require ::profile::analytics::refinery
+
+    # Shortcut var to DRY up cron commands.
+    $refinery_path = $role::analytics_cluster::refinery::path
+
+    $refinery_job_jar = "${refinery_path}/artifacts/refinery-job.jar"
+    $spark_num_executors = 4
+    $spark_executor_cores = 3
+    $spark_driver_memory = '2G'
+    $spark_executor_memory = '4G'
+    $druid_segment_gran = 'HOUR'
+    $tranq_window_period = 'PT10M'
+    $batch_duration_secs = '60'
+    $job_name = 'BannerImpressionsStream'
+
+    # No log needed as job runs in cluster mode
+    $command = "PYTHONPATH=${refinery_path}/python ${refinery_path}/bin/is-yarn-app-running ${job_name} || /usr/bin/spark2-submit --master yarn --deploy-mode cluster --queue production --conf spark.dynamicAllocation.enabled=false --driver-memory ${spark_driver_memory} --executor-memory ${spark_executor_memory} --executor-cores ${spark_executor_cores} --num-executors ${spark_num_executors} --class org.wikimedia.analytics.refinery.job.druid.BannerImpressionsStream --name ${job_name} ${refinery_job_jar} --druid-indexing-segment-granularity ${druid_segment_gran} --druid-indexing-window-period ${tranq_window_period} --batch-duration-seconds ${batch_duration_secs} > /dev/null 2>&1"
+
+    # This checks for banner streaming job running in Yarn, and relaunches it if needed.
+    cron { 'refinery-relaunch-banner-streaming':
+        command     => $command,
+        environment => 'MAILTO=analytics-ale...@wikimedia.org',
+        user        => 'hdfs',
+        minute      => '*/5'
+    }
+}
diff --git a/modules/role/manifests/analytics_cluster/coordinator.pp b/modules/role/manifests/analytics_cluster/coordinator.pp
index 2222a96..9f38259 100644
--- a/modules/role/manifests/analytics_cluster/coordinator.pp
+++ b/modules/role/manifests/analytics_cluster/coordinator.pp
@@ -58,6 +58,7 @@
     include ::profile::analytics::refinery::job::project_namespace_map
     include ::profile::analytics::refinery::job::sqoop_mediawiki
     include ::profile::analytics::refinery::job::json_refine
+    include ::profile::analytics::refinery::job::streams_check
 
     include standard
     include ::profile::base::firewall

-- 
To view, visit https://gerrit.wikimedia.org/r/395504
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Icdf583cce4dd6b6b69a145f57c8355bafc62aa08
Gerrit-PatchSet: 6
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Joal <j...@wikimedia.org>
Gerrit-Reviewer: Elukey <ltosc...@wikimedia.org>
Gerrit-Reviewer: Joal <j...@wikimedia.org>
Gerrit-Reviewer: Ottomata <ao...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits