Ottomata has submitted this change and it was merged.
( https://gerrit.wikimedia.org/r/402072 )
Change subject: Use intermediate script for json refine jobs
......................................................................
Use intermediate script for json refine jobs
JsonRefine commands can be too long for a crontab line if the table
blacklist or whitelist is very long. This change renders the command into a
script in /usr/local/bin, and the crontab entry then runs that script.
Change-Id: I9dd99efa15a24185d69277c7fb1674e1a1b2594d
---
M modules/role/manifests/analytics_cluster/refinery/job/json_refine.pp
M modules/role/manifests/analytics_cluster/refinery/job/json_refine_job.pp
2 files changed, 15 insertions(+), 4 deletions(-)
Approvals:
Ottomata: Looks good to me, approved
jenkins-bot: Verified
diff --git
a/modules/role/manifests/analytics_cluster/refinery/job/json_refine.pp
b/modules/role/manifests/analytics_cluster/refinery/job/json_refine.pp
index f50189b..730daaf 100644
--- a/modules/role/manifests/analytics_cluster/refinery/job/json_refine.pp
+++ b/modules/role/manifests/analytics_cluster/refinery/job/json_refine.pp
@@ -8,8 +8,6 @@
# Refine EventLogging Analytics (capsule based) data.
role::analytics_cluster::refinery::job::json_refine_job {
'eventlogging_analytics':
- # Temporarily disabled for T179625.
- ensure => 'absent',
input_base_path => '/wmf/data/raw/eventlogging',
input_regex =>
'eventlogging_(.+)/hourly/(\\d+)/(\\d+)/(\\d+)/(\\d+)',
input_capture => 'table,year,month,day,hour',
diff --git
a/modules/role/manifests/analytics_cluster/refinery/job/json_refine_job.pp
b/modules/role/manifests/analytics_cluster/refinery/job/json_refine_job.pp
index d00b84c..db26c8d 100644
--- a/modules/role/manifests/analytics_cluster/refinery/job/json_refine_job.pp
+++ b/modules/role/manifests/analytics_cluster/refinery/job/json_refine_job.pp
@@ -58,15 +58,28 @@
default => "--send-email-report --to-emails ${email_to}"
}
- $command = "PYTHONPATH=${refinery_path}/python
${refinery_path}/bin/is-yarn-app-running ${job_name} || /usr/bin/spark-submit
--master yarn --deploy-mode cluster --driver-memory ${spark_driver_memory}
--conf spark.dynamicAllocation.maxExecutors=${spark_max_executors} --files
/etc/hive/conf/hive-site.xml --class
org.wikimedia.analytics.refinery.job.JsonRefine --name ${job_name}
${_refinery_job_jar} --parallelism ${parallelism} --since ${since}
${whitelist_blacklist_opt} ${email_opts} --input-base-path ${input_base_path}
--input-regex '${input_regex}' --input-capture '${input_capture}'
--output-base-path ${output_base_path} --database ${output_database} >>
${log_file} 2>&1"
+ # The command here can end up being pretty long, especially if the table whitelist
+ # or blacklist is long. Crontabs have a line length limit, so we render this
+ # command into a script and then install that as the cron job.
+ $refine_command = "PYTHONPATH=${refinery_path}/python
${refinery_path}/bin/is-yarn-app-running ${job_name} || /usr/bin/spark-submit
--master yarn --deploy-mode cluster --driver-memory ${spark_driver_memory}
--conf spark.dynamicAllocation.maxExecutors=${spark_max_executors} --files
/etc/hive/conf/hive-site.xml --class
org.wikimedia.analytics.refinery.job.JsonRefine --name ${job_name}
${_refinery_job_jar} --parallelism ${parallelism} --since ${since}
${whitelist_blacklist_opt} ${email_opts} --input-base-path ${input_base_path}
--input-regex '${input_regex}' --input-capture '${input_capture}'
--output-base-path ${output_base_path} --database ${output_database}"
+ $refine_script = "/usr/local/bin/${job_name}"
+ file { $refine_script:
+ ensure => $ensure,
+ content => $refine_command,
+ owner => 'root',
+ group => 'root',
+ mode => '0555',
+ }
cron { $job_name:
- command => $command,
+ ensure => $ensure,
+ command => "${refine_script} >> ${log_file} 2>&1",
user => $user,
hour => $hour,
minute => $minute,
month => $month,
monthday => $monthday,
weekday => $weekday,
+ require => File[$refine_script],
}
}
--
To view, visit https://gerrit.wikimedia.org/r/402072
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I9dd99efa15a24185d69277c7fb1674e1a1b2594d
Gerrit-PatchSet: 3
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <[email protected]>
Gerrit-Reviewer: Giuseppe Lavagetto <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits