Ottomata has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/402072 )

Change subject: Use intermediate script for json refine jobs
......................................................................

Use intermediate script for json refine jobs

JsonRefine commands can be too long for crontab if the table blacklist
or whitelist is very long.  This change renders a script into /usr/local/bin
that will be used in the crontab.

Change-Id: I9dd99efa15a24185d69277c7fb1674e1a1b2594d
---
M modules/role/manifests/analytics_cluster/refinery/job/json_refine_job.pp
1 file changed, 15 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet 
refs/changes/72/402072/1

diff --git 
a/modules/role/manifests/analytics_cluster/refinery/job/json_refine_job.pp 
b/modules/role/manifests/analytics_cluster/refinery/job/json_refine_job.pp
index d00b84c..db26c8d 100644
--- a/modules/role/manifests/analytics_cluster/refinery/job/json_refine_job.pp
+++ b/modules/role/manifests/analytics_cluster/refinery/job/json_refine_job.pp
@@ -58,15 +58,28 @@
         default => "--send-email-report --to-emails ${email_to}"
     }
 
-    $command = "PYTHONPATH=${refinery_path}/python 
${refinery_path}/bin/is-yarn-app-running ${job_name} || /usr/bin/spark-submit 
--master yarn --deploy-mode cluster --driver-memory ${spark_driver_memory} 
--conf spark.dynamicAllocation.maxExecutors=${spark_max_executors} --files 
/etc/hive/conf/hive-site.xml --class 
org.wikimedia.analytics.refinery.job.JsonRefine --name ${job_name} 
${_refinery_job_jar} --parallelism ${parallelism} --since ${since} 
${whitelist_blacklist_opt} ${email_opts} --input-base-path ${input_base_path} 
--input-regex '${input_regex}' --input-capture '${input_capture}' 
--output-base-path ${output_base_path} --database ${output_database} >> 
${log_file} 2>&1"
+    # The command here can end up being pretty long, especially if the table 
whitelist
+    # or blacklist is long.  Crontabs have a line length limit, so we render 
this
+    # command into a script and then install that as the cron job.
+    $refine_command = "PYTHONPATH=${refinery_path}/python 
${refinery_path}/bin/is-yarn-app-running ${job_name} || /usr/bin/spark-submit 
--master yarn --deploy-mode cluster --driver-memory ${spark_driver_memory} 
--conf spark.dynamicAllocation.maxExecutors=${spark_max_executors} --files 
/etc/hive/conf/hive-site.xml --class 
org.wikimedia.analytics.refinery.job.JsonRefine --name ${job_name} 
${_refinery_job_jar} --parallelism ${parallelism} --since ${since} 
${whitelist_blacklist_opt} ${email_opts} --input-base-path ${input_base_path} 
--input-regex '${input_regex}' --input-capture '${input_capture}' 
--output-base-path ${output_base_path} --database ${output_database}"
+    $refine_script = "/usr/local/bin/${job_name}"
+    file { $refine_script:
+        ensure  => $ensure,
+        content => $refine_command,
+        owner   => 'root',
+        group   => 'root',
+        mode    => '0555',
+    }
 
     cron { $job_name:
-        command  => $command,
+        ensure   => $ensure,
+        command  => "${refine_script} >> ${log_file} 2>&1",
         user     => $user,
         hour     => $hour,
         minute   => $minute,
         month    => $month,
         monthday => $monthday,
         weekday  => $weekday,
+        require  => File[$refine_script],
     }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/402072
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I9dd99efa15a24185d69277c7fb1674e1a1b2594d
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to