Volans has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/349416 )
Change subject: Puppet: run-puppet-agent, add --failed-only option
......................................................................
Puppet: run-puppet-agent, add --failed-only option
- Add the --failed-only option to the run-puppet-agent script to run
puppet only if it's enabled and failed in the last run.
This should allow quick and easy recovery from deploys that cause a
puppet failure on a large number of hosts. After fixing the underlying
issue, it should be enough to run the following command from one of the
cumin masters:
sudo cumin -b 25 -s 1 -p 95 '*' 'run-puppet-agent --failed-only'
to force a rolling puppet run only on the hosts where it failed.
Change-Id: Ide1246b256a4fd1b48ed660b3e87f77e91c1b5b4
---
M modules/base/files/puppet/puppet-common.sh
M modules/base/files/puppet/run-puppet-agent
2 files changed, 44 insertions(+), 5 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/16/349416/1
diff --git a/modules/base/files/puppet/puppet-common.sh b/modules/base/files/puppet/puppet-common.sh
index bf41fc0..4ec92be 100755
--- a/modules/base/files/puppet/puppet-common.sh
+++ b/modules/base/files/puppet/puppet-common.sh
@@ -1,7 +1,19 @@
#!/bin/bash
# Common bash functions and variables to use for scripts
-PUPPETLOCK=$(puppet agent --configprint agent_catalog_run_lockfile)
-PUPPET_DISABLEDLOCK=$(puppet agent --configprint agent_disabled_lockfile)
+PUPPET_CONFIG="$(puppet config print)"
+
+# Function to get the value of a single puppet config variable
+# Parameters: $1 is the exact name of the variable to get from the config
+get_puppet_config() {
+    # Compare the whole name: a substring or regex match could pick another key
+    printf '%s\n' "${PUPPET_CONFIG}" \
+        | awk -v key="${1}" -F ' *= *' '$1 == key { sub(/^[^=]*= */, ""); sub(/[ \t]+$/, ""); print; exit }'
+}
+
+PUPPETLOCK="$(get_puppet_config agent_catalog_run_lockfile)"
+PUPPET_DISABLEDLOCK="$(get_puppet_config agent_disabled_lockfile)"
+PUPPET_SUMMARY="$(get_puppet_config lastrunfile)"
+PUPPET_REPORT="$(get_puppet_config lastrunreport)"
# Function to test if puppet is running or not
puppet_is_running() {
@@ -37,3 +49,16 @@
# If puppet is still running at this point, report an error
return 1
}
+
+last_run_success() {
+ if grep "failure:" "${PUPPET_SUMMARY}" | awk '{ if($2 > 0) exit 1 }'; then
+ # No failures in the summary, check the status too
+ if grep "status:" "${PUPPET_REPORT}" | awk '{ if($2 == "failed") exit
1 }'; then
+ # No failed status
+ return 0
+ fi
+ fi
+
+ # Either puppet has failures or failed to run
+ return 1
+}
diff --git a/modules/base/files/puppet/run-puppet-agent b/modules/base/files/puppet/run-puppet-agent
index 4a3a754..dfb243e 100755
--- a/modules/base/files/puppet/run-puppet-agent
+++ b/modules/base/files/puppet/run-puppet-agent
@@ -1,12 +1,13 @@
#!/bin/bash
verbose_opts="--verbose"
force=""
+failed_only=""
attempts=12
enable=""
show_help() {
cat << EOF
-Usage: ${0##*/} [-q] [-a ATTEMPTS] [-e [MSG]|-f]
+Usage: ${0##*/} [-q] [-a ATTEMPTS] [-e [MSG]|-f] [--failed-only]
Will execute a puppet agent run reliably, waiting for any preceding puppet runs
to complete before starting. This should allow to ensure a coordinated puppet
run after a change is effective everywhere.
@@ -15,7 +16,9 @@
-q --quiet Will make the puppet run quiet and not output any verbose
difference.
-e --enable MSG Will enable puppet if $MSG is present
- -f --force Will forcibly enable puppet if it's not enabled.
+ -f --force Will forcibly enable puppet if it is not enabled.
+ --failed-only Will run puppet only if the last run had failed. Will
+ silently skip if puppet is disabled and implies -q/--quiet.
-a --attempts N When waiting for a preceding puppet run, wait N*10 seconds
before giving up.
EOF
@@ -28,6 +31,10 @@
;;
-f|--force)
force=1
+ ;;
+ --failed-only)
+ failed_only=1
+ verbose_opts=""
;;
-a|--attempts)
if [ -n "$2" ]; then
@@ -69,10 +76,17 @@
test "$force" && puppet agent --enable
test "$enable" && enable-puppet "$enable"
+# Skip hosts where puppet is disabled if --failed-only is set
+test -n "${failed_only}" -a -f "${PUPPET_DISABLEDLOCK}" && exit 0
+
if ! wait_for_puppet "$attempts"; then
- SECONDS=$(( $attempts * 10 ))
+ SECONDS=$(( attempts * 10 ))
echo "Waited ${SECONDS} seconds and a preceding puppet run is still
ongoing, aborting"
exit 1
fi
+
+# Skip hosts where puppet didn't fail if --failed-only is set
+test -n "${failed_only}" -a last_run_success && exit 0
+
puppet agent --onetime --no-daemonize $verbose_opts --no-splay --show_diff \
--ignorecache --no-usecacheonfailure
--
To view, visit https://gerrit.wikimedia.org/r/349416
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ide1246b256a4fd1b48ed660b3e87f77e91c1b5b4
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Volans <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits