Volans has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/349416 )

Change subject: Puppet: run-puppet-agent, add --failed-only option
......................................................................

Puppet: run-puppet-agent, add --failed-only option

- Add the --failed-only option to the run-puppet-agent script to run
  puppet only if it's enabled and failed in the last run.
  This should allow quick and easy recovery from deploys that cause
  a puppet failure on a large number of hosts. After fixing the
  underlying issue, it should be enough to run the following command
  from one of the cumin masters:

    sudo cumin -b 25 -s 1 -p 95 '*' 'run-puppet-agent --failed-only'

  to force a rolling puppet run only on the hosts where it failed.

Change-Id: Ide1246b256a4fd1b48ed660b3e87f77e91c1b5b4
---
M modules/base/files/puppet/puppet-common.sh
M modules/base/files/puppet/run-puppet-agent
2 files changed, 44 insertions(+), 5 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/16/349416/1

diff --git a/modules/base/files/puppet/puppet-common.sh b/modules/base/files/puppet/puppet-common.sh
index bf41fc0..4ec92be 100755
--- a/modules/base/files/puppet/puppet-common.sh
+++ b/modules/base/files/puppet/puppet-common.sh
@@ -1,7 +1,19 @@
 #!/bin/bash
 # Common bash functions and variables to use for scripts
-PUPPETLOCK=$(puppet agent --configprint agent_catalog_run_lockfile)
-PUPPET_DISABLEDLOCK=$(puppet agent --configprint agent_disabled_lockfile)
+PUPPET_CONFIG="$(puppet config print)"
+
+# Print the value of one puppet setting taken from the cached ${PUPPET_CONFIG}
+# Parameters:
+#  $1: the exact name of the setting to get from the config
+get_puppet_config() {
+    # Anchor on "name =" so settings merely containing $1 don't match; xargs trims
+    echo "${PUPPET_CONFIG}" | grep "^${1} =" | cut -d "=" -f2- | xargs
+}
+
+PUPPETLOCK="$(get_puppet_config agent_catalog_run_lockfile)"
+PUPPET_DISABLEDLOCK="$(get_puppet_config agent_disabled_lockfile)"
+PUPPET_SUMMARY="$(get_puppet_config lastrunfile)"
+PUPPET_REPORT="$(get_puppet_config lastrunreport)"
 
 # Function to test if puppet is running or not
 puppet_is_running() {
@@ -37,3 +49,16 @@
     # If puppet is still running at this point, report an error
     return 1
 }
+
+# Return 0 only when the last puppet run is known to have succeeded
+last_run_success() {
+    # Without both files the outcome is unknown: treat it as a failed run
+    [[ -r "${PUPPET_SUMMARY}" && -r "${PUPPET_REPORT}" ]] || return 1
+    if grep "failure:" "${PUPPET_SUMMARY}" | awk '{ if($2 > 0) exit 1 }'; then
+        # No failures in the summary, check the status too
+        if grep "status:" "${PUPPET_REPORT}" | awk '{ if($2 == "failed") exit 1 }'; then
+            return 0
+        fi
+    fi
+    return 1
+}
diff --git a/modules/base/files/puppet/run-puppet-agent b/modules/base/files/puppet/run-puppet-agent
index 4a3a754..dfb243e 100755
--- a/modules/base/files/puppet/run-puppet-agent
+++ b/modules/base/files/puppet/run-puppet-agent
@@ -1,12 +1,13 @@
 #!/bin/bash
 verbose_opts="--verbose"
 force=""
+failed_only=""
 attempts=12
 enable=""
 
 show_help() {
     cat << EOF
-Usage: ${0##*/} [-q] [-a ATTEMPTS] [-e [MSG]|-f]
+Usage: ${0##*/} [-q] [-a ATTEMPTS] [-e [MSG]|-f] [--failed-only]
 Will execute a puppet agent run reliably, waiting for any preceding puppet runs
 to complete before starting. This should allow to ensure a coordinated puppet
 run after a change is effective everywhere.
@@ -15,7 +16,9 @@
     -q --quiet      Will make the puppet run quiet and not output any verbose
                     difference.
     -e --enable MSG Will enable puppet if $MSG is present
-    -f --force      Will forcibly enable puppet if it's not enabled.
+    -f --force      Will forcibly enable puppet if it is not enabled.
+    --failed-only   Will run puppet only if the last run had failed. Will
+                    silently skip if puppet is disabled and implies -q/--quiet.
     -a --attempts N When waiting for a preceding puppet run, wait N*10 seconds
                     before giving up.
 EOF
@@ -28,6 +31,10 @@
             ;;
         -f|--force)
             force=1
+            ;;
+        --failed-only)
+            failed_only=1
+            verbose_opts=""
             ;;
         -a|--attempts)
             if [ -n "$2" ]; then
@@ -69,10 +76,17 @@
 test "$force" && puppet agent --enable
 test "$enable" && enable-puppet "$enable"
 
+# Skip hosts where puppet is disabled if --failed-only is set
+test -n "${failed_only}" -a -f "${PUPPET_DISABLEDLOCK}" && exit 0
+
 if ! wait_for_puppet "$attempts"; then
-    SECONDS=$(( $attempts * 10 ))
+    SECONDS=$(( attempts * 10 ))
     echo "Waited ${SECONDS} seconds and a preceding puppet run is still ongoing, aborting"
     exit 1
 fi
+
+# With --failed-only skip hosts whose last run succeeded (run the check as a command)
+test -n "${failed_only}" && last_run_success && exit 0
+
 puppet agent --onetime --no-daemonize $verbose_opts --no-splay --show_diff \
        --ignorecache --no-usecacheonfailure

-- 
To view, visit https://gerrit.wikimedia.org/r/349416
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ide1246b256a4fd1b48ed660b3e87f77e91c1b5b4
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Volans <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to