Dzahn has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/238639

Change subject: moved 'upgrade-helper' script over from puppet repo
......................................................................

moved 'upgrade-helper' script over from puppet repo

Change-Id: I98cd66c622a25d1d64753e26aafd90ccb5089af3
---
A upgradehelper/upgrade-helper
1 file changed, 326 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/software refs/changes/39/238639/1

diff --git a/upgradehelper/upgrade-helper b/upgradehelper/upgrade-helper
new file mode 100755
index 0000000..58f04aa
--- /dev/null
+++ b/upgradehelper/upgrade-helper
@@ -0,0 +1,326 @@
+#!/bin/bash
+# wmf upgrade helper aka. "UFO" (Upgrades For Operations)
+# a script to help with server upgrades
+# initial version dzahn 20120505
+# updated lcarr 20130306
+
+# current features:
+## update node group from icinga config
+## check kernel versions on node groups
+## check package upgrade status on node groups
+## check uptimes on node groups
+
+# this works on fenari (root) as "upgrade-helper"
+# it uses dsh, bc, figlet and cowsay
+# (these were all installed there already)
+
+# config
+
+## the current known-good kernel version
+goodkernel="2.6.32-41-server"
+
+## number of pending package upgrades considered ok (0 for strict checking)
+pkg_limit=0
+
+## number of days of uptime considered to be critical
+uptime_limit=200
+
+## set path to dsh and group files
+DSH=$(which dsh)
+DSH_GROUP_DIR=/etc/dsh/group
+
+# / config
+
+# yay, colors
+DULL=0
+BRIGHT=1
+FG_WHITE=37
+FG_RED=31
+FG_GREEN=32
+FG_VIOLET=35
+BG_NULL=00
+ESC="\033"
+RESET="$ESC[${DULL};${FG_WHITE};${BG_NULL}m"
+BRIGHT_WHITE="$ESC[${BRIGHT};${FG_WHITE}m"
+BRIGHT_RED="$ESC[${BRIGHT};${FG_RED}m"
+BRIGHT_GREEN="$ESC[${BRIGHT};${FG_GREEN}m"
+BRIGHT_VIOLET="$ESC[${BRIGHT};${FG_VIOLET}m"
+
+# yay, figlet
+FIGLET=$(which figlet)" -f mini"
+
+# functions
+
+# Show the banner and the main menu, read one selection and run it.
+# Globals read:    BRIGHT_WHITE, RESET, FIGLET, goodkernel, pkg_limit,
+#                  uptime_limit, nodegroup (set by groupselect)
+# Globals written: menuselect, kernelversion, threshold, maxuptime
+# Exits 0 on (q)uit and 1 on an unknown selection; otherwise the status is
+# that of the selected action.
+function menu() {
+
+       echo -e "\nHi $(whoami), welcome to ..${BRIGHT_WHITE}"
+       # FIGLET holds "path -f mini" in one string, so it must stay unquoted
+       echo "wmf upgrade helper" | $FIGLET
+       echo -e "$RESET"
+       echo -e "${BRIGHT_WHITE}u${RESET} - (u)pdate a dsh group list from icinga data\n"
+       echo -e "${BRIGHT_WHITE}k${RESET} - check (k)ernel versions on a dsh group\n"
+       echo -e "${BRIGHT_WHITE}p${RESET} - check (p)ackage upgrades on a dsh group\n"
+       echo -e "${BRIGHT_WHITE}t${RESET} - check up(t)imes on a dsh group\n"
+       echo -e "${BRIGHT_WHITE}q${RESET} - (q)uit\n"
+
+       echo -e "${BRIGHT_WHITE}what do you want to do? ${RESET}\n"
+       read -r -p ">" menuselect
+
+       case "$menuselect" in
+               u)
+                       groupselect
+                       updategroup "$nodegroup" ;;
+               k)
+                       groupselect
+                       echo -e "which kernel version is good? (string as returned by uname -r) (default (enter): ${goodkernel}) \n"
+                       read -r -p ">" kernelversion
+                       # plain enter selects the configured default
+                       if [[ -z "$kernelversion" ]]; then kernelversion=$goodkernel; fi
+                       kernelcheck "$nodegroup" "$kernelversion";;
+               p)
+                       groupselect
+                       echo -e "how many installable packages do you want to tolerate? (default (enter): ${pkg_limit}) \n"
+                       read -r -p ">" threshold
+                       if [[ -z "$threshold" ]]; then threshold=$pkg_limit; fi
+                       pkgcheck "$nodegroup" "$threshold";;
+               t)
+                       groupselect
+                       echo -e "is there a certain number of days of uptime you consider critical? (default (enter): ${uptime_limit})\n"
+                       read -r -p ">" maxuptime
+                       if [[ -z "$maxuptime" ]]; then maxuptime=$uptime_limit; fi
+                       uptimecheck "$nodegroup" "$maxuptime";;
+               q)
+                       echo "bye"
+                       exit 0;;
+               # the catch-all pattern must be unquoted: a quoted "*" only
+               # matches a literal asterisk typed by the user
+               *)
+                       echo "invalid option. use one of: u k p t q"
+                       exit 1;;
+       esac
+
+}
+
+# Ask which dsh group to operate on.
+# Globals written: nodegroup (the name as typed; read afterwards by menu)
+function groupselect () {
+       echo -e "which group do you want to check? (name of a dsh group file) \n"
+       # -r: take the input literally, do not interpret backslashes
+       read -r -p ">" nodegroup
+}
+
+# Pick the colours used in the result summary.
+# Globals read:    uphosts, progress (percentage strings as printed by bc),
+#                  BRIGHT_GREEN, BRIGHT_RED
+# Globals written: UCOLOR (for the "servers reached" figure),
+#                  PCOLOR (for the good/bad figures)
+# Only the exact string "100.00" counts as complete; anything else,
+# including an empty or unset value, is shown in red.
+function colorize() {
+
+       # quoted and with a default so an empty/unset value is a plain
+       # "not 100%" instead of a test syntax error
+       if [[ "${uphosts:-}" == "100.00" ]]; then
+               UCOLOR=$BRIGHT_GREEN
+       else
+               UCOLOR=$BRIGHT_RED
+       fi
+
+       if [[ "${progress:-}" == "100.00" ]]; then
+               PCOLOR=$BRIGHT_GREEN
+       else
+               PCOLOR=$BRIGHT_RED
+       fi
+}
+
+# Compare the running kernel of every host in a dsh group with a wanted
+# version and print a per-host list plus a summary.
+# Arguments: $1 - dsh group name
+#            $2 - kernel version string as returned by `uname -r`
+# Globals read:    DSH, DSH_GROUP_DIR, colour variables
+# Globals written: nodegroup, goodkernel, countup, countgood, countbad,
+#                  countall, uphosts, progress, lefttogo (and UCOLOR/PCOLOR
+#                  via colorize)
+# Files written:   /tmp/kernel_check_<group>, /tmp/kernel_check_result_<group>
+# Returns 1 without contacting any host if the group file does not exist.
+function kernelcheck() {
+
+       nodegroup=$1
+       goodkernel=$2
+       countup=0
+       countgood=0
+       countbad=0
+
+       local groupfile="${DSH_GROUP_DIR:-/etc/dsh/group}/${nodegroup}"
+       local rawfile="/tmp/kernel_check_${nodegroup}"
+       local resultfile="/tmp/kernel_check_result_${nodegroup}"
+       local host_field host_name host_kernel HCOLOR
+
+       # without the group file the host total is unknown and dsh has
+       # nothing to run on
+       if [[ ! -f "$groupfile" ]]; then
+               echo -e "${BRIGHT_RED}dsh group file '${groupfile}' not found.${RESET}" >&2
+               return 1
+       fi
+
+       # truncate to really empty files (no leading blank line in the results)
+       : > "$rawfile"
+       : > "$resultfile"
+       echo -e "${BRIGHT_WHITE}checking group '${nodegroup}' for kernel '${goodkernel}' .. gathering info .. ${RESET}\n"
+
+       "$DSH" -M -g "$nodegroup" "uname -r" | tee "$rawfile"
+
+       echo -e "\n${BRIGHT_WHITE}sorting results ...${RESET}\n"
+
+       # each line is parsed as "<host>: <value>" (first field up to the
+       # colon, second whitespace-separated field)
+       while read -r host_field host_kernel _; do
+
+               host_name=${host_field%%:*}
+
+               if [[ "$host_kernel" == "$goodkernel" ]]; then
+                       HCOLOR=$BRIGHT_GREEN
+                       countgood=$((countgood + 1))
+               else
+                       HCOLOR=$BRIGHT_RED
+                       countbad=$((countbad + 1))
+               fi
+
+               echo -e "${host_kernel} kernel on ${HCOLOR}$host_name${RESET}" >> "$resultfile"
+
+       done < "$rawfile"
+
+       sort -nr "$resultfile"
+       countall=$(wc -l < "$groupfile")
+       countup=$(wc -l < "$rawfile")
+
+       # guard both divisions: an empty group or no reachable host would
+       # otherwise make bc fail and leave the percentages empty
+       if (( countall > 0 )); then
+               uphosts=$(echo "scale=2; ${countup}*100/${countall}" | bc)
+       else
+               uphosts="0"
+       fi
+       if (( countup > 0 )); then
+               progress=$(echo "scale=2; ${countgood}*100/${countup}" | bc)
+               lefttogo=$(echo "scale=2; 100-${progress}" | bc)
+       else
+               progress="0"
+               lefttogo="0"
+       fi
+
+       colorize
+
+       echo -e "\n${BRIGHT_WHITE}results for '${nodegroup}':${RESET}\n\nservers in group: ${countall} - servers reached: ${UCOLOR}${countup} (${uphosts}%)${RESET}\nservers up with good kernels: ${PCOLOR}${countgood} (${progress}%)${RESET} - servers up with bad kernels: ${PCOLOR}${countbad} (${lefttogo}%)${RESET}\n\n"
+
+       if [[ "$uphosts" == "100.00" && "$progress" == "100.00" ]]; then
+               echo -e "\n${BRIGHT_GREEN}Yay!${RESET} Looks all ${BRIGHT_GREEN}good${RESET}. Here's your kitten ..:)\n"
+               kitten "purr .. want to mail this to RT now? .."
+       else
+               echo -e "\n${BRIGHT_RED}:( keep going.${RESET} There are upgrades left to do, some hosts are down or the node list is outdated.\n"
+       fi
+
+}
+
+function pkgcheck () {
+
+       nodegroup=$1
+       threshold=$2
+       countgood=0
+       countbad=0
+       countup=0
+
+       echo "" > /tmp/pkg_check_${nodegroup}
+       echo "" > /tmp/pkg_check_result_${nodegroup}
+       echo -e "${BRIGHT_WHITE}checking group '${nodegroup}' for number of 
installabe package upgrades .. (threshold $threshold) gathering info .. 
${RESET}\n\n"
+
+       # simulated! -s dist-upgrade, count number of Inst lines
+       $DSH -M -g $nodegroup "apt-get -s dist-upgrade | grep ^Inst | wc -l" | 
tee /tmp/pkg_check_${nodegroup}
+
+       echo -e "\n${BRIGHT_WHITE}sorting results ...${RESET}\n"
+
+       while read curline; do
+
+               host_name=$(echo $curline | cut -d ":" -f1 )
+               num_upgrades=$(echo $curline | cut -d " " -f2)
+
+               if [ $num_upgrades -le $threshold ]; then
+                       HCOLOR=$BRIGHT_GREEN
+                       let countgood=countgood+1
+               else
+                       HCOLOR=$BRIGHT_RED
+                       let countbad=countbad+1
+               fi
+
+               echo -e "${num_upgrades} upgrades installable on 
${HCOLOR}$host_name${RESET}" >> /tmp/pkg_check_result_${nodegroup}
+
+       done < /tmp/pkg_check_${nodegroup}
+
+       sort -nr /tmp/pkg_check_result_${nodegroup}
+
+       countall=$(wc -l /etc/dsh/group/${nodegroup}| cut -d " " -f1)
+       countup=$(wc -l /tmp/pkg_check_${nodegroup} | cut -d " " -f1)
+       uphosts=$(echo "scale=2; ${countup}*100/${countall}" | bc)
+       progress=$(echo "scale=2; ${countgood}*100/${countup}" | bc)
+       lefttogo=$(echo "scale=2; 100-${progress}" | bc)
+
+       colorize
+
+       echo -e "\n${BRIGHT_WHITE}results for 
'${nodegroup}':${RESET}\n\nservers in group: ${countall} - servers reached: 
${UCOLOR}${countup} (${uphosts}%)${RESET}\nservers up with <= ${threshold} 
installable upgrades: ${PCOLOR}${countgood} (${progress}%)${RESET} - servers up 
with > ${threshold} installable upgrades: ${PCOLOR}${countbad} 
(${lefttogo}%)${RESET}\n\n"
+
+       if [ $uphosts == "100.00" ] && [ $progress == "100.00" ]; then
+               echo -e "\n${BRIGHT_GREEN}Yay!${RESET} Looks all 
${BRIGHT_GREEN}good${RESET}. Here's your kitten ..:)\n"
+               kitten "purr .. want to mail this to RT now? .."
+       else
+               echo -e "\n${BRIGHT_RED}:( keep going.${RESET} There are 
upgrades left to do, some hosts are down or the node list is outdated.\n"
+       fi
+
+}
+
+# Print a message in a violet cowsay "hellokitty" speech bubble.
+# Arguments: $1 - text to put in the bubble
+# Globals read: BRIGHT_VIOLET, RESET
+function kitten() {
+       echo -e "${BRIGHT_VIOLET}"
+       # quoted: the message contains '?' and runs of dots/spaces that an
+       # unquoted expansion would glob-expand or collapse
+       printf '%s\n' "$1" | cowsay -f hellokitty
+       echo -e "$RESET"
+}
+
+function updategroup() {
+
+       nodegroup=$1
+       overwrite="no"
+
+       echo -e "${BRIGHT_WHITE}fetching puppet_hosts.cfg from icinga on neon 
via scp .. '${nodegroup}'${RESET}\n"
+
+       scp root@icinga:/etc/icinga/puppet_hosts.cfg /tmp/puppet_hosts.cfg
+       grep host_name /tmp/puppet_hosts.cfg | cut -d " " -f23 | grep 
"^${nodegroup}" | sort > /tmp/node_group_${nodegroup}
+
+       echo -e "\nchecking ... cmp -s ${DSH_GROUP_DIR}/${nodegroup} 
/tmp/node_group_${nodegroup} \n"
+       echo -e "\n! for this to work the node group file must exist and icinga 
host names need to _start_ with the same string!\n"
+
+       # yeah, "cmp", not "diff" which might return 0 just for being able to 
open both files
+       cmp -s ${DSH_GROUP_DIR}/${nodegroup} /tmp/node_group_${nodegroup} > 
/dev/null
+
+       if [ $? -eq 1 ]; then
+
+               echo -e "${BRIGHT_WHITE}diff between old and new group 
'${nodegroup}'${RESET}\n\n"
+               diff ${DSH_GROUP_DIR}/${nodegroup} /tmp/node_group_${nodegroup}
+               echo -e "${BRIGHT_WHITE}do you want to overwrite? (y/n) 
'${nodegroup}'${RESET}\n\n"
+               read -p ">" overwrite
+
+               if [ $overwrite == "y" ]; then
+                       mv -i /tmp/node_group_${nodegroup} 
${DSH_GROUP_DIR}/${nodegroup}
+                       echo -e "${BRIGHT_WHITE}done and updated. bye.${RESET}"
+               else
+                       echo -e "${BRIGHT_WHITE}NOT written. bye.${RESET}"
+               fi
+       else
+               echo -e "${BRIGHT_WHITE}either node group '${nodegroup}' is 
already up-to-date or it did not exist. nothing to do.${RESET}\n\n"
+       fi
+
+}
+
+# Read the uptime of every host in a dsh group from /proc/uptime, convert
+# it to whole days and print a per-host list plus a summary.
+# Arguments: $1 - dsh group name
+#            $2 - number of days of uptime from which a host counts as bad
+# Globals read:    DSH, DSH_GROUP_DIR, colour variables
+# Globals written: nodegroup, maxuptime, countup, countgood, countbad,
+#                  countall, uphosts, progress, lefttogo (and UCOLOR/PCOLOR
+#                  via colorize)
+# Files written:   /tmp/uptime_check_<group>, /tmp/uptime_check_result_<group>
+# Returns 1 without contacting any host if the group file does not exist.
+function uptimecheck() {
+
+       nodegroup=$1
+       maxuptime=$2
+       countup=0
+       countgood=0
+       countbad=0
+
+       local groupfile="${DSH_GROUP_DIR:-/etc/dsh/group}/${nodegroup}"
+       local rawfile="/tmp/uptime_check_${nodegroup}"
+       local resultfile="/tmp/uptime_check_result_${nodegroup}"
+       local host_field host_name host_seconds host_uptime HCOLOR
+
+       # without the group file the host total is unknown and dsh has
+       # nothing to run on
+       if [[ ! -f "$groupfile" ]]; then
+               echo -e "${BRIGHT_RED}dsh group file '${groupfile}' not found.${RESET}" >&2
+               return 1
+       fi
+
+       # truncate to really empty files (no leading blank line in the results)
+       : > "$rawfile"
+       : > "$resultfile"
+
+       echo -e "${BRIGHT_WHITE}getting raw uptimes from /proc for '${nodegroup}' (seconds) (max. uptime: $maxuptime days)${RESET}\n"
+       "$DSH" -M -g "$nodegroup" "cut -d \" \" -f1 /proc/uptime " | tee "$rawfile"
+
+       echo -e "\n${BRIGHT_WHITE}sorting results ...${RESET}\n"
+
+       # each line is parsed as "<host>: <value>" (first field up to the
+       # colon, second whitespace-separated field)
+       while read -r host_field host_seconds _; do
+
+               host_name=${host_field%%:*}
+
+               # only hand real numbers to bc; anything else (e.g. an error
+               # message from the remote side) is shown as "?" and counted
+               # as bad
+               if [[ "$host_seconds" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
+                       host_uptime=$(echo "scale=0; ${host_seconds}/60/60/24" | bc)
+               else
+                       host_uptime="?"
+               fi
+
+               if [[ "$host_uptime" != "?" ]] && [ "$host_uptime" -lt "$maxuptime" ]; then
+                       HCOLOR=$BRIGHT_GREEN
+                       countgood=$((countgood + 1))
+               else
+                       HCOLOR=$BRIGHT_RED
+                       countbad=$((countbad + 1))
+               fi
+
+               echo -e "${host_uptime} days of uptime on ${HCOLOR}$host_name${RESET}" >> "$resultfile"
+
+       done < "$rawfile"
+
+       sort -nr "$resultfile"
+
+       countall=$(wc -l < "$groupfile")
+       countup=$(wc -l < "$rawfile")
+
+       # guard both divisions: an empty group or no reachable host would
+       # otherwise make bc fail and leave the percentages empty
+       if (( countall > 0 )); then
+               uphosts=$(echo "scale=2; ${countup}*100/${countall}" | bc)
+       else
+               uphosts="0"
+       fi
+       if (( countup > 0 )); then
+               progress=$(echo "scale=2; ${countgood}*100/${countup}" | bc)
+               lefttogo=$(echo "scale=2; 100-${progress}" | bc)
+       else
+               progress="0"
+               lefttogo="0"
+       fi
+
+       colorize
+
+       echo -e "\n${BRIGHT_WHITE}results for '${nodegroup}':${RESET}\n\nservers in group: ${countall} - servers reached: ${UCOLOR}${countup} (${uphosts}%)${RESET}\nservers up with good uptime: ${PCOLOR}${countgood} (${progress}%)${RESET} - servers up with bad uptime: ${PCOLOR}${countbad} (${lefttogo}%)${RESET}\n\n"
+
+       if [[ "$uphosts" == "100.00" && "$progress" == "100.00" ]]; then
+               echo -e "\n${BRIGHT_GREEN}Yay!${RESET} Looks all ${BRIGHT_GREEN}good${RESET}. Here's your kitten ..:)\n"
+               kitten "purr .. want to mail this to RT now? .."
+       else
+               echo -e "\n${BRIGHT_RED}:( keep going.${RESET} Some hosts still need reboots, are down or the node list is outdated.\n"
+       fi
+
+}
+
+# main: show the interactive menu once and run the selected action
+menu

-- 
To view, visit https://gerrit.wikimedia.org/r/238639
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I98cd66c622a25d1d64753e26aafd90ccb5089af3
Gerrit-PatchSet: 1
Gerrit-Project: operations/software
Gerrit-Branch: master
Gerrit-Owner: Dzahn <dz...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to