Hello community,

here is the log from the commit of package kubernetes-salt for openSUSE:Factory checked in at 2018-07-13 10:21:35

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/kubernetes-salt (Old)
 and      /work/SRC/openSUSE:Factory/.kubernetes-salt.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "kubernetes-salt"

Fri Jul 13 10:21:35 2018 rev:29 rq:622262 version:4.0.0+git_r863_5c11a33

Changes:
--------
--- /work/SRC/openSUSE:Factory/kubernetes-salt/kubernetes-salt.changes	2018-06-27 10:20:37.537778059 +0200
+++ /work/SRC/openSUSE:Factory/.kubernetes-salt.new/kubernetes-salt.changes	2018-07-13 10:21:36.354473730 +0200
@@ -1,0 +2,70 @@
+Thu Jul 12 07:56:05 UTC 2018 - [email protected]
+
+- Commit d03c2fa by Rafael Fernández López [email protected]
+  Add haproxy migration sls to apply during upgrade
+
+  During an upgrade from 2.0 to 3.0, workers will lose communication with the
+  apiservers on the master nodes because of an auth change. After we have
+  applied all the master nodes, and before we start looping over the workers,
+  apply haproxy system-wide on all the workers so that their haproxy can
+  update its configuration and authenticate against the apiservers again.
+
+  This patch includes a new tree structure, meant to be destroyed between
+  versions, that keeps transient migration logic from polluting the main
+  structure of states. The structure is as follows:
+
+  - migrations
+    - <orig_version>-<target_version>
+      - overriden-sls/*
+      - * (direct actions that can spawn other migration tasks)
+
+  Fixes: bsc#1100212
+
+  Commit f190a7a by Rafael Fernández López [email protected]
+  Migrate all labels when renaming a node (builtin and user-defined labels).
+
+  Fixes: bsc#1100891
+
+  Commit a7e1b72 by Rafael Fernández López [email protected]
+  Only perform migrations on machines that are going to be updated.
+
+  During an upgrade we perform several migrations; only perform them on
+  machines that are part of the current subset of machines to be updated.
+
+  Fixes: bsc#1100115
+
+
+-------------------------------------------------------------------
+Mon Jul 9 09:49:10 UTC 2018 - [email protected]
+
+- Commit a609b3c by David Helkowski [email protected]
+  Add configmap from pillar data to dex ldap connectors
+
+
+-------------------------------------------------------------------
+Fri Jul 6 09:44:10 UTC 2018 - [email protected]
+
+- Commit 8ded363 by Michal Jura [email protected]
+  [CPI] Add option to ignore OpenStack Cinder availability zone, bsc#1095572
+
+  Ignore the OpenStack Cinder availability zone when attaching volumes. When
+  Nova and Cinder have different availability zones, this should be set to
+  true. Default is false.
+
+
+-------------------------------------------------------------------
+Thu Jul 5 12:22:41 UTC 2018 - [email protected]
+
+- Commit fd3507f by Kiall Mac Innes [email protected]
+  Stop kubelet before any other services
+
+  Explicitly stop kubelet before any other services. If cri.stop is run in
+  parallel with or before kubelet.stop, kubelet will be unable to drain
+  successfully.
+ + bsc#1085980 + + +------------------------------------------------------------------- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ kubernetes-salt.spec ++++++ --- /var/tmp/diff_new_pack.3Qf5cu/_old 2018-07-13 10:21:38.086475803 +0200 +++ /var/tmp/diff_new_pack.3Qf5cu/_new 2018-07-13 10:21:38.086475803 +0200 @@ -32,7 +32,7 @@ Name: kubernetes-salt %define gitrepo salt -Version: 4.0.0+git_r853_e2b520b +Version: 4.0.0+git_r863_5c11a33 Release: 0 BuildArch: noarch Summary: Production-Grade Container Scheduling and Management ++++++ master.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/pillar/params.sls new/salt-master/pillar/params.sls --- old/salt-master/pillar/params.sls 2018-06-22 16:03:13.000000000 +0200 +++ new/salt-master/pillar/params.sls 2018-07-12 09:57:10.000000000 +0200 @@ -158,6 +158,9 @@ lb_mon_retries: '3' # OpenStack Cinder Block Storage API version bs_version: 'v2' + # Ignore OpenStack Cinder avability zone when attaching volumes. + # When Nova and Cinder have different availability zones, this should be set to true. + ignore_vol_az: 'false' # Configuration for the reboot manager (https://github.com/SUSE/rebootmgr). # notes: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/addons/dex/manifests/15-configmap.yaml new/salt-master/salt/addons/dex/manifests/15-configmap.yaml --- old/salt-master/salt/addons/dex/manifests/15-configmap.yaml 2018-06-22 16:03:13.000000000 +0200 +++ new/salt-master/salt/addons/dex/manifests/15-configmap.yaml 2018-07-12 09:57:10.000000000 +0200 @@ -43,6 +43,39 @@ groupAttr: uniqueMember nameAttr: cn + {% for con in salt['pillar.get']('dex:connectors', []) %} + {% if con['type'] == 'ldap' %} + - type: ldap + id: {{ con['id'] | yaml_dquote }} + name: {{ con['name'] | yaml_dquote }} + config: + host: {{ con['server'] | yaml_dquote }} + startTLS: {{ 'true' if con.get('start_tls',false) else 'false' }} + {% if con['bind']['anonymous'] %} + # bindDN and bindPW not present; anonymous bind will be used + {% else %} + bindDN: {{ con['bind']['dn'] | yaml_dquote }} + bindPW: {{ con['bind']['pw'] | yaml_dquote }} + {% endif %} + usernamePrompt: {{ con['username_prompt'] | yaml_dquote }} + rootCAData: {{ con['root_ca_data'] | replace("\n","") | yaml_dquote }} + userSearch: + baseDN: {{ con['user']['base_dn'] | yaml_dquote }} + filter: {{ con['user']['filter'] | yaml_dquote }} + username: {{ con['user']['attr_map']['username'] | yaml_dquote }} + idAttr: {{ con['user']['attr_map']['id'] | yaml_dquote }} + emailAttr: {{ con['user']['attr_map']['email'] | yaml_dquote }} + nameAttr: {{ con['user']['attr_map']['name'] | yaml_dquote }} + groupSearch: + baseDN: {{ con['group']['base_dn'] | yaml_dquote }} + filter: {{ con['group']['filter'] | yaml_dquote }} + + userAttr: {{ con['group']['attr_map']['user'] | yaml_dquote }} + groupAttr: {{ con['group']['attr_map']['group'] | yaml_dquote }} + + nameAttr: {{ con['group']['attr_map']['name'] | yaml_dquote }} + {% endif %} + {% endfor %} oauth2: skipApprovalScreen: true diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/cni/update-post-start-services.sls new/salt-master/salt/cni/update-post-start-services.sls --- old/salt-master/salt/cni/update-post-start-services.sls 2018-06-22 16:03:13.000000000 +0200 +++ new/salt-master/salt/cni/update-post-start-services.sls 
1970-01-01 01:00:00.000000000 +0100 @@ -1,10 +0,0 @@ -# invoked by the "update" orchestration after starting -# all the services after rebooting - -# CNI does not use the docker0 bridge: remove it -remove-docker-iface: - cmd.run: - - name: ip link delete docker0 - - onlyif: grep -q docker0 /proc/net/dev - # TODO: maybe we should restart dockerd... Note well: do that only when - # caasp_cri.cri_name() == 'docker' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/cni/update-pre-orchestration.sh new/salt-master/salt/cni/update-pre-orchestration.sh --- old/salt-master/salt/cni/update-pre-orchestration.sh 2018-06-22 16:03:13.000000000 +0200 +++ new/salt-master/salt/cni/update-pre-orchestration.sh 1970-01-01 01:00:00.000000000 +0100 @@ -1,51 +0,0 @@ -#!/bin/sh - -NODE_ID="$1" -EXTERNAL_IP="$2" -BACKEND_TYPE="$3" - -FLANNEL_STATE_FILE="/run/flannel/subnet.env" - -########################################################## - -log() { echo "[CNI migration]: $1 " ; logger -t "cni-migration" "$1" ; } - -exit_changes() { - log "$2" - echo # an empty line here so the next line will be the last. - echo "changed=$1 comment='"$2"'" - exit 0 -} - -get_node_cidr() { - kubectl --request-timeout=1m get node "$NODE_ID" --template="{{.spec.podCIDR}}" -} - -patch_node() { - kubectl --request-timeout=1m patch node $NODE_ID -p "$@" 2>/dev/null -} - -########################################################## - -log "migrating $NODE_ID CIDR" - -[ -e "$FLANNEL_STATE_FILE" ] || exit_changes "no" "no flannel state file found" -source $FLANNEL_STATE_FILE -old_node_cidr=$(echo "$FLANNEL_SUBNET" | sed -e "s/\.1\//\.0\//g") -log "flannel state file found with node CIDR=$old_node_cidr" - -curr_node_cidr=$(get_node_cidr) -if [ -n "$curr_node_cidr" ] && [ "$curr_node_cidr" != "<no value>" ] ; then - exit_changes "no" "node already has a podCIDR:$curr_node_cidr" -fi - -log "$NODE_ID does not have a CIDR assigned: setting $old_node_cidr" -patch_node "{\"spec\":{\"podCIDR\":\"$old_node_cidr\"}}" -curr_node_cidr=$(get_node_cidr) - -log "adding some annotations..." -patch_node "{\"metadata\":{\"annotations\":{\"alpha.kubernetes.io/provided-node-ip\": \"$EXTERNAL_IP\"}}}" -patch_node "{\"metadata\":{\"annotations\":{\"flannel.alpha.coreos.com/public-ip\": \"$EXTERNAL_IP\"}}}" -patch_node "{\"metadata\":{\"annotations\":{\"flannel.alpha.coreos.com/kube-subnet-manager\": true}}}" -patch_node "{\"metadata\":{\"annotations\":{\"flannel.alpha.coreos.com/backend-type\": \"$BACKEND_TYPE\"}}}" -exit_changes "yes" "new CIDR set for $NODE_ID podCIDR:$curr_node_cidr" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/cni/update-pre-orchestration.sls new/salt-master/salt/cni/update-pre-orchestration.sls --- old/salt-master/salt/cni/update-pre-orchestration.sls 2018-06-22 16:03:13.000000000 +0200 +++ new/salt-master/salt/cni/update-pre-orchestration.sls 1970-01-01 01:00:00.000000000 +0100 @@ -1,19 +0,0 @@ -# invoked by the "update" orchestration right -# before starting the real orchestration updating -# and rebooting machines - -include: - - kubectl-config - -# try to save the flannel subnet in the .spec.podCIDR (if not assigned yet) -/tmp/cni-update-pre-orchestration.sh: - file.managed: - - source: salt://cni/update-pre-orchestration.sh - - mode: 0755 - cmd.run: - - name: /tmp/cni-update-pre-orchestration.sh {{ grains['machine_id'] + "." 
+ pillar['internal_infra_domain'] }} {{ salt.caasp_net.get_primary_ip() }} {{ salt.caasp_pillar.get('flannel:backend', 'vxlan') }} - - stateful: True - - env: - - KUBECONFIG: {{ pillar['paths']['kubeconfig'] }} - - require: - - {{ pillar['paths']['kubeconfig'] }} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/cni/update-pre-reboot.sls new/salt-master/salt/cni/update-pre-reboot.sls --- old/salt-master/salt/cni/update-pre-reboot.sls 2018-06-22 16:03:13.000000000 +0200 +++ new/salt-master/salt/cni/update-pre-reboot.sls 1970-01-01 01:00:00.000000000 +0100 @@ -1,16 +0,0 @@ -# invoked by the "update" orchestration right -# before rebooting a machine - -uninstall-flannel: - # we cannot remove the flannel package, so we can only - # make sure that the service is disabled - service.disabled: - - name: flanneld - -remove-flannel-files-1: - file.absent: - - name: /run/flannel/docker - -remove-flannel-files-2: - file.absent: - - name: /var/run/flannel diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/etcd/update-pre-orchestration.sls new/salt-master/salt/etcd/update-pre-orchestration.sls --- old/salt-master/salt/etcd/update-pre-orchestration.sls 2018-06-22 16:03:13.000000000 +0200 +++ new/salt-master/salt/etcd/update-pre-orchestration.sls 1970-01-01 01:00:00.000000000 +0100 @@ -1,24 +0,0 @@ -{% set roles = salt['grains.get']('roles', []) %} -{% set has_etcd_role = ("etcd" in roles) %} - -{% set is_etcd_member = salt['file.directory_exists' ]('/var/lib/etcd/member') and - not salt['file.directory_exists' ]('/var/lib/etcd/proxy') %} - -{%- if is_etcd_member and not has_etcd_role -%} - # this is really running a member of the etcd cluster but it doesn't - # have the 'etcd' role: set the 'etcd' role so we are sure it will be - # running etcd after the update - -add-etcd-role: - grains.append: - - name: roles - - value: etcd - -{%- else %} - -{# See https://github.com/saltstack/salt/issues/14553 #} -update-pre-orchestration-dummy: - cmd.run: - - name: "echo saltstack bug 14553" - -{%- endif %} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/etcd/update-pre-reboot.sls new/salt-master/salt/etcd/update-pre-reboot.sls --- old/salt-master/salt/etcd/update-pre-reboot.sls 2018-06-22 16:03:13.000000000 +0200 +++ new/salt-master/salt/etcd/update-pre-reboot.sls 2018-07-12 09:57:10.000000000 +0200 @@ -1,24 +0,0 @@ -{% set roles = salt['grains.get']('roles', []) %} -{% set has_etcd_role = ("etcd" in roles) %} - -{% if not has_etcd_role %} - # make sure there is nothing left in /var/lib/etcd - -cleanup-old-etcd-stuff: - cmd.run: - - name: rm -rf /var/lib/etcd/* - -uninstall-etcd: - # we cannot remove the etcd package, so we can only - # make sure that the service is disabled - service.disabled: - - name: etcd - -{%- else %} - -{# See https://github.com/saltstack/salt/issues/14553 #} -update-pre-reboot-dummy: - cmd.run: - - name: "echo saltstack bug 14553" - -{%- endif %} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/haproxy/init.sls new/salt-master/salt/haproxy/init.sls --- old/salt-master/salt/haproxy/init.sls 2018-06-22 16:03:13.000000000 +0200 +++ new/salt-master/salt/haproxy/init.sls 2018-07-12 09:57:10.000000000 +0200 @@ -77,6 +77,7 @@ - namespace: kube-system - timeout: 60 - onchanges: + - file: haproxy - file: /etc/caasp/haproxy/haproxy.cfg {% if not 
salt.caasp_nodes.is_admin_node() %} - require: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/kubelet/init.sls new/salt-master/salt/kubelet/init.sls --- old/salt-master/salt/kubelet/init.sls 2018-06-22 16:03:13.000000000 +0200 +++ new/salt-master/salt/kubelet/init.sls 2018-07-12 09:57:10.000000000 +0200 @@ -99,6 +99,20 @@ - require: - service: kubelet +# Wait for the kubelet to be healthy. +kubelet-health-check: + caasp_retriable.retry: + - target: caasp_http.wait_for_successful_query + - name: http://localhost:10248/healthz + - wait_for: 300 + - retry: + attempts: 3 + - status: 200 + - opts: + http_request_timeout: 30 + - onchanges: + - service: kubelet + ####################### # config files ####################### diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/kubelet/update-post-start-services.sls new/salt-master/salt/kubelet/update-post-start-services.sls --- old/salt-master/salt/kubelet/update-post-start-services.sls 2018-06-22 16:03:13.000000000 +0200 +++ new/salt-master/salt/kubelet/update-post-start-services.sls 2018-07-12 09:57:10.000000000 +0200 @@ -30,11 +30,3 @@ - name: "echo {{ grains['nodename'] }} should not be uncordoned. Skipping." {% endif %} - -remove-old-node-entry: - cmd.run: - - name: kubectl --request-timeout=1m delete node {{ grains['machine_id'] + "." + pillar['internal_infra_domain'] }} - - check_cmd: - - /bin/true - - onlyif: - - kubectl --request-timeout=1m get node {{ grains['machine_id'] + "." + pillar['internal_infra_domain'] }} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/kubelet/update-pre-orchestration.sh new/salt-master/salt/kubelet/update-pre-orchestration.sh --- old/salt-master/salt/kubelet/update-pre-orchestration.sh 2018-06-22 16:03:13.000000000 +0200 +++ new/salt-master/salt/kubelet/update-pre-orchestration.sh 1970-01-01 01:00:00.000000000 +0100 @@ -1,83 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -# Preseeds a node in Kubernetes with critical data migrated from -# an old node. - -OLD_NODE_NAME="$1" -NEW_NODE_NAME="$2" -ROLE="$3" - -########################################################## - -log() { echo "[machine-id migration]: $1 " ; logger -t "machine-id-migration" "$1" ; } - -log_changes() { - log "$2" - echo # an empty line here so the next line will be the last. - echo "changed=$1 comment='"$2"'" -} - -exit_changes() { - log_changes "$1" "$2" - exit 0 -} - -get_node_data() { - local template="$1" - kubectl --request-timeout=1m get node "$OLD_NODE_NAME" --template="{{$template}}" -} - -exit_handler() { - # Stashing $? MUST be the first command in the handler. - code=$? 
- if [ "$code" != "0" ]; then - log_changes "yes" "Unknown failure migrating from $OLD_NODE_NAME to $NEW_NODE_NAME" - fi - exit $code -} - -trap "exit_handler" INT TERM EXIT - -########################################################## - -log "migrating $OLD_NODE_NAME to $NEW_NODE_NAME" - -kubectl --request-timeout=1m get node $NEW_NODE_NAME && exit_changes "no" "$NEW_NODE_NAME already exists, nothing to migrate" -kubectl --request-timeout=1m get node $OLD_NODE_NAME || exit_changes "no" "$OLD_NODE_NAME does not exist, nothing to migrate" - -cat << EOF > /tmp/k8s-node-migration.yaml -apiVersion: v1 -kind: Node -metadata: - name: ${NEW_NODE_NAME} - labels: - kubernetes.io/hostname: '$(get_node_data 'index .metadata.labels "kubernetes.io/hostname"')' - beta.kubernetes.io/arch: '$(get_node_data 'index .metadata.labels "beta.kubernetes.io/arch"')' - beta.kubernetes.io/os: '$(get_node_data 'index .metadata.labels "beta.kubernetes.io/os"')' - annotations: - node.alpha.kubernetes.io/ttl: '$(get_node_data 'index .metadata.annotations "node.alpha.kubernetes.io/ttl"')' - volumes.kubernetes.io/controller-managed-attach-detach: '$(get_node_data 'index .metadata.annotations "volumes.kubernetes.io/controller-managed-attach-detach"')' - flannel.alpha.coreos.com/backend-data: '$(get_node_data 'index .metadata.annotations "flannel.alpha.coreos.com/backend-data"')' - flannel.alpha.coreos.com/backend-type: '$(get_node_data 'index .metadata.annotations "flannel.alpha.coreos.com/backend-type"')' - flannel.alpha.coreos.com/public-ip: $(get_node_data 'index .metadata.annotations "flannel.alpha.coreos.com/public-ip"') - flannel.alpha.coreos.com/kube-subnet-manager: "true" -spec: - externalID: ${NEW_NODE_NAME} - podCIDR: $(get_node_data .spec.podCIDR) -EOF - -if [[ "$ROLE" == "master" ]]; then - cat << EOF >> /tmp/k8s-node-migration.yaml - taints: - - effect: NoSchedule - key: node-role.kubernetes.io/master -EOF -fi - -kubectl --request-timeout=1m create -f /tmp/k8s-node-migration.yaml 2>/dev/null - -rm /tmp/k8s-node-migration.yaml - -exit_changes "yes" "Node data migrated from $OLD_NODE_NAME to $NEW_NODE_NAME" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/kubelet/update-pre-orchestration.sls new/salt-master/salt/kubelet/update-pre-orchestration.sls --- old/salt-master/salt/kubelet/update-pre-orchestration.sls 2018-06-22 16:03:13.000000000 +0200 +++ new/salt-master/salt/kubelet/update-pre-orchestration.sls 1970-01-01 01:00:00.000000000 +0100 @@ -1,23 +0,0 @@ -# invoked by the "update" orchestration right -# before starting the real orchestration updating -# and rebooting machines - -include: - - kubectl-config - -# Migrates critical data from the old K8S node, to a new one with updated names -/tmp/kubelet-update-pre-orchestration.sh: - file.managed: - - source: salt://kubelet/update-pre-orchestration.sh - - mode: 0755 - cmd.run: -{% if "kube-master" in salt['grains.get']('roles', []) %} - - name: /tmp/kubelet-update-pre-orchestration.sh {{ grains['machine_id'] + "." + pillar['internal_infra_domain'] }} {{ grains['nodename'] }} master -{% else %} - - name: /tmp/kubelet-update-pre-orchestration.sh {{ grains['machine_id'] + "." 
+ pillar['internal_infra_domain'] }} {{ grains['nodename'] }} worker -{% endif %} - - stateful: True - - env: - - KUBECONFIG: {{ pillar['paths']['kubeconfig'] }} - - require: - - {{ pillar['paths']['kubeconfig'] }} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/kubernetes-common/openstack-config.jinja new/salt-master/salt/kubernetes-common/openstack-config.jinja --- old/salt-master/salt/kubernetes-common/openstack-config.jinja 2018-06-22 16:03:13.000000000 +0200 +++ new/salt-master/salt/kubernetes-common/openstack-config.jinja 2018-07-12 09:57:10.000000000 +0200 @@ -24,3 +24,4 @@ [BlockStorage] trust-device-path=false bs-version={{ pillar['cloud']['openstack']['bs_version'] }} +ignore-volume-az={{ pillar['cloud']['openstack']['ignore_vol_az'] }} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/migrations/2-3/cni/cni-update-pre-orchestration.sh new/salt-master/salt/migrations/2-3/cni/cni-update-pre-orchestration.sh --- old/salt-master/salt/migrations/2-3/cni/cni-update-pre-orchestration.sh 1970-01-01 01:00:00.000000000 +0100 +++ new/salt-master/salt/migrations/2-3/cni/cni-update-pre-orchestration.sh 2018-07-12 09:57:10.000000000 +0200 @@ -0,0 +1,51 @@ +#!/bin/sh + +NODE_ID="$1" +EXTERNAL_IP="$2" +BACKEND_TYPE="$3" + +FLANNEL_STATE_FILE="/run/flannel/subnet.env" + +########################################################## + +log() { echo "[CNI migration]: $1 " ; logger -t "cni-migration" "$1" ; } + +exit_changes() { + log "$2" + echo # an empty line here so the next line will be the last. + echo "changed=$1 comment='"$2"'" + exit 0 +} + +get_node_cidr() { + kubectl --request-timeout=1m get node "$NODE_ID" --template="{{.spec.podCIDR}}" +} + +patch_node() { + kubectl --request-timeout=1m patch node $NODE_ID -p "$@" 2>/dev/null +} + +########################################################## + +log "migrating $NODE_ID CIDR" + +[ -e "$FLANNEL_STATE_FILE" ] || exit_changes "no" "no flannel state file found" +source $FLANNEL_STATE_FILE +old_node_cidr=$(echo "$FLANNEL_SUBNET" | sed -e "s/\.1\//\.0\//g") +log "flannel state file found with node CIDR=$old_node_cidr" + +curr_node_cidr=$(get_node_cidr) +if [ -n "$curr_node_cidr" ] && [ "$curr_node_cidr" != "<no value>" ] ; then + exit_changes "no" "node already has a podCIDR:$curr_node_cidr" +fi + +log "$NODE_ID does not have a CIDR assigned: setting $old_node_cidr" +patch_node "{\"spec\":{\"podCIDR\":\"$old_node_cidr\"}}" +curr_node_cidr=$(get_node_cidr) + +log "adding some annotations..." 
+patch_node "{\"metadata\":{\"annotations\":{\"alpha.kubernetes.io/provided-node-ip\": \"$EXTERNAL_IP\"}}}" +patch_node "{\"metadata\":{\"annotations\":{\"flannel.alpha.coreos.com/public-ip\": \"$EXTERNAL_IP\"}}}" +patch_node "{\"metadata\":{\"annotations\":{\"flannel.alpha.coreos.com/kube-subnet-manager\": true}}}" +patch_node "{\"metadata\":{\"annotations\":{\"flannel.alpha.coreos.com/backend-type\": \"$BACKEND_TYPE\"}}}" +exit_changes "yes" "new CIDR set for $NODE_ID podCIDR:$curr_node_cidr" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/migrations/2-3/cni/post-start-services.sls new/salt-master/salt/migrations/2-3/cni/post-start-services.sls --- old/salt-master/salt/migrations/2-3/cni/post-start-services.sls 1970-01-01 01:00:00.000000000 +0100 +++ new/salt-master/salt/migrations/2-3/cni/post-start-services.sls 2018-07-12 09:57:10.000000000 +0200 @@ -0,0 +1,10 @@ +# invoked by the "update" orchestration after starting +# all the services after rebooting + +# CNI does not use the docker0 bridge: remove it +remove-docker-iface: + cmd.run: + - name: ip link delete docker0 + - onlyif: grep -q docker0 /proc/net/dev + # TODO: maybe we should restart dockerd... Note well: do that only when + # caasp_cri.cri_name() == 'docker' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/migrations/2-3/cni/pre-orchestration.sls new/salt-master/salt/migrations/2-3/cni/pre-orchestration.sls --- old/salt-master/salt/migrations/2-3/cni/pre-orchestration.sls 1970-01-01 01:00:00.000000000 +0100 +++ new/salt-master/salt/migrations/2-3/cni/pre-orchestration.sls 2018-07-12 09:57:10.000000000 +0200 @@ -0,0 +1,19 @@ +# invoked by the "update" orchestration right +# before starting the real orchestration updating +# and rebooting machines + +include: + - kubectl-config + +# try to save the flannel subnet in the .spec.podCIDR (if not assigned yet) +/tmp/cni-update-pre-orchestration.sh: + file.managed: + - source: salt://migrations/2-3/cni/cni-update-pre-orchestration.sh + - mode: 0755 + cmd.run: + - name: /tmp/cni-update-pre-orchestration.sh {{ grains['machine_id'] + "." 
+ pillar['internal_infra_domain'] }} {{ salt.caasp_net.get_primary_ip() }} {{ salt.caasp_pillar.get('flannel:backend', 'vxlan') }} + - stateful: True + - env: + - KUBECONFIG: {{ pillar['paths']['kubeconfig'] }} + - require: + - {{ pillar['paths']['kubeconfig'] }} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/migrations/2-3/cni/pre-reboot.sls new/salt-master/salt/migrations/2-3/cni/pre-reboot.sls --- old/salt-master/salt/migrations/2-3/cni/pre-reboot.sls 1970-01-01 01:00:00.000000000 +0100 +++ new/salt-master/salt/migrations/2-3/cni/pre-reboot.sls 2018-07-12 09:57:10.000000000 +0200 @@ -0,0 +1,16 @@ +# invoked by the "update" orchestration right +# before rebooting a machine + +uninstall-flannel: + # we cannot remove the flannel package, so we can only + # make sure that the service is disabled + service.disabled: + - name: flanneld + +remove-flannel-files-1: + file.absent: + - name: /run/flannel/docker + +remove-flannel-files-2: + file.absent: + - name: /var/run/flannel diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/migrations/2-3/etcd/pre-orchestration.sls new/salt-master/salt/migrations/2-3/etcd/pre-orchestration.sls --- old/salt-master/salt/migrations/2-3/etcd/pre-orchestration.sls 1970-01-01 01:00:00.000000000 +0100 +++ new/salt-master/salt/migrations/2-3/etcd/pre-orchestration.sls 2018-07-12 09:57:10.000000000 +0200 @@ -0,0 +1,24 @@ +{% set roles = salt['grains.get']('roles', []) %} +{% set has_etcd_role = ("etcd" in roles) %} + +{% set is_etcd_member = salt['file.directory_exists' ]('/var/lib/etcd/member') and + not salt['file.directory_exists' ]('/var/lib/etcd/proxy') %} + +{%- if is_etcd_member and not has_etcd_role -%} + # this is really running a member of the etcd cluster but it doesn't + # have the 'etcd' role: set the 'etcd' role so we are sure it will be + # running etcd after the update + +add-etcd-role: + grains.append: + - name: roles + - value: etcd + +{%- else %} + +{# See https://github.com/saltstack/salt/issues/14553 #} +update-pre-orchestration-dummy: + cmd.run: + - name: "echo saltstack bug 14553" + +{%- endif %} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/migrations/2-3/etcd/pre-reboot.sls new/salt-master/salt/migrations/2-3/etcd/pre-reboot.sls --- old/salt-master/salt/migrations/2-3/etcd/pre-reboot.sls 1970-01-01 01:00:00.000000000 +0100 +++ new/salt-master/salt/migrations/2-3/etcd/pre-reboot.sls 2018-07-12 09:57:10.000000000 +0200 @@ -0,0 +1,24 @@ +{% set roles = salt['grains.get']('roles', []) %} +{% set has_etcd_role = ("etcd" in roles) %} + +{% if not has_etcd_role %} + # make sure there is nothing left in /var/lib/etcd + +cleanup-old-etcd-stuff: + cmd.run: + - name: rm -rf /var/lib/etcd/* + +disable-etcd: + # we cannot remove the etcd package, so we can only + # make sure that the service is disabled + service.disabled: + - name: etcd + +{%- else %} + +{# See https://github.com/saltstack/salt/issues/14553 #} +update-pre-reboot-dummy: + cmd.run: + - name: "echo saltstack bug 14553" + +{%- endif %} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/migrations/2-3/haproxy/haproxy.cfg.jinja new/salt-master/salt/migrations/2-3/haproxy/haproxy.cfg.jinja --- old/salt-master/salt/migrations/2-3/haproxy/haproxy.cfg.jinja 1970-01-01 01:00:00.000000000 +0100 +++ new/salt-master/salt/migrations/2-3/haproxy/haproxy.cfg.jinja 
2018-07-12 09:57:10.000000000 +0200 @@ -0,0 +1,95 @@ +{%- set this_roles = salt['grains.get']('roles', [])%} + +{%- set bind_ip = "0.0.0.0" if "kube-master" in this_roles else "127.0.0.1" -%} + +{%- set masters = salt.caasp_nodes.get_with_expr('G@roles:kube-master', + excluded_grains=['node_removal_in_progress'], + grain='nodename') %} +{% if not masters %} + {# fail early instead of generating a config file that is useless... #} + {% do salt.caasp_log.abort('No masters found when calculating backends for haproxy') %} +{% endif %} + +global + log /dev/log local0 + log /dev/log local1 notice + +defaults + log global + mode http + option redispatch + option dontlognull + balance roundrobin + timeout connect 5s + timeout client 120s + timeout http-request 30s + timeout client-fin 30s + timeout server 120s + timeout tunnel 0 + default-server inter 10s fall 2 + +# Listen on the standard Kube-API Public port, 6443 by default, and proxy to the masters on +# the Kube-API internal port, 6444 by default. +frontend kubernetes-master + bind {{ bind_ip }}:{{ pillar['api']['ssl_port'] }} ssl crt {{ pillar['ssl']['kube_apiserver_proxy_bundle'] }} ca-file /etc/pki/ca.crt verify optional + timeout client 0 + http-request set-header X-Remote-User %{+Q}[ssl_c_s_dn(cn)] if { ssl_c_used ssl_c_verify 0 } + http-request set-header X-Remote-Group %{+Q}[ssl_c_s_dn(o)] if { ssl_c_used ssl_c_verify 0 } + # If no certificate is passed, or if it's invalid, remove the auth headers + http-request del-header X-Remote-User unless { ssl_c_used ssl_c_verify 0 } + http-request del-header X-Remote-Group unless { ssl_c_used ssl_c_verify 0 } + http-request del-header X-Remote-Extra unless { ssl_c_used ssl_c_verify 0 } + acl streaming_logs url_reg [?&]follow=true + acl streaming_logs url_reg [?&]watch=true + use_backend no-timeout-backend if streaming_logs + acl interactive_session url_reg [?&]tty=true + use_backend no-timeout-backend if interactive_session + default_backend default-backend + +backend default-backend + option forwardfor + option httpchk GET /healthz +{% for id, nodename in masters.items() %} + server master-{{ nodename }} {{ nodename }}.{{ pillar['internal_infra_domain'] }}:{{ pillar['api']['int_ssl_port'] }} ssl crt {{ pillar['ssl']['kube_apiserver_proxy_bundle'] }} ca-file /etc/pki/ca.crt check check-ssl port {{ pillar['api']['int_ssl_port'] }} verify required +{%- endfor %} + +backend no-timeout-backend + option forwardfor + option forceclose + option http-server-close + option httpchk GET /healthz + timeout server 0 + timeout tunnel 0 +{% for id, nodename in masters.items() %} + server master-{{ nodename }} {{ nodename }}.{{ pillar['internal_infra_domain'] }}:{{ pillar['api']['int_ssl_port'] }} ssl crt {{ pillar['ssl']['kube_apiserver_proxy_bundle'] }} ca-file /etc/pki/ca.crt check check-ssl port {{ pillar['api']['int_ssl_port'] }} verify required +{%- endfor %} + + +{% if "admin" in this_roles %} +# Velum should be able to access Kube API and Dex service as well to get kubeconfig +listen kubernetes-dex + bind {{ bind_ip }}:{{ pillar['dex']['node_port'] }} + mode tcp + default-server inter 10s fall 2 + balance roundrobin + option redispatch + option httpchk GET /healthz +{% for id, nodename in masters.items() %} + server master-{{ nodename }} {{ nodename }}.{{ pillar['internal_infra_domain'] }}:{{ pillar['dex']['node_port'] }} check check-ssl port {{ pillar['dex']['node_port'] }} verify none +{%- endfor %} + +listen velum + bind 0.0.0.0:80 + bind 0.0.0.0:443 ssl crt {{ pillar['ssl']['velum_bundle'] }} 
ca-file /etc/pki/ca.crt + acl path_autoyast path_reg ^/autoyast$ + option forwardfor + http-request set-header X-Forwarded-Proto https + redirect scheme https code 302 if !{ ssl_fc } !path_autoyast + server velum unix@/var/run/puma/dashboard.sock + +listen velum-api + bind 127.0.0.1:444 ssl crt {{ pillar['ssl']['velum_bundle'] }} ca-file /etc/pki/ca.crt + option forwardfor + http-request set-header X-Forwarded-Proto https + server velum unix@/var/run/puma/api.sock +{% endif %} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/migrations/2-3/haproxy/haproxy.yaml.jinja new/salt-master/salt/migrations/2-3/haproxy/haproxy.yaml.jinja --- old/salt-master/salt/migrations/2-3/haproxy/haproxy.yaml.jinja 1970-01-01 01:00:00.000000000 +0100 +++ new/salt-master/salt/migrations/2-3/haproxy/haproxy.yaml.jinja 2018-07-12 09:57:10.000000000 +0200 @@ -0,0 +1,72 @@ +--- +apiVersion: v1 +kind: Pod +metadata: + name: haproxy + namespace: kube-system + labels: + name: haproxy + annotations: + scheduler.alpha.kubernetes.io/critical-pod: '' +spec: + restartPolicy: Always + hostNetwork: true + tolerations: + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + - key: "CriticalAddonsOnly" + operator: "Exists" + containers: + - name: haproxy + image: sles12/haproxy:1.6.0 + resources: + requests: + memory: 128Mi + limits: + memory: 128Mi + volumeMounts: + - name: haproxy-cfg + mountPath: /etc/haproxy + - name: ca-certificate + mountPath: /etc/pki/ca.crt + readOnly: True + - name: kubernetes-proxy-bundle-certificate + mountPath: {{ pillar['ssl']['kube_apiserver_proxy_bundle'] }} + readOnly: True +{% if "admin" in salt['grains.get']('roles', []) %} + - name: etc-hosts + mountPath: /etc/hosts + - name: velum-bundle-certificate + mountPath: {{ pillar['ssl']['velum_bundle'] }} + readOnly: True + - name: velum-unix-socket + mountPath: /var/run/puma +{% endif %} + volumes: + - name: haproxy-cfg + hostPath: + path: /etc/caasp/haproxy + - name: ca-certificate + hostPath: +{% if "admin" in salt['grains.get']('roles', []) %} + path: /etc/pki/ca.crt +{% else %} + path: {{ pillar['ssl']['ca_file'] }} +{% endif %} + type: FileOrCreate + - name: kubernetes-proxy-bundle-certificate + hostPath: + path: {{ pillar['ssl']['kube_apiserver_proxy_bundle'] }} + type: FileOrCreate +{% if "admin" in salt['grains.get']('roles', []) %} + - name: etc-hosts + hostPath: + path: /etc/hosts + - name: velum-bundle-certificate + hostPath: + path: {{ pillar['ssl']['velum_bundle'] }} + - name: velum-unix-socket + hostPath: + path: /var/run/puma +{% endif %} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/migrations/2-3/haproxy/init.sls new/salt-master/salt/migrations/2-3/haproxy/init.sls --- old/salt-master/salt/migrations/2-3/haproxy/init.sls 1970-01-01 01:00:00.000000000 +0100 +++ new/salt-master/salt/migrations/2-3/haproxy/init.sls 2018-07-12 09:57:10.000000000 +0200 @@ -0,0 +1,78 @@ +include: + - ca-cert + - cert + +kubelet_stop: + cmd.run: + - name: systemctl stop kubelet + +# NOTE: Remove me for 4.0 + +/etc/caasp/haproxy/haproxy.cfg: + file.managed: + - source: salt://migrations/2-3/haproxy/haproxy.cfg.jinja + - template: jinja + - user: root + - group: root + - mode: 644 + - makedirs: True + - dir_mode: 755 + - require: + - kubelet_stop + +{% from '_macros/certs.jinja' import certs, alt_master_names with context %} +{{ certs("kube-apiserver-proxy", + pillar['ssl']['kube_apiserver_proxy_crt'], + 
pillar['ssl']['kube_apiserver_proxy_key'], + cn = grains['nodename'] + '-proxy', + o = pillar['certificate_information']['subject_properties']['O'], + extra_alt_names = alt_master_names()) }} + +haproxy: + file.managed: + - name: /etc/kubernetes/manifests/haproxy.yaml + - source: salt://migrations/2-3/haproxy/haproxy.yaml.jinja + - template: jinja + - user: root + - group: root + - mode: 644 + - makedirs: True + - dir_mode: 755 + - require: + - kubelet_stop + caasp_retriable.retry: + - name: iptables-haproxy +{% if "kube-master" in salt['grains.get']('roles', []) %} + - target: iptables.append +{% else %} + - target: iptables.delete +{% endif %} + - retry: + attempts: 2 + - table: filter + - family: ipv4 + - chain: INPUT + - jump: ACCEPT + - match: state + - connstate: NEW + - dports: + - {{ pillar['api']['ssl_port'] }} + - proto: tcp + +haproxy_kill: + cmd.run: + - name: |- + haproxy_ids=$(docker ps | grep -E "k8s_(POD_)?haproxy.*_kube-system_" | awk '{print $1}') + if [ -n "$haproxy_ids" ]; then + docker kill $haproxy_ids + fi + - check_cmd: + - /bin/true + - require: + - file: haproxy + +kubelet_start: + cmd.run: + - name: systemctl start kubelet + - require: + - haproxy_kill diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/migrations/2-3/kubelet/cordon.sls new/salt-master/salt/migrations/2-3/kubelet/cordon.sls --- old/salt-master/salt/migrations/2-3/kubelet/cordon.sls 1970-01-01 01:00:00.000000000 +0100 +++ new/salt-master/salt/migrations/2-3/kubelet/cordon.sls 2018-07-12 09:57:10.000000000 +0200 @@ -0,0 +1,13 @@ +include: + - kubectl-config + +cordon-old-kubelet-name: + cmd.run: + - name: | + kubectl --request-timeout=1m --kubeconfig={{ pillar['paths']['kubeconfig'] }} cordon {{ grains['machine_id'] + "." + pillar['internal_infra_domain'] }} + - check_cmd: + - /bin/true + - onlyif: + - kubectl --request-timeout=1m --kubeconfig={{ pillar['paths']['kubeconfig'] }} get node {{ grains['machine_id'] + "." + pillar['internal_infra_domain'] }} + - require: + - file: {{ pillar['paths']['kubeconfig'] }} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/migrations/2-3/kubelet/drain.sls new/salt-master/salt/migrations/2-3/kubelet/drain.sls --- old/salt-master/salt/migrations/2-3/kubelet/drain.sls 1970-01-01 01:00:00.000000000 +0100 +++ new/salt-master/salt/migrations/2-3/kubelet/drain.sls 2018-07-12 09:57:10.000000000 +0200 @@ -0,0 +1,13 @@ +include: + - kubectl-config + +drain-old-kubelet-name: + cmd.run: + - name: | + kubectl --request-timeout=1m --kubeconfig={{ pillar['paths']['kubeconfig'] }} drain {{ grains['machine_id'] + "." + pillar['internal_infra_domain'] }} --force --delete-local-data=true --ignore-daemonsets + - check_cmd: + - /bin/true + - onlyif: + - kubectl --request-timeout=1m --kubeconfig={{ pillar['paths']['kubeconfig'] }} get node {{ grains['machine_id'] + "." 
+ pillar['internal_infra_domain'] }} + - require: + - file: {{ pillar['paths']['kubeconfig'] }} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/migrations/2-3/kubelet/kubelet-update-pre-orchestration.sh new/salt-master/salt/migrations/2-3/kubelet/kubelet-update-pre-orchestration.sh --- old/salt-master/salt/migrations/2-3/kubelet/kubelet-update-pre-orchestration.sh 1970-01-01 01:00:00.000000000 +0100 +++ new/salt-master/salt/migrations/2-3/kubelet/kubelet-update-pre-orchestration.sh 2018-07-12 09:57:10.000000000 +0200 @@ -0,0 +1,85 @@ +#!/bin/bash + +set -euo pipefail + +# Preseeds a node in Kubernetes with critical data migrated from +# an old node. + +OLD_NODE_NAME="$1" +NEW_NODE_NAME="$2" +ROLE="$3" + +########################################################## + +log() { echo "[machine-id migration]: $1 " ; logger -t "machine-id-migration" "$1" ; } + +log_changes() { + log "$2" + echo # an empty line here so the next line will be the last. + echo "changed=$1 comment='"$2"'" +} + +exit_changes() { + log_changes "$1" "$2" + exit 0 +} + +get_node_data() { + local template="$1" + kubectl --request-timeout=1m get node "$OLD_NODE_NAME" --template="{{$template}}" +} + +get_node_labels() { + kubectl --request-timeout=1m get node "$OLD_NODE_NAME" --template='{{range $key, $value := .metadata.labels}} {{$key}}: {{$value}}{{"\n"}}{{end}}' +} + +exit_handler() { + # Stashing $? MUST be the first command in the handler. + code=$? + if [ "$code" != "0" ]; then + log_changes "yes" "Unknown failure migrating from $OLD_NODE_NAME to $NEW_NODE_NAME" + fi + exit $code +} + +trap "exit_handler" INT TERM EXIT + +########################################################## + +log "migrating $OLD_NODE_NAME to $NEW_NODE_NAME" + +kubectl --request-timeout=1m get node $NEW_NODE_NAME && exit_changes "no" "$NEW_NODE_NAME already exists, nothing to migrate" +kubectl --request-timeout=1m get node $OLD_NODE_NAME || exit_changes "no" "$OLD_NODE_NAME does not exist, nothing to migrate" + +cat << EOF > /tmp/k8s-node-migration.yaml +apiVersion: v1 +kind: Node +metadata: + name: ${NEW_NODE_NAME} + labels: +$(get_node_labels) + annotations: + node.alpha.kubernetes.io/ttl: '$(get_node_data 'index .metadata.annotations "node.alpha.kubernetes.io/ttl"')' + volumes.kubernetes.io/controller-managed-attach-detach: '$(get_node_data 'index .metadata.annotations "volumes.kubernetes.io/controller-managed-attach-detach"')' + flannel.alpha.coreos.com/backend-data: '$(get_node_data 'index .metadata.annotations "flannel.alpha.coreos.com/backend-data"')' + flannel.alpha.coreos.com/backend-type: '$(get_node_data 'index .metadata.annotations "flannel.alpha.coreos.com/backend-type"')' + flannel.alpha.coreos.com/public-ip: $(get_node_data 'index .metadata.annotations "flannel.alpha.coreos.com/public-ip"') + flannel.alpha.coreos.com/kube-subnet-manager: "true" +spec: + externalID: ${NEW_NODE_NAME} + podCIDR: $(get_node_data .spec.podCIDR) +EOF + +if [[ "$ROLE" == "master" ]]; then + cat << EOF >> /tmp/k8s-node-migration.yaml + taints: + - effect: NoSchedule + key: node-role.kubernetes.io/master +EOF +fi + +kubectl --request-timeout=1m create -f /tmp/k8s-node-migration.yaml 2>/dev/null + +rm /tmp/k8s-node-migration.yaml + +exit_changes "yes" "Node data migrated from $OLD_NODE_NAME to $NEW_NODE_NAME" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/migrations/2-3/kubelet/post-start-services.sls 
new/salt-master/salt/migrations/2-3/kubelet/post-start-services.sls --- old/salt-master/salt/migrations/2-3/kubelet/post-start-services.sls 1970-01-01 01:00:00.000000000 +0100 +++ new/salt-master/salt/migrations/2-3/kubelet/post-start-services.sls 2018-07-12 09:57:10.000000000 +0200 @@ -0,0 +1,12 @@ +include: + - kubectl-config + +remove-old-node-entry: + cmd.run: + - name: kubectl --request-timeout=1m delete node {{ grains['machine_id'] + "." + pillar['internal_infra_domain'] }} + - check_cmd: + - /bin/true + - onlyif: + - kubectl --request-timeout=1m get node {{ grains['machine_id'] + "." + pillar['internal_infra_domain'] }} + - require: + - file: {{ pillar['paths']['kubeconfig'] }} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/migrations/2-3/kubelet/pre-orchestration.sls new/salt-master/salt/migrations/2-3/kubelet/pre-orchestration.sls --- old/salt-master/salt/migrations/2-3/kubelet/pre-orchestration.sls 1970-01-01 01:00:00.000000000 +0100 +++ new/salt-master/salt/migrations/2-3/kubelet/pre-orchestration.sls 2018-07-12 09:57:10.000000000 +0200 @@ -0,0 +1,23 @@ +# invoked by the "update" orchestration right +# before starting the real orchestration updating +# and rebooting machines + +include: + - kubectl-config + +# Migrates critical data from the old K8S node, to a new one with updated names +/tmp/kubelet-update-pre-orchestration.sh: + file.managed: + - source: salt://migrations/2-3/kubelet/kubelet-update-pre-orchestration.sh + - mode: 0755 + cmd.run: +{% if "kube-master" in salt['grains.get']('roles', []) %} + - name: /tmp/kubelet-update-pre-orchestration.sh {{ grains['machine_id'] + "." + pillar['internal_infra_domain'] }} {{ grains['nodename'] }} master +{% else %} + - name: /tmp/kubelet-update-pre-orchestration.sh {{ grains['machine_id'] + "." 
+ pillar['internal_infra_domain'] }} {{ grains['nodename'] }} worker +{% endif %} + - stateful: True + - env: + - KUBECONFIG: {{ pillar['paths']['kubeconfig'] }} + - require: + - {{ pillar['paths']['kubeconfig'] }} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/orch/removal.sls new/salt-master/salt/orch/removal.sls --- old/salt-master/salt/orch/removal.sls 2018-06-22 16:03:13.000000000 +0200 +++ new/salt-master/salt/orch/removal.sls 2018-07-12 09:57:10.000000000 +0200 @@ -202,6 +202,17 @@ # the replacement should be ready at this point: # we can remove the old node running in {{ target }} +early-stop-services-in-target: + salt.state: + - tgt: '{{ target }}' + - sls: + - kubelet.stop + - require: + - update-modules + {%- if replacement %} + - remove-addition-grain + {%- endif %} + stop-services-in-target: salt.state: - tgt: '{{ target }}' @@ -212,17 +223,13 @@ - kube-controller-manager.stop - kube-scheduler.stop {%- endif %} - - kubelet.stop - kube-proxy.stop - cri.stop {%- if target in etcd_members %} - etcd.stop {%- endif %} - require: - - update-modules - {%- if replacement %} - - remove-addition-grain - {%- endif %} + - early-stop-services-in-target # remove any other configuration in the machines cleanups-in-target-before-rebooting: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/salt-master/salt/orch/update.sls new/salt-master/salt/orch/update.sls --- old/salt-master/salt/orch/update.sls 2018-06-22 16:03:13.000000000 +0200 +++ new/salt-master/salt/orch/update.sls 2018-07-12 09:57:10.000000000 +0200 @@ -28,6 +28,7 @@ {%- set is_worker_tgt = is_responsive_node_tgt + ' and G@roles:kube-minion' %} {%- set is_updateable_master_tgt = is_updateable_tgt + ' and ' + is_master_tgt %} {%- set is_updateable_worker_tgt = is_updateable_tgt + ' and ' + is_worker_tgt %} +{%- set is_updateable_node_tgt = '( ' + is_updateable_master_tgt + ' ) or ( ' + is_updateable_worker_tgt + ' )' %} {%- set all_masters = salt.saltutil.runner('mine.get', tgt=is_master_tgt, fun='network.interfaces', tgt_type='compound').keys() %} {%- set super_master = all_masters|first %} @@ -116,16 +117,29 @@ # with the real update. pre-orchestration-migration: salt.state: - - tgt: '{{ is_regular_node_tgt }}' + - tgt: '{{ is_updateable_node_tgt }}' - tgt_type: compound - batch: 3 - sls: - - cni.update-pre-orchestration - - kubelet.update-pre-orchestration - - etcd.update-pre-orchestration + - migrations.2-3.cni.pre-orchestration + - migrations.2-3.kubelet.pre-orchestration + - migrations.2-3.etcd.pre-orchestration - require: - admin-setup + +# Before the real orchestration starts cordon all the worker nodes running 2.0. This way we ensure +# that no pods will be rescheduled on these machines while we upgrade: all rescheduled workloads +# will be strictly sent to upgraded nodes (the only ones uncordoned). 
+all-workers-2.0-pre-orchestration: + salt.state: + - tgt: '( {{ is_updateable_worker_tgt }} ) and G@osrelease:2.0' + - tgt_type: compound + - sls: + - migrations.2-3.kubelet.cordon + - require: + - pre-orchestration-migration + # NOTE: Remove me for 4.0 # # During an upgrade from 2.0 to 3.0, as we go master by master first, the first master will not @@ -155,7 +169,7 @@ - etcd - batch: 1 - require: - - pre-orchestration-migration + - all-workers-2.0-pre-orchestration # END NOTE early-services-setup: @@ -172,19 +186,28 @@ {%- set masters = salt.saltutil.runner('mine.get', tgt=is_updateable_master_tgt, fun='network.interfaces', tgt_type='compound') %} {%- for master_id in masters.keys() %} +# Kubelet needs other services, e.g. the cri, up + running. This provide a way +# to ensure kubelet is stopped before any other services. +{{ master_id }}-early-clean-shutdown: + salt.state: + - tgt: '{{ master_id }}' + - sls: + - kubelet.stop + - require: + - early-services-setup + {{ master_id }}-clean-shutdown: salt.state: - tgt: '{{ master_id }}' - sls: - container-feeder.stop - - kubelet.stop - kube-apiserver.stop - kube-controller-manager.stop - kube-scheduler.stop - cri.stop - etcd.stop - require: - - early-services-setup + - {{ master_id }}-early-clean-shutdown # Perform any necessary migrations before services are shutdown {{ master_id }}-pre-reboot: @@ -192,8 +215,8 @@ - tgt: '{{ master_id }}' - sls: - etc-hosts.update-pre-reboot - - cni.update-pre-reboot - - etcd.update-pre-reboot + - migrations.2-3.cni.pre-reboot + - migrations.2-3.etcd.pre-reboot - require: - {{ master_id }}-clean-shutdown @@ -247,6 +270,26 @@ - require: - {{ master_id }}-apply-haproxy +{% endfor %} + +all-masters-post-start-services: + salt.state: + - tgt: '{{ is_updateable_master_tgt }}' + - tgt_type: compound + - batch: 3 + - sls: + - migrations.2-3.cni.post-start-services + - migrations.2-3.kubelet.post-start-services + - kubelet.update-post-start-services + - require: + - early-services-setup +{%- for master_id in masters.keys() %} + - {{ master_id }}-start-services +{%- endfor %} + +# We remove the grain when we have the last reference to using that grain. +# Otherwise an incomplete subset of minions might be targeted. +{%- for master_id in masters.keys() %} {{ master_id }}-reboot-needed-grain: salt.function: - tgt: '{{ master_id }}' @@ -256,52 +299,83 @@ - kwarg: destructive: True - require: - - {{ master_id }}-start-services - -{% endfor %} + - all-masters-post-start-services +{%- endfor %} -# Perform migrations after all masters have been updated -all-masters-post-start-services: +# NOTE: Remove me for 4.0 +# +# On 2.0 -> 3.0 we are updating the way kubelets auth against the apiservers. +# At this point in time all masters have been updated, and all workers are (or +# will) be in `NotReady` state. This means that any operation that we perform +# that go through the apiserver down to the kubelets won't work (e.g. draining +# nodes). +# +# To fix this problem we'll apply the haproxy sls to all worker nodes, so they +# can rejoin the cluster and we can operate on them normally. 
+all-workers-2.0-pre-clean-shutdown: salt.state: - - tgt: '{{ is_master_tgt }}' + - tgt: '( {{ is_updateable_worker_tgt }} ) and G@osrelease:2.0' - tgt_type: compound - - batch: 3 - sls: - - cni.update-post-start-services - - kubelet.update-post-start-services + - etc-hosts + - migrations.2-3.haproxy - require: - - early-services-setup + - all-masters-post-start-services {%- for master_id in masters.keys() %} - {{ master_id }}-reboot-needed-grain {%- endfor %} +# Sanity check. If an operator manually rebooted a machine when it had the 3.0 +# snapshot ready, we are already in 3.0 but with an unapplied haproxy config. +# Apply the main haproxy sls to 3.0 workers (if any). +all-workers-3.0-pre-clean-shutdown: + salt.state: + - tgt: '( {{ is_updateable_worker_tgt }} ) and G@osrelease:3.0' + - tgt_type: compound + - sls: + - etc-hosts + - haproxy + - require: + - all-workers-2.0-pre-clean-shutdown +# END NOTE: Remove me for 4.0 + {%- set workers = salt.saltutil.runner('mine.get', tgt=is_updateable_worker_tgt, fun='network.interfaces', tgt_type='compound') %} {%- for worker_id, ip in workers.items() %} # Call the node clean shutdown script -{{ worker_id }}-clean-shutdown: +# Kubelet needs other services, e.g. the cri, up + running. This provide a way +# to ensure kubelet is stopped before any other services. +{{ worker_id }}-early-clean-shutdown: salt.state: - tgt: '{{ worker_id }}' - sls: - - container-feeder.stop + - migrations.2-3.kubelet.drain - kubelet.stop - - kube-proxy.stop - - cri.stop - - etcd.stop - require: - - all-masters-post-start-services + - all-workers-3.0-pre-clean-shutdown # wait until all the masters have been updated {%- for master_id in masters.keys() %} - {{ master_id }}-reboot-needed-grain {%- endfor %} +{{ worker_id }}-clean-shutdown: + salt.state: + - tgt: '{{ worker_id }}' + - sls: + - container-feeder.stop + - kube-proxy.stop + - cri.stop + - etcd.stop + - require: + - {{ worker_id }}-early-clean-shutdown + # Perform any necessary migrations before rebooting {{ worker_id }}-pre-reboot: salt.state: - tgt: '{{ worker_id }}' - sls: - etc-hosts.update-pre-reboot - - cni.update-pre-reboot + - migrations.2-3.cni.pre-reboot - require: - {{ worker_id }}-clean-shutdown @@ -360,7 +434,8 @@ salt.state: - tgt: '{{ worker_id }}' - sls: - - cni.update-post-start-services + - migrations.2-3.cni.post-start-services + - migrations.2-3.kubelet.post-start-services - kubelet.update-post-start-services - require: - {{ worker_id }}-start-services
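
The new kubelet-health-check state added to kubelet/init.sls above gates on the kubelet's local healthz endpoint (port 10248) returning HTTP 200 before the orchestration continues. As a rough illustration of what that state waits for (this is a sketch, not code shipped in the package), the following shell loop polls the same endpoint with the same overall budget: the 300-second deadline and 30-second request timeout mirror the state's wait_for and http_request_timeout values, while the curl invocation and the 5-second polling interval are choices made only for this example.

    #!/bin/sh
    # Illustrative sketch: approximates the check done by the kubelet-health-check
    # state (caasp_http.wait_for_successful_query on http://localhost:10248/healthz,
    # expecting HTTP 200). Not part of the kubernetes-salt package.
    HEALTHZ_URL="http://localhost:10248/healthz"
    DEADLINE=$(( $(date +%s) + 300 ))      # mirrors wait_for: 300

    until curl -sf --max-time 30 "$HEALTHZ_URL" >/dev/null; do   # --max-time mirrors http_request_timeout: 30
        if [ "$(date +%s)" -ge "$DEADLINE" ]; then
            echo "kubelet did not become healthy within 300s" >&2
            exit 1
        fi
        sleep 5    # polling interval chosen arbitrarily for this sketch
    done
    echo "kubelet is healthy"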

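The CIDR rewrite in cni-update-pre-orchestration.sh (now kept under migrations/2-3/cni/) is easy to misread: flannel's /run/flannel/subnet.env stores the node's gateway address in FLANNEL_SUBNET (x.y.z.1/NN), and the sed expression converts it to the pod network CIDR (x.y.z.0/NN) before the script patches it into .spec.podCIDR. The short shell sketch below shows only that transformation in isolation; the subnet value is invented for illustration.

    # Example value in the format flannel writes to /run/flannel/subnet.env
    FLANNEL_SUBNET="172.16.3.1/24"
    # Same rewrite the migration script applies before patching .spec.podCIDR
    old_node_cidr=$(echo "$FLANNEL_SUBNET" | sed -e "s/\.1\//\.0\//g")
    echo "$old_node_cidr"    # prints 172.16.3.0/24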