This is an automated email from the ASF dual-hosted git repository.

lhotari pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pulsar-helm-chart.git


The following commit(s) were added to refs/heads/master by this push:
     new d3589b1  [CI] Improve logging in CI scripts and add timeouts (#195)
d3589b1 is described below

commit d3589b1c82c651478b9a906bb45127be60545e3d
Author: Lari Hotari <[email protected]>
AuthorDate: Tue Jan 4 17:24:28 2022 +0200

    [CI] Improve logging in CI scripts and add timeouts (#195)
    
    - show events every 15 seconds
    - dump logs every 5 minutes
    - dump logs when timing out
---
 .ci/helm.sh | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 3 deletions(-)

diff --git a/.ci/helm.sh b/.ci/helm.sh
index 55df7ca..a967f38 100644
--- a/.ci/helm.sh
+++ b/.ci/helm.sh
@@ -29,6 +29,28 @@ NAMESPACE=pulsar
 CLUSTER=pulsar-ci
 CLUSTER_ID=$(uuidgen)
 
+# brew package 'coreutils' is required on MacOSX
+# coreutils includes the 'timeout' command
+if [[ "$OSTYPE" == "darwin"* ]]; then  # only runs when OSTYPE begins with "darwin" (the macOS case named above)
+    brew_gnubin_packages=(coreutils)  # brew packages whose gnubin directories are added to PATH below
+    if ! type -P brew &>/dev/null; then  # brew must resolve to an executable on PATH; output is discarded
+        echo "On MacOSX, you must install required binaries with the following 
command:"
+        echo "brew install" "${brew_gnubin_packages[@]}"
+        exit 1  # abort: brew itself was not found
+    fi
+    for dep in "${brew_gnubin_packages[@]}"; do
+        path_element="$(brew --prefix)/opt/${dep}/libexec/gnubin"  # per-package gnubin directory under the brew prefix
+        if [ ! -d "${path_element}" ]; then  # the package counts as missing when this directory does not exist
+            echo "'${path_element}' is missing. Quick fix: 'brew install 
${dep}'."
+            echo "On MacOSX, you must install required binaries with the 
following command:"
+            echo "brew install" "${brew_gnubin_packages[@]}"
+            exit 1  # abort: required package directory not present
+        fi
+        PATH="${path_element}:$PATH"  # prepend, so this directory is searched before the existing PATH entries
+    done
+    export PATH  # pass the extended PATH on to child processes
+fi
+
 function ci::create_cluster() {
     echo "Creating a kind cluster ..."
     ${CHARTS_HOME}/hack/kind-cluster-build.sh --name pulsar-ci-${CLUSTER_ID} 
-c 1 -v 10
@@ -65,11 +87,19 @@ function ci::install_cert_manager() {
       echo ${WC};
       sleep 15
       ${KUBECTL} get pods -n cert-manager
+      ${KUBECTL} get events --sort-by=.lastTimestamp -A | tail -n 30 || true
       WC=$(${KUBECTL} get pods -n cert-manager 
--field-selector=status.phase=Running | wc -l)
     done
     echo "Successfully installed the cert manager."
 }
 
+function ci::print_pod_logs() {  # Print logs of every pod labelled app=pulsar in ${NAMESPACE}; takes no arguments.
+    echo "Logs for all pulsar containers:"
+    for pod in $(${KUBECTL} get pods -n ${NAMESPACE} -l app=pulsar -o=name); do  # one iteration per pod selected by the label
+      ${KUBECTL} logs -n ${NAMESPACE} "$pod" --all-containers=true 
--ignore-errors=true --prefix=true --tail=100 || true  # requests only the last 100 lines (--tail=100); "|| true" keeps a failed logs call from failing the function
+    done;
+}
+
 function ci::install_pulsar_chart() {
     local value_file=$1
     local extra_opts=$2
@@ -87,10 +117,20 @@ function ci::install_pulsar_chart() {
 
     echo "wait until broker is alive"
     WC=$(${KUBECTL} get pods -n ${NAMESPACE} 
--field-selector=status.phase=Running | grep ${CLUSTER}-broker | wc -l)
+    counter=1
     while [[ ${WC} -lt 1 ]]; do
+      ((counter++))
       echo ${WC};
       sleep 15
       ${KUBECTL} get pods -n ${NAMESPACE}
+      ${KUBECTL} get events --sort-by=.lastTimestamp -A | tail -n 30 || true
+      if [[ $((counter % 20)) -eq 0 ]]; then
+        ci::print_pod_logs
+        if [[ $counter -gt 100 ]]; then
+          echo >&2 "Timeout waiting..."
+          exit 1
+        fi
+      fi
       WC=$(${KUBECTL} get pods -n ${NAMESPACE} | grep ${CLUSTER}-broker | wc 
-l)
       if [[ ${WC} -gt 1 ]]; then
         ${KUBECTL} describe pod -n ${NAMESPACE} pulsar-ci-broker-0
@@ -98,17 +138,27 @@ function ci::install_pulsar_chart() {
       fi
       WC=$(${KUBECTL} get pods -n ${NAMESPACE} 
--field-selector=status.phase=Running | grep ${CLUSTER}-broker | wc -l)
     done
-    ${KUBECTL} exec -n ${NAMESPACE} ${CLUSTER}-toolset-0 -- bash -c 'until 
nslookup pulsar-ci-broker; do sleep 3; done'
-    ${KUBECTL} exec -n ${NAMESPACE} ${CLUSTER}-toolset-0 -- bash -c 'until [ 
"$(curl -L http://pulsar-ci-broker:8080/status.html)" == "OK" ]; do sleep 3; 
done'
+    timeout 300s ${KUBECTL} exec -n ${NAMESPACE} ${CLUSTER}-toolset-0 -- bash 
-c 'until nslookup pulsar-ci-broker; do sleep 3; done' || { echo >&2 "Timeout 
waiting..."; ci::print_pod_logs; exit 1; }
+    timeout 120s ${KUBECTL} exec -n ${NAMESPACE} ${CLUSTER}-toolset-0 -- bash 
-c 'until [ "$(curl -L http://pulsar-ci-broker:8080/status.html)" == "OK" ]; do 
sleep 3; done' || { echo >&2 "Timeout waiting..."; ci::print_pod_logs; exit 1; }
 
     WC=$(${KUBECTL} get pods -n ${NAMESPACE} 
--field-selector=status.phase=Running | grep ${CLUSTER}-proxy | wc -l)
+    counter=1
     while [[ ${WC} -lt 1 ]]; do
+      ((counter++))
       echo ${WC};
       sleep 15
       ${KUBECTL} get pods -n ${NAMESPACE}
+      ${KUBECTL} get events --sort-by=.lastTimestamp -A | tail -n 30 || true
+      if [[ $((counter % 8)) -eq 0 ]]; then
+        ci::print_pod_logs
+        if [[ $counter -gt 16 ]]; then
+          echo >&2 "Timeout waiting..."
+          exit 1
+        fi
+      fi
       WC=$(${KUBECTL} get pods -n ${NAMESPACE} 
--field-selector=status.phase=Running | grep ${CLUSTER}-proxy | wc -l)
     done
-    ${KUBECTL} exec -n ${NAMESPACE} ${CLUSTER}-toolset-0 -- bash -c 'until 
nslookup pulsar-ci-proxy; do sleep 3; done'
+    timeout 300s ${KUBECTL} exec -n ${NAMESPACE} ${CLUSTER}-toolset-0 -- bash 
-c 'until nslookup pulsar-ci-proxy; do sleep 3; done' || { echo >&2 "Timeout 
waiting..."; ci::print_pod_logs; exit 1; }
     # ${KUBECTL} exec -n ${NAMESPACE} ${CLUSTER}-toolset-0 -- bash -c 'until [ 
"$(curl -L http://pulsar-ci-proxy:8080/status.html)" == "OK" ]; do sleep 3; 
done'
 }
 
@@ -131,6 +181,7 @@ function ci::wait_function_running() {
       echo ${num_running}
       sleep 15
       ${KUBECTL} get pods -n ${NAMESPACE} --field-selector=status.phase=Running
+      ${KUBECTL} get events --sort-by=.lastTimestamp -A | tail -n 30 || true
       num_running=$(${KUBECTL} exec -n ${NAMESPACE} ${CLUSTER}-toolset-0 -- 
bash -c 'bin/pulsar-admin functions status --tenant pulsar-ci --namespace test 
--name test-function | bin/jq .numRunning') 
     done
 }

Reply via email to