This is an automated email from the ASF dual-hosted git repository.

mousius pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 22102063dc [ci] De-duplicate retry functions (#12325)
22102063dc is described below

commit 22102063dccf42c29f9d43ee5684026ba67a3386
Author: driazati <[email protected]>
AuthorDate: Wed Aug 10 01:58:15 2022 -0700

    [ci] De-duplicate retry functions (#12325)
---
 Jenkinsfile                  | 1897 +++++++++++++++++++++---------------------
 ci/jenkins/Prepare.groovy.j2 |   22 +-
 ci/jenkins/macros.j2         |   28 +-
 3 files changed, 969 insertions(+), 978 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index a2fe67d4b5..aa6d29fc2a 100755
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -45,7 +45,7 @@
 // 'python3 jenkins/generate.py'
 // Note: This timestamp is here to ensure that updates to the Jenkinsfile are
 // always rebased on main before merging:
-// Generated at 2022-08-05T17:23:43.942908
+// Generated at 2022-08-05T14:15:15.427777
 
 import org.jenkinsci.plugins.pipeline.modeldefinition.Utils
 // NOTE: these lines are scanned by docker/dev_common.sh. Please update the regex as needed. -->
@@ -137,25 +137,30 @@ def init_git() {
   )
 
   sh(
-    script: '''
+    script: """
       set -eux
-      n=0
-      max_retries=3
-      backoff_max=30
-      until [ "$n" -ge $max_retries ]
-      do
-          timeout 5m git submodule update --init -f --jobs 0 && break
-          n=$((n+1))
-          if [ "$n" -eq $max_retries ]; then
-              echo "failed to update $n / $max_retries, giving up"
-              exit 1
-          fi
-
-          WAIT=$((RANDOM % "$backoff_max"))
-          echo "failed to update $n / $max_retries, waiting $WAIT to try again"
-          sleep $WAIT
-      done
-    ''',
+      retry() {
+  local max_retries=\$1
+  shift
+  local n=0
+  local backoff_max=30
+  until [ "\$n" -ge \$max_retries ]
+  do
+      "\$@" && break
+      n=\$((n+1))
+      if [ "\$n" -eq \$max_retries ]; then
+          echo "failed to update after attempt \$n / \$max_retries, giving up"
+          exit 1
+      fi
+
+      WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+      echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+      sleep \$WAIT
+  done
+}
+
+      retry 3 timeout 5m git submodule update --init -f --jobs 0
+    """,
     label: 'Update git submodules',
   )
 }
@@ -185,23 +190,23 @@ def docker_init(image) {
       script: """
       set -eux
       retry() {
-  local retries=\$1
+  local max_retries=\$1
   shift
-
-  local count=0
-  until "\$@"; do
-    exit=\$?
-    wait=\$((2 ** \$count))
-    count=\$((\$count + 1))
-    if [ \$count -lt \$retries ]; then
-      echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-      sleep \$wait
-    else
-      echo "Retry \$count/\$retries exited \$exit, no more retries left."
-      return \$exit
-    fi
+  local n=0
+  local backoff_max=30
+  until [ "\$n" -ge \$max_retries ]
+  do
+      "\$@" && break
+      n=\$((n+1))
+      if [ "\$n" -eq \$max_retries ]; then
+          echo "failed to update after attempt \$n / \$max_retries, giving up"
+          exit 1
+      fi
+
+      WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+      echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+      sleep \$WAIT
   done
-  return 0
 }
 
       retry 3 docker pull ${image}
@@ -685,23 +690,23 @@ stage('Build') {
             script: """
               set -eux
               retry() {
-                local retries=\$1
+                local max_retries=\$1
                 shift
-
-                local count=0
-                until "\$@"; do
-                  exit=\$?
-                  wait=\$((2 ** \$count))
-                  count=\$((\$count + 1))
-                  if [ \$count -lt \$retries ]; then
-                    echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                    sleep \$wait
-                  else
-                    echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                    return \$exit
-                  fi
+                local n=0
+                local backoff_max=30
+                until [ "\$n" -ge \$max_retries ]
+                do
+                    "\$@" && break
+                    n=\$((n+1))
+                    if [ "\$n" -eq \$max_retries ]; then
+                        echo "failed to update after attempt \$n / \$max_retries, giving up"
+                        exit 1
+                    fi
+
+                    WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                    echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                    sleep \$WAIT
                 done
-                return 0
               }
 
               md5sum build/libtvm.so
@@ -725,23 +730,23 @@ stage('Build') {
             script: """
               set -eux
               retry() {
-                local retries=\$1
+                local max_retries=\$1
                 shift
-
-                local count=0
-                until "\$@"; do
-                  exit=\$?
-                  wait=\$((2 ** \$count))
-                  count=\$((\$count + 1))
-                  if [ \$count -lt \$retries ]; then
-                    echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                    sleep \$wait
-                  else
-                    echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                    return \$exit
-                  fi
+                local n=0
+                local backoff_max=30
+                until [ "\$n" -ge \$max_retries ]
+                do
+                    "\$@" && break
+                    n=\$((n+1))
+                    if [ "\$n" -eq \$max_retries ]; then
+                        echo "failed to update after attempt \$n / \$max_retries, giving up"
+                        exit 1
+                    fi
+
+                    WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                    echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                    sleep \$WAIT
                 done
-                return 0
               }
 
               md5sum build/libtvm.so
@@ -775,23 +780,23 @@ stage('Build') {
             script: """
               set -eux
               retry() {
-                local retries=\$1
+                local max_retries=\$1
                 shift
-
-                local count=0
-                until "\$@"; do
-                  exit=\$?
-                  wait=\$((2 ** \$count))
-                  count=\$((\$count + 1))
-                  if [ \$count -lt \$retries ]; then
-                    echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                    sleep \$wait
-                  else
-                    echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                    return \$exit
-                  fi
+                local n=0
+                local backoff_max=30
+                until [ "\$n" -ge \$max_retries ]
+                do
+                    "\$@" && break
+                    n=\$((n+1))
+                    if [ "\$n" -eq \$max_retries ]; then
+                        echo "failed to update after attempt \$n / \$max_retries, giving up"
+                        exit 1
+                    fi
+
+                    WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                    echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                    sleep \$WAIT
                 done
-                return 0
               }
 
               md5sum build/libvta_tsim.so
@@ -860,23 +865,23 @@ stage('Build') {
             script: """
               set -eux
               retry() {
-                local retries=\$1
+                local max_retries=\$1
                 shift
-
-                local count=0
-                until "\$@"; do
-                  exit=\$?
-                  wait=\$((2 ** \$count))
-                  count=\$((\$count + 1))
-                  if [ \$count -lt \$retries ]; then
-                    echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                    sleep \$wait
-                  else
-                    echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                    return \$exit
-                  fi
+                local n=0
+                local backoff_max=30
+                until [ "\$n" -ge \$max_retries ]
+                do
+                    "\$@" && break
+                    n=\$((n+1))
+                    if [ "\$n" -eq \$max_retries ]; then
+                        echo "failed to update after attempt \$n / \$max_retries, giving up"
+                        exit 1
+                    fi
+
+                    WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                    echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                    sleep \$WAIT
                 done
-                return 0
               }
 
               md5sum build/libvta_tsim.so
@@ -914,23 +919,23 @@ stage('Build') {
             script: """
               set -eux
               retry() {
-                local retries=\$1
+                local max_retries=\$1
                 shift
-
-                local count=0
-                until "\$@"; do
-                  exit=\$?
-                  wait=\$((2 ** \$count))
-                  count=\$((\$count + 1))
-                  if [ \$count -lt \$retries ]; then
-                    echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                    sleep \$wait
-                  else
-                    echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                    return \$exit
-                  fi
+                local n=0
+                local backoff_max=30
+                until [ "\$n" -ge \$max_retries ]
+                do
+                    "\$@" && break
+                    n=\$((n+1))
+                    if [ "\$n" -eq \$max_retries ]; then
+                        echo "failed to update after attempt \$n / \$max_retries, giving up"
+                        exit 1
+                    fi
+
+                    WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                    echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                    sleep \$WAIT
                 done
-                return 0
               }
 
               md5sum build/libtvm.so
@@ -966,23 +971,23 @@ stage('Build') {
             script: """
               set -eux
               retry() {
-                local retries=\$1
+                local max_retries=\$1
                 shift
-
-                local count=0
-                until "\$@"; do
-                  exit=\$?
-                  wait=\$((2 ** \$count))
-                  count=\$((\$count + 1))
-                  if [ \$count -lt \$retries ]; then
-                    echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                    sleep \$wait
-                  else
-                    echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                    return \$exit
-                  fi
+                local n=0
+                local backoff_max=30
+                until [ "\$n" -ge \$max_retries ]
+                do
+                    "\$@" && break
+                    n=\$((n+1))
+                    if [ "\$n" -eq \$max_retries ]; then
+                        echo "failed to update after attempt \$n / \$max_retries, giving up"
+                        exit 1
+                    fi
+
+                    WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                    echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                    sleep \$WAIT
                 done
-                return 0
               }
 
               md5sum build/libtvm.so
@@ -1021,23 +1026,23 @@ stage('Build') {
             script: """
               set -eux
               retry() {
-                local retries=\$1
+                local max_retries=\$1
                 shift
-
-                local count=0
-                until "\$@"; do
-                  exit=\$?
-                  wait=\$((2 ** \$count))
-                  count=\$((\$count + 1))
-                  if [ \$count -lt \$retries ]; then
-                    echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                    sleep \$wait
-                  else
-                    echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                    return \$exit
-                  fi
+                local n=0
+                local backoff_max=30
+                until [ "\$n" -ge \$max_retries ]
+                do
+                    "\$@" && break
+                    n=\$((n+1))
+                    if [ "\$n" -eq \$max_retries ]; then
+                        echo "failed to update after attempt \$n / \$max_retries, giving up"
+                        exit 1
+                    fi
+
+                    WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                    echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                    sleep \$WAIT
                 done
-                return 0
               }
 
               md5sum build/libtvm.so
@@ -1082,23 +1087,23 @@ def shard_run_unittest_GPU_1_of_3() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu2/build/libtvm.so build/libtvm.so
@@ -1119,23 +1124,23 @@ def shard_run_unittest_GPU_1_of_3() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so
@@ -1196,23 +1201,23 @@ def shard_run_unittest_GPU_2_of_3() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so
@@ -1276,23 +1281,23 @@ def shard_run_unittest_GPU_3_of_3() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so
@@ -1353,23 +1358,23 @@ def shard_run_integration_CPU_1_of_10() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libvta_tsim.so build/libvta_tsim.so
@@ -1427,23 +1432,23 @@ def shard_run_integration_CPU_2_of_10() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libvta_tsim.so build/libvta_tsim.so
@@ -1501,23 +1506,23 @@ def shard_run_integration_CPU_3_of_10() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libvta_tsim.so build/libvta_tsim.so
@@ -1575,23 +1580,23 @@ def shard_run_integration_CPU_4_of_10() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libvta_tsim.so build/libvta_tsim.so
@@ -1649,23 +1654,23 @@ def shard_run_integration_CPU_5_of_10() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libvta_tsim.so build/libvta_tsim.so
@@ -1723,23 +1728,23 @@ def shard_run_integration_CPU_6_of_10() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libvta_tsim.so build/libvta_tsim.so
@@ -1797,23 +1802,23 @@ def shard_run_integration_CPU_7_of_10() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libvta_tsim.so build/libvta_tsim.so
@@ -1871,23 +1876,23 @@ def shard_run_integration_CPU_8_of_10() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libvta_tsim.so build/libvta_tsim.so
@@ -1945,23 +1950,23 @@ def shard_run_integration_CPU_9_of_10() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libvta_tsim.so build/libvta_tsim.so
@@ -2019,23 +2024,23 @@ def shard_run_integration_CPU_10_of_10() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libvta_tsim.so build/libvta_tsim.so
@@ -2094,23 +2099,23 @@ def shard_run_python_i386_1_of_5() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/i386/build/libtvm.so build/libtvm.so
@@ -2168,23 +2173,23 @@ def shard_run_python_i386_2_of_5() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/i386/build/libtvm.so build/libtvm.so
@@ -2242,23 +2247,23 @@ def shard_run_python_i386_3_of_5() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/i386/build/libtvm.so build/libtvm.so
@@ -2315,23 +2320,23 @@ def shard_run_python_i386_4_of_5() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/i386/build/libtvm.so build/libtvm.so
@@ -2388,23 +2393,23 @@ def shard_run_python_i386_5_of_5() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/i386/build/libtvm.so build/libtvm.so
@@ -2462,23 +2467,23 @@ def shard_run_test_Hexagon_1_of_7() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm.so build/libtvm.so
@@ -2535,23 +2540,23 @@ def shard_run_test_Hexagon_2_of_7() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm.so build/libtvm.so
@@ -2607,23 +2612,23 @@ def shard_run_test_Hexagon_3_of_7() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm.so build/libtvm.so
@@ -2679,23 +2684,23 @@ def shard_run_test_Hexagon_4_of_7() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm.so build/libtvm.so
@@ -2751,23 +2756,23 @@ def shard_run_test_Hexagon_5_of_7() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm.so build/libtvm.so
@@ -2823,23 +2828,23 @@ def shard_run_test_Hexagon_6_of_7() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm.so build/libtvm.so
@@ -2895,23 +2900,23 @@ def shard_run_test_Hexagon_7_of_7() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, 
retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, 
no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / 
\$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; 
print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, 
waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress 
s3://${s3_prefix}/hexagon/build/libtvm.so build/libtvm.so
@@ -2968,23 +2973,23 @@ def shard_run_integration_aarch64_1_of_4() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, 
retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, 
no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / 
\$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; 
print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, 
waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress 
s3://${s3_prefix}/arm/build/libtvm.so build/libtvm.so
@@ -3041,23 +3046,23 @@ def shard_run_integration_aarch64_2_of_4() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, 
retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, 
no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / 
\$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; 
print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, 
waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress 
s3://${s3_prefix}/arm/build/libtvm.so build/libtvm.so
@@ -3114,23 +3119,23 @@ def shard_run_integration_aarch64_3_of_4() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, 
retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, 
no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / 
\$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; 
print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, 
waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress 
s3://${s3_prefix}/arm/build/libtvm.so build/libtvm.so
@@ -3187,23 +3192,23 @@ def shard_run_integration_aarch64_4_of_4() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, 
retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, 
no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / 
\$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; 
print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, 
waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress 
s3://${s3_prefix}/arm/build/libtvm.so build/libtvm.so
@@ -3261,23 +3266,23 @@ def shard_run_topi_GPU_1_of_4() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, 
retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, 
no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / 
\$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; 
print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, 
waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress 
s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so
@@ -3333,23 +3338,23 @@ def shard_run_topi_GPU_2_of_4() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, 
retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, 
no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / 
\$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; 
print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, 
waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress 
s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so
@@ -3405,23 +3410,23 @@ def shard_run_topi_GPU_3_of_4() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, 
retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, 
no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / 
\$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; 
print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, 
waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress 
s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so
@@ -3466,34 +3471,34 @@ def shard_run_topi_GPU_4_of_4() {
     node('GPU') {
       ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/topi-python-gpu") {
         try {
-          docker_init(ci_gpu)
-          init_git()
-          timeout(time: max_time, unit: 'MINUTES') {
-            withEnv([
-              'PLATFORM=gpu',
-              'TVM_NUM_SHARDS=4',
-              'TVM_SHARD_INDEX=3'], {
-              sh(
-                        script: """
-                          set -eux
-                          retry() {
-                            local retries=\$1
-                            shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, 
retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, 
no more retries left."
-                                return \$exit
-                              fi
+          docker_init(ci_gpu)
+          init_git()
+          timeout(time: max_time, unit: 'MINUTES') {
+            withEnv([
+              'PLATFORM=gpu',
+              'TVM_NUM_SHARDS=4',
+              'TVM_SHARD_INDEX=3'], {
+              sh(
+                        script: """
+                          set -eux
+                          retry() {
+                            local max_retries=\$1
+                            shift
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / 
\$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; 
print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, 
waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress 
s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so
@@ -3550,23 +3555,23 @@ def shard_run_frontend_GPU_1_of_6() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, 
retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, 
no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / 
\$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; 
print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, 
waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress 
s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so
@@ -3622,23 +3627,23 @@ def shard_run_frontend_GPU_2_of_6() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, 
retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, 
no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / 
\$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; 
print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, 
waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress 
s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so
@@ -3694,23 +3699,23 @@ def shard_run_frontend_GPU_3_of_6() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, 
retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, 
no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / 
\$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; 
print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, 
waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress 
s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so
@@ -3766,23 +3771,23 @@ def shard_run_frontend_GPU_4_of_6() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, 
retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, 
no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / 
\$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; 
print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, 
waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress 
s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so
@@ -3838,23 +3843,23 @@ def shard_run_frontend_GPU_5_of_6() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, 
retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, 
no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / 
\$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; 
print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, 
waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress 
s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so
@@ -3910,23 +3915,23 @@ def shard_run_frontend_GPU_6_of_6() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, 
retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, 
no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / 
\$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; 
print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, 
waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress 
s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so
@@ -3983,23 +3988,23 @@ def shard_run_topi_aarch64_1_of_2() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, 
retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, 
no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / 
\$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; 
print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, 
waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress 
s3://${s3_prefix}/arm/build/libtvm.so build/libtvm.so
@@ -4060,23 +4065,23 @@ def shard_run_topi_aarch64_2_of_2() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/arm/build/libtvm.so build/libtvm.so
@@ -4137,23 +4142,23 @@ def shard_run_frontend_aarch64_1_of_2() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/arm/build/libtvm.so build/libtvm.so
@@ -4209,23 +4214,23 @@ def shard_run_frontend_aarch64_2_of_2() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/arm/build/libtvm.so build/libtvm.so
@@ -4282,23 +4287,23 @@ def shard_run_test_Cortex_M_1_of_8() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm.so build/libtvm.so
@@ -4359,23 +4364,23 @@ def shard_run_test_Cortex_M_2_of_8() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm.so build/libtvm.so
@@ -4431,23 +4436,23 @@ def shard_run_test_Cortex_M_3_of_8() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm.so build/libtvm.so
@@ -4503,23 +4508,23 @@ def shard_run_test_Cortex_M_4_of_8() {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm.so build/libtvm.so
@@ -5021,23 +5026,23 @@ stage('Test') {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libvta_tsim.so build/libvta_tsim.so
@@ -5094,23 +5099,23 @@ stage('Test') {
                         script: """
                           set -eux
                           retry() {
-                            local retries=\$1
+                            local max_retries=\$1
                             shift
-
-                            local count=0
-                            until "\$@"; do
-                              exit=\$?
-                              wait=\$((2 ** \$count))
-                              count=\$((\$count + 1))
-                              if [ \$count -lt \$retries ]; then
-                                echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                                sleep \$wait
-                              else
-                                echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                                return \$exit
-                              fi
+                            local n=0
+                            local backoff_max=30
+                            until [ "\$n" -ge \$max_retries ]
+                            do
+                                "\$@" && break
+                                n=\$((n+1))
+                                if [ "\$n" -eq \$max_retries ]; then
+                                    echo "failed to update after attempt \$n / \$max_retries, giving up"
+                                    exit 1
+                                fi
+
+                                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                                sleep \$WAIT
                             done
-                            return 0
                           }
 
                           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libtvm.so build/libtvm.so
@@ -5159,23 +5164,23 @@ stage('Test') {
             script: """
               set -eux
               retry() {
-                local retries=\$1
+                local max_retries=\$1
                 shift
-
-                local count=0
-                until "\$@"; do
-                  exit=\$?
-                  wait=\$((2 ** \$count))
-                  count=\$((\$count + 1))
-                  if [ \$count -lt \$retries ]; then
-                    echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                    sleep \$wait
-                  else
-                    echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                    return \$exit
-                  fi
+                local n=0
+                local backoff_max=30
+                until [ "\$n" -ge \$max_retries ]
+                do
+                    "\$@" && break
+                    n=\$((n+1))
+                    if [ "\$n" -eq \$max_retries ]; then
+                        echo "failed to update after attempt \$n / \$max_retries, giving up"
+                        exit 1
+                    fi
+
+                    WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                    echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                    sleep \$WAIT
                 done
-                return 0
               }
 
               retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so
@@ -5203,23 +5208,23 @@ stage('Test') {
             script: """
               set -eux
               retry() {
-                local retries=\$1
+                local max_retries=\$1
                 shift
-
-                local count=0
-                until "\$@"; do
-                  exit=\$?
-                  wait=\$((2 ** \$count))
-                  count=\$((\$count + 1))
-                  if [ \$count -lt \$retries ]; then
-                    echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                    sleep \$wait
-                  else
-                    echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                    return \$exit
-                  fi
+                local n=0
+                local backoff_max=30
+                until [ "\$n" -ge \$max_retries ]
+                do
+                    "\$@" && break
+                    n=\$((n+1))
+                    if [ "\$n" -eq \$max_retries ]; then
+                        echo "failed to update after attempt \$n / \$max_retries, giving up"
+                        exit 1
+                    fi
+
+                    WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                    echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                    sleep \$WAIT
                 done
-                return 0
               }
 
               md5sum docs.tgz
@@ -5329,23 +5334,23 @@ def deploy() {
             script: """
               set -eux
               retry() {
-                local retries=\$1
+                local max_retries=\$1
                 shift
-
-                local count=0
-                until "\$@"; do
-                  exit=\$?
-                  wait=\$((2 ** \$count))
-                  count=\$((\$count + 1))
-                  if [ \$count -lt \$retries ]; then
-                    echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-                    sleep \$wait
-                  else
-                    echo "Retry \$count/\$retries exited \$exit, no more retries left."
-                    return \$exit
-                  fi
+                local n=0
+                local backoff_max=30
+                until [ "\$n" -ge \$max_retries ]
+                do
+                    "\$@" && break
+                    n=\$((n+1))
+                    if [ "\$n" -eq \$max_retries ]; then
+                        echo "failed to update after attempt \$n / \$max_retries, giving up"
+                        exit 1
+                    fi
+
+                    WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+                    echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+                    sleep \$WAIT
                 done
-                return 0
               }
 
               retry 3 aws s3 cp --no-progress s3://${s3_prefix}/docs/docs.tgz docs.tgz
diff --git a/ci/jenkins/Prepare.groovy.j2 b/ci/jenkins/Prepare.groovy.j2
index 8504c0dfb3..7115d39ffc 100644
--- a/ci/jenkins/Prepare.groovy.j2
+++ b/ci/jenkins/Prepare.groovy.j2
@@ -31,25 +31,11 @@ def init_git() {
   )
 
   sh(
-    script: '''
+    script: """
       set -eux
-      n=0
-      max_retries=3
-      backoff_max=30
-      until [ "$n" -ge $max_retries ]
-      do
-          timeout 5m git submodule update --init -f --jobs 0 && break
-          n=$((n+1))
-          if [ "$n" -eq $max_retries ]; then
-              echo "failed to update $n / $max_retries, giving up"
-              exit 1
-          fi
-
-          WAIT=$((RANDOM % "$backoff_max"))
-          echo "failed to update $n / $max_retries, waiting $WAIT to try again"
-          sleep $WAIT
-      done
-    ''',
+      {{ m.bash_retry() }}
+      retry 3 timeout 5m git submodule update --init -f --jobs 0
+    """,
     label: 'Update git submodules',
   )
 }
diff --git a/ci/jenkins/macros.j2 b/ci/jenkins/macros.j2
index 99b7dc1bcd..386d57ebbd 100644
--- a/ci/jenkins/macros.j2
+++ b/ci/jenkins/macros.j2
@@ -113,23 +113,23 @@ def {{ method_name }}() {
 
 {% macro bash_retry() %}
 retry() {
-  local retries=\$1
+  local max_retries=\$1
   shift
+  local n=0
+  local backoff_max=30
+  until [ "\$n" -ge \$max_retries ]
+  do
+      "\$@" && break
+      n=\$((n+1))
+      if [ "\$n" -eq \$max_retries ]; then
+          echo "failed to update after attempt \$n / \$max_retries, giving up"
+          exit 1
+      fi
 
-  local count=0
-  until "\$@"; do
-    exit=\$?
-    wait=\$((2 ** \$count))
-    count=\$((\$count + 1))
-    if [ \$count -lt \$retries ]; then
-      echo "Retry \$count/\$retries exited \$exit, retrying in \$wait seconds..."
-      sleep \$wait
-    else
-      echo "Retry \$count/\$retries exited \$exit, no more retries left."
-      return \$exit
-    fi
+      WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
+      echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
+      sleep \$WAIT
   done
-  return 0
 }
 {% endmacro %}
 

Reply via email to