This is an automated email from the ASF dual-hosted git repository.
mousius pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new ca46f21f51 [CI] Increase CPU Integration tests shards to speed up
runtime (#12316)
ca46f21f51 is described below
commit ca46f21f5168f937cf9b1d205118c770c46aa8c5
Author: Michalis Papadimitriou <[email protected]>
AuthorDate: Fri Aug 5 16:26:29 2022 +0300
[CI] Increase CPU Integration tests shards to speed up runtime (#12316)
Co-authored-by: Michalis Papadimitriou <[email protected]>
---
Jenkinsfile | 370 ++++++++++++++++++++++++++++++++++++++++++----
ci/jenkins/Test.groovy.j2 | 2 +-
2 files changed, 340 insertions(+), 32 deletions(-)
diff --git a/Jenkinsfile b/Jenkinsfile
index ec642d60ef..874d4be71a 100755
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -45,7 +45,7 @@
// 'python3 jenkins/generate.py'
// Note: This timestamp is here to ensure that updates to the Jenkinsfile are
// always rebased on main before merging:
-// Generated at 2022-08-04T10:10:32.335972
+// Generated at 2022-08-05T03:11:14.413104
import org.jenkinsci.plugins.pipeline.modeldefinition.Utils
// NOTE: these lines are scanned by docker/dev_common.sh. Please update the
regex as needed. -->
@@ -1337,7 +1337,7 @@ def shard_run_unittest_GPU_3_of_3() {
}
-def shard_run_integration_CPU_1_of_6() {
+def shard_run_integration_CPU_1_of_10() {
if (!skip_ci && is_docs_only_build != 1) {
node('CPU-SMALL') {
ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/integration-python-cpu") {
@@ -1347,7 +1347,7 @@ def shard_run_integration_CPU_1_of_6() {
timeout(time: max_time, unit: 'MINUTES') {
withEnv([
'PLATFORM=cpu',
- 'TVM_NUM_SHARDS=6',
+ 'TVM_NUM_SHARDS=10',
'TVM_SHARD_INDEX=0'], {
sh(
script: """
@@ -1407,11 +1407,11 @@ def shard_run_integration_CPU_1_of_6() {
}
}
} else {
- Utils.markStageSkippedForConditional('integration: CPU 1 of 6')
+ Utils.markStageSkippedForConditional('integration: CPU 1 of 10')
}
}
-def shard_run_integration_CPU_2_of_6() {
+def shard_run_integration_CPU_2_of_10() {
if (!skip_ci && is_docs_only_build != 1) {
node('CPU-SMALL') {
ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/integration-python-cpu") {
@@ -1421,7 +1421,7 @@ def shard_run_integration_CPU_2_of_6() {
timeout(time: max_time, unit: 'MINUTES') {
withEnv([
'PLATFORM=cpu',
- 'TVM_NUM_SHARDS=6',
+ 'TVM_NUM_SHARDS=10',
'TVM_SHARD_INDEX=1'], {
sh(
script: """
@@ -1481,11 +1481,11 @@ def shard_run_integration_CPU_2_of_6() {
}
}
} else {
- Utils.markStageSkippedForConditional('integration: CPU 2 of 6')
+ Utils.markStageSkippedForConditional('integration: CPU 2 of 10')
}
}
-def shard_run_integration_CPU_3_of_6() {
+def shard_run_integration_CPU_3_of_10() {
if (!skip_ci && is_docs_only_build != 1) {
node('CPU-SMALL') {
ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/integration-python-cpu") {
@@ -1495,7 +1495,7 @@ def shard_run_integration_CPU_3_of_6() {
timeout(time: max_time, unit: 'MINUTES') {
withEnv([
'PLATFORM=cpu',
- 'TVM_NUM_SHARDS=6',
+ 'TVM_NUM_SHARDS=10',
'TVM_SHARD_INDEX=2'], {
sh(
script: """
@@ -1555,11 +1555,11 @@ def shard_run_integration_CPU_3_of_6() {
}
}
} else {
- Utils.markStageSkippedForConditional('integration: CPU 3 of 6')
+ Utils.markStageSkippedForConditional('integration: CPU 3 of 10')
}
}
-def shard_run_integration_CPU_4_of_6() {
+def shard_run_integration_CPU_4_of_10() {
if (!skip_ci && is_docs_only_build != 1) {
node('CPU-SMALL') {
ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/integration-python-cpu") {
@@ -1569,7 +1569,7 @@ def shard_run_integration_CPU_4_of_6() {
timeout(time: max_time, unit: 'MINUTES') {
withEnv([
'PLATFORM=cpu',
- 'TVM_NUM_SHARDS=6',
+ 'TVM_NUM_SHARDS=10',
'TVM_SHARD_INDEX=3'], {
sh(
script: """
@@ -1629,11 +1629,11 @@ def shard_run_integration_CPU_4_of_6() {
}
}
} else {
- Utils.markStageSkippedForConditional('integration: CPU 4 of 6')
+ Utils.markStageSkippedForConditional('integration: CPU 4 of 10')
}
}
-def shard_run_integration_CPU_5_of_6() {
+def shard_run_integration_CPU_5_of_10() {
if (!skip_ci && is_docs_only_build != 1) {
node('CPU-SMALL') {
ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/integration-python-cpu") {
@@ -1643,7 +1643,7 @@ def shard_run_integration_CPU_5_of_6() {
timeout(time: max_time, unit: 'MINUTES') {
withEnv([
'PLATFORM=cpu',
- 'TVM_NUM_SHARDS=6',
+ 'TVM_NUM_SHARDS=10',
'TVM_SHARD_INDEX=4'], {
sh(
script: """
@@ -1703,11 +1703,11 @@ def shard_run_integration_CPU_5_of_6() {
}
}
} else {
- Utils.markStageSkippedForConditional('integration: CPU 5 of 6')
+ Utils.markStageSkippedForConditional('integration: CPU 5 of 10')
}
}
-def shard_run_integration_CPU_6_of_6() {
+def shard_run_integration_CPU_6_of_10() {
if (!skip_ci && is_docs_only_build != 1) {
node('CPU-SMALL') {
ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/integration-python-cpu") {
@@ -1717,7 +1717,7 @@ def shard_run_integration_CPU_6_of_6() {
timeout(time: max_time, unit: 'MINUTES') {
withEnv([
'PLATFORM=cpu',
- 'TVM_NUM_SHARDS=6',
+ 'TVM_NUM_SHARDS=10',
'TVM_SHARD_INDEX=5'], {
sh(
script: """
@@ -1777,7 +1777,303 @@ def shard_run_integration_CPU_6_of_6() {
}
}
} else {
- Utils.markStageSkippedForConditional('integration: CPU 6 of 6')
+ Utils.markStageSkippedForConditional('integration: CPU 6 of 10')
+ }
+}
+
+def shard_run_integration_CPU_7_of_10() {
+ if (!skip_ci && is_docs_only_build != 1) {
+ node('CPU-SMALL') {
+ ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/integration-python-cpu") {
+ try {
+ docker_init(ci_cpu)
+ init_git()
+ timeout(time: max_time, unit: 'MINUTES') {
+ withEnv([
+ 'PLATFORM=cpu',
+ 'TVM_NUM_SHARDS=10',
+ 'TVM_SHARD_INDEX=6'], {
+ sh(
+ script: """
+ set -eux
+ retry() {
+ local retries=\$1
+ shift
+
+ local count=0
+ until "\$@"; do
+ exit=\$?
+ wait=\$((2 ** \$count))
+ count=\$((\$count + 1))
+ if [ \$count -lt \$retries ]; then
+ echo "Retry \$count/\$retries exited \$exit,
retrying in \$wait seconds..."
+ sleep \$wait
+ else
+ echo "Retry \$count/\$retries exited \$exit,
no more retries left."
+ return \$exit
+ fi
+ done
+ return 0
+ }
+
+ retry 3 aws s3 cp --no-progress
s3://${s3_prefix}/cpu/build/libvta_tsim.so build/libvta_tsim.so
+ md5sum build/libvta_tsim.so
+ retry 3 aws s3 cp --no-progress
s3://${s3_prefix}/cpu/build/libtvm.so build/libtvm.so
+ md5sum build/libtvm.so
+ retry 3 aws s3 cp --no-progress
s3://${s3_prefix}/cpu/build/libvta_fsim.so build/libvta_fsim.so
+ md5sum build/libvta_fsim.so
+ retry 3 aws s3 cp --no-progress
s3://${s3_prefix}/cpu/build/libtvm_runtime.so build/libtvm_runtime.so
+ md5sum build/libtvm_runtime.so
+ retry 3 aws s3 cp --no-progress
s3://${s3_prefix}/cpu/build/config.cmake build/config.cmake
+ md5sum build/config.cmake
+ """,
+ label: 'Download artifacts from S3',
+ )
+
+ ci_setup(ci_cpu)
+ sh (
+ script: "${docker_run} ${ci_cpu}
./tests/scripts/task_python_integration.sh",
+ label: 'Run CPU integration tests',
+ )
+ })
+ }
+ } finally {
+ sh(
+ script: """
+ set -eux
+ aws s3 cp --no-progress build/pytest-results
s3://${s3_prefix}/pytest-results --recursive
+ """,
+ label: 'Upload JUnits to S3',
+ )
+
+ junit 'build/pytest-results/*.xml'
+ }
+ }
+ }
+ } else {
+ Utils.markStageSkippedForConditional('integration: CPU 7 of 10')
+ }
+}
+
+def shard_run_integration_CPU_8_of_10() {
+ if (!skip_ci && is_docs_only_build != 1) {
+ node('CPU-SMALL') {
+ ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/integration-python-cpu") {
+ try {
+ docker_init(ci_cpu)
+ init_git()
+ timeout(time: max_time, unit: 'MINUTES') {
+ withEnv([
+ 'PLATFORM=cpu',
+ 'TVM_NUM_SHARDS=10',
+ 'TVM_SHARD_INDEX=7'], {
+ sh(
+ script: """
+ set -eux
+ retry() {
+ local retries=\$1
+ shift
+
+ local count=0
+ until "\$@"; do
+ exit=\$?
+ wait=\$((2 ** \$count))
+ count=\$((\$count + 1))
+ if [ \$count -lt \$retries ]; then
+ echo "Retry \$count/\$retries exited \$exit,
retrying in \$wait seconds..."
+ sleep \$wait
+ else
+ echo "Retry \$count/\$retries exited \$exit,
no more retries left."
+ return \$exit
+ fi
+ done
+ return 0
+ }
+
+ retry 3 aws s3 cp --no-progress
s3://${s3_prefix}/cpu/build/libvta_tsim.so build/libvta_tsim.so
+ md5sum build/libvta_tsim.so
+ retry 3 aws s3 cp --no-progress
s3://${s3_prefix}/cpu/build/libtvm.so build/libtvm.so
+ md5sum build/libtvm.so
+ retry 3 aws s3 cp --no-progress
s3://${s3_prefix}/cpu/build/libvta_fsim.so build/libvta_fsim.so
+ md5sum build/libvta_fsim.so
+ retry 3 aws s3 cp --no-progress
s3://${s3_prefix}/cpu/build/libtvm_runtime.so build/libtvm_runtime.so
+ md5sum build/libtvm_runtime.so
+ retry 3 aws s3 cp --no-progress
s3://${s3_prefix}/cpu/build/config.cmake build/config.cmake
+ md5sum build/config.cmake
+ """,
+ label: 'Download artifacts from S3',
+ )
+
+ ci_setup(ci_cpu)
+ sh (
+ script: "${docker_run} ${ci_cpu}
./tests/scripts/task_python_integration.sh",
+ label: 'Run CPU integration tests',
+ )
+ })
+ }
+ } finally {
+ sh(
+ script: """
+ set -eux
+ aws s3 cp --no-progress build/pytest-results
s3://${s3_prefix}/pytest-results --recursive
+ """,
+ label: 'Upload JUnits to S3',
+ )
+
+ junit 'build/pytest-results/*.xml'
+ }
+ }
+ }
+ } else {
+ Utils.markStageSkippedForConditional('integration: CPU 8 of 10')
+ }
+}
+
+def shard_run_integration_CPU_9_of_10() {
+ if (!skip_ci && is_docs_only_build != 1) {
+ node('CPU-SMALL') {
+ ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/integration-python-cpu") {
+ try {
+ docker_init(ci_cpu)
+ init_git()
+ timeout(time: max_time, unit: 'MINUTES') {
+ withEnv([
+ 'PLATFORM=cpu',
+ 'TVM_NUM_SHARDS=10',
+ 'TVM_SHARD_INDEX=8'], {
+ sh(
+ script: """
+ set -eux
+ retry() {
+ local retries=\$1
+ shift
+
+ local count=0
+ until "\$@"; do
+ exit=\$?
+ wait=\$((2 ** \$count))
+ count=\$((\$count + 1))
+ if [ \$count -lt \$retries ]; then
+ echo "Retry \$count/\$retries exited \$exit,
retrying in \$wait seconds..."
+ sleep \$wait
+ else
+ echo "Retry \$count/\$retries exited \$exit,
no more retries left."
+ return \$exit
+ fi
+ done
+ return 0
+ }
+
+ retry 3 aws s3 cp --no-progress
s3://${s3_prefix}/cpu/build/libvta_tsim.so build/libvta_tsim.so
+ md5sum build/libvta_tsim.so
+ retry 3 aws s3 cp --no-progress
s3://${s3_prefix}/cpu/build/libtvm.so build/libtvm.so
+ md5sum build/libtvm.so
+ retry 3 aws s3 cp --no-progress
s3://${s3_prefix}/cpu/build/libvta_fsim.so build/libvta_fsim.so
+ md5sum build/libvta_fsim.so
+ retry 3 aws s3 cp --no-progress
s3://${s3_prefix}/cpu/build/libtvm_runtime.so build/libtvm_runtime.so
+ md5sum build/libtvm_runtime.so
+ retry 3 aws s3 cp --no-progress
s3://${s3_prefix}/cpu/build/config.cmake build/config.cmake
+ md5sum build/config.cmake
+ """,
+ label: 'Download artifacts from S3',
+ )
+
+ ci_setup(ci_cpu)
+ sh (
+ script: "${docker_run} ${ci_cpu}
./tests/scripts/task_python_integration.sh",
+ label: 'Run CPU integration tests',
+ )
+ })
+ }
+ } finally {
+ sh(
+ script: """
+ set -eux
+ aws s3 cp --no-progress build/pytest-results
s3://${s3_prefix}/pytest-results --recursive
+ """,
+ label: 'Upload JUnits to S3',
+ )
+
+ junit 'build/pytest-results/*.xml'
+ }
+ }
+ }
+ } else {
+ Utils.markStageSkippedForConditional('integration: CPU 9 of 10')
+ }
+}
+
+def shard_run_integration_CPU_10_of_10() {
+ if (!skip_ci && is_docs_only_build != 1) {
+ node('CPU-SMALL') {
+ ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/integration-python-cpu") {
+ try {
+ docker_init(ci_cpu)
+ init_git()
+ timeout(time: max_time, unit: 'MINUTES') {
+ withEnv([
+ 'PLATFORM=cpu',
+ 'TVM_NUM_SHARDS=10',
+ 'TVM_SHARD_INDEX=9'], {
+ sh(
+ script: """
+ set -eux
+ retry() {
+ local retries=\$1
+ shift
+
+ local count=0
+ until "\$@"; do
+ exit=\$?
+ wait=\$((2 ** \$count))
+ count=\$((\$count + 1))
+ if [ \$count -lt \$retries ]; then
+ echo "Retry \$count/\$retries exited \$exit,
retrying in \$wait seconds..."
+ sleep \$wait
+ else
+ echo "Retry \$count/\$retries exited \$exit,
no more retries left."
+ return \$exit
+ fi
+ done
+ return 0
+ }
+
+ retry 3 aws s3 cp --no-progress
s3://${s3_prefix}/cpu/build/libvta_tsim.so build/libvta_tsim.so
+ md5sum build/libvta_tsim.so
+ retry 3 aws s3 cp --no-progress
s3://${s3_prefix}/cpu/build/libtvm.so build/libtvm.so
+ md5sum build/libtvm.so
+ retry 3 aws s3 cp --no-progress
s3://${s3_prefix}/cpu/build/libvta_fsim.so build/libvta_fsim.so
+ md5sum build/libvta_fsim.so
+ retry 3 aws s3 cp --no-progress
s3://${s3_prefix}/cpu/build/libtvm_runtime.so build/libtvm_runtime.so
+ md5sum build/libtvm_runtime.so
+ retry 3 aws s3 cp --no-progress
s3://${s3_prefix}/cpu/build/config.cmake build/config.cmake
+ md5sum build/config.cmake
+ """,
+ label: 'Download artifacts from S3',
+ )
+
+ ci_setup(ci_cpu)
+ sh (
+ script: "${docker_run} ${ci_cpu}
./tests/scripts/task_python_integration.sh",
+ label: 'Run CPU integration tests',
+ )
+ })
+ }
+ } finally {
+ sh(
+ script: """
+ set -eux
+ aws s3 cp --no-progress build/pytest-results
s3://${s3_prefix}/pytest-results --recursive
+ """,
+ label: 'Upload JUnits to S3',
+ )
+
+ junit 'build/pytest-results/*.xml'
+ }
+ }
+ }
+ } else {
+ Utils.markStageSkippedForConditional('integration: CPU 10 of 10')
}
}
@@ -4280,23 +4576,35 @@ stage('Test') {
'unittest: GPU 3 of 3': {
shard_run_unittest_GPU_3_of_3()
},
- 'integration: CPU 1 of 6': {
- shard_run_integration_CPU_1_of_6()
+ 'integration: CPU 1 of 10': {
+ shard_run_integration_CPU_1_of_10()
+ },
+ 'integration: CPU 2 of 10': {
+ shard_run_integration_CPU_2_of_10()
+ },
+ 'integration: CPU 3 of 10': {
+ shard_run_integration_CPU_3_of_10()
+ },
+ 'integration: CPU 4 of 10': {
+ shard_run_integration_CPU_4_of_10()
+ },
+ 'integration: CPU 5 of 10': {
+ shard_run_integration_CPU_5_of_10()
},
- 'integration: CPU 2 of 6': {
- shard_run_integration_CPU_2_of_6()
+ 'integration: CPU 6 of 10': {
+ shard_run_integration_CPU_6_of_10()
},
- 'integration: CPU 3 of 6': {
- shard_run_integration_CPU_3_of_6()
+ 'integration: CPU 7 of 10': {
+ shard_run_integration_CPU_7_of_10()
},
- 'integration: CPU 4 of 6': {
- shard_run_integration_CPU_4_of_6()
+ 'integration: CPU 8 of 10': {
+ shard_run_integration_CPU_8_of_10()
},
- 'integration: CPU 5 of 6': {
- shard_run_integration_CPU_5_of_6()
+ 'integration: CPU 9 of 10': {
+ shard_run_integration_CPU_9_of_10()
},
- 'integration: CPU 6 of 6': {
- shard_run_integration_CPU_6_of_6()
+ 'integration: CPU 10 of 10': {
+ shard_run_integration_CPU_10_of_10()
},
'python: i386 1 of 5': {
shard_run_python_i386_1_of_5()
diff --git a/ci/jenkins/Test.groovy.j2 b/ci/jenkins/Test.groovy.j2
index de2a1b5cfd..ebb19e36e7 100644
--- a/ci/jenkins/Test.groovy.j2
+++ b/ci/jenkins/Test.groovy.j2
@@ -42,7 +42,7 @@
{% call(shard_index, num_shards) m.sharded_test_step(
name="integration: CPU",
node="CPU-SMALL",
- num_shards=6,
+ num_shards=10,
ws="tvm/integration-python-cpu",
platform="cpu",
docker_image="ci_cpu",