This is an automated email from the ASF dual-hosted git repository.
zhasheng pushed a commit to branch v1.6.x
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/v1.6.x by this push:
new d271348 [CI][1.6.x] fix centos 7 url to unblock centos-cpu & gpu
pipeline (#18560)
d271348 is described below
commit d2713482f9a6a45f1274df87bd34d784a94756ed
Author: Chaitanya Prakash Bapat <[email protected]>
AuthorDate: Mon Jun 15 11:36:33 2020 -0700
[CI][1.6.x] fix centos 7 url to unblock centos-cpu & gpu pipeline (#18560)
* fix centos 7 url to unblock centos-cpu & gpu pipeline
* [v1.7.x] update jetson dockerfile to support CUDA 10.0 (#18339)
* update dockerfile for jetson
* add toolchain files
* update build_jetson function
* update ubuntu_julia.sh
* update FindCUDAToolkit.cmake
* Update centos7_python.sh
* revert changes on ubuntu_julia.sh
* disable TVM for gpu build
* Disable TVM_OP on GPU builds
Co-authored-by: Wei Chu <[email protected]>
Co-authored-by: Leonard Lausen <[email protected]>
* skip quantized conv flaky case (#16866)
* Fix quantized concat when inputs are mixed int8 and uint8
Change-Id: I4da04bf4502425134a466823fb5f73da2d7a419b
* skip flaky test
* trigger ci
Co-authored-by: waytrue17 <[email protected]>
Co-authored-by: Wei Chu <[email protected]>
Co-authored-by: Leonard Lausen <[email protected]>
Co-authored-by: Xinyu Chen <[email protected]>
---
ci/docker/Dockerfile.build.jetson | 96 +++++-----
ci/docker/install/centos7_python.sh | 2 +-
ci/docker/runtime_functions.sh | 68 +++----
.../aarch64-linux-gnu-toolchain.cmake} | 27 +--
.../arm-linux-gnueabihf-toolchain.cmake} | 26 +--
ci/jenkins/Jenkins_steps.groovy | 44 ++---
ci/jenkins/Jenkinsfile_unix_gpu | 7 +-
cmake/Modules/FindCUDAToolkit.cmake | 205 +++++++++++++++------
tests/python/quantization/test_quantization.py | 5 +-
9 files changed, 255 insertions(+), 225 deletions(-)
diff --git a/ci/docker/Dockerfile.build.jetson
b/ci/docker/Dockerfile.build.jetson
index e31ee43..93fe5e0 100644
--- a/ci/docker/Dockerfile.build.jetson
+++ b/ci/docker/Dockerfile.build.jetson
@@ -20,68 +20,58 @@
# This script assumes /work/mxnet exists and contains the mxnet code you wish
to compile and
# that /work/build exists and is the target for your output.
-FROM nvidia/cuda:9.0-cudnn7-devel as cudabuilder
+FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
-FROM dockcross/linux-arm64
+ENV ARCH=aarch64 \
+ HOSTCC=gcc \
+ TARGET=ARMV8
-ENV ARCH aarch64
-ENV HOSTCC gcc
-ENV TARGET ARMV8
+WORKDIR /usr/local
-# gh issue #11567 https://github.com/apache/incubator-mxnet/issues/11567
-#RUN sed -i '\#deb http://cdn-fastly.deb.debian.org/debian-security
jessie/updates main#d' /etc/apt/sources.list
-#RUN sed -i 's/cdn-fastly.//' /etc/apt/sources.list
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+ build-essential \
+ ninja-build \
+ git \
+ curl \
+ zip \
+ unzip \
+ python3 \
+ python3-pip \
+ awscli \
+ crossbuild-essential-arm64 \
+ && rm -rf /var/lib/apt/lists/*
+# cmake on Ubuntu 18.04 is too old
+RUN python3 -m pip install cmake
-WORKDIR /work/deps
-
-COPY install/ubuntu_arm.sh /work/
-RUN /work/ubuntu_arm.sh
-
-COPY install/arm_openblas.sh /work/
-RUN /work/arm_openblas.sh
-
-ENV OpenBLAS_HOME=${CROSS_ROOT}
-ENV OpenBLAS_DIR=${CROSS_ROOT}
-
+# ccache on Ubuntu 18.04 is too old to support Cuda correctly
COPY install/deb_ubuntu_ccache.sh /work/
RUN /work/deb_ubuntu_ccache.sh
-# Setup CUDA build env (including configuring and copying nvcc)
-COPY --from=cudabuilder /usr/local/cuda /usr/local/cuda
-ENV TARGET_ARCH aarch64
-ENV TARGET_OS linux
+COPY toolchains/aarch64-linux-gnu-toolchain.cmake /usr
+ENV CMAKE_TOOLCHAIN_FILE=/usr/aarch64-linux-gnu-toolchain.cmake
+
+RUN git clone --recursive -b v0.3.9 https://github.com/xianyi/OpenBLAS.git && \
+ cd /usr/local/OpenBLAS && \
+ make NOFORTRAN=1 CC=aarch64-linux-gnu-gcc && \
+ make PREFIX=/usr/aarch64-linux-gnu install && \
+ cd /usr/local && \
+ rm -rf OpenBLAS
-# Install ARM depedencies based on Jetpack 3.3
-RUN
JETPACK_DOWNLOAD_PREFIX=https://developer.download.nvidia.com/devzone/devcenter/mobile/jetpack_l4t/3.3/lw.xd42/JetPackL4T_33_b39
&& \
- CUDA_REPO_PREFIX=/var/cuda-repo-9-0-local && \
- ARM_CUDA_INSTALLER_PACKAGE=cuda-repo-l4t-9-0-local_9.0.252-1_arm64.deb && \
- ARM_CUDNN_INSTALLER_PACKAGE=libcudnn7_7.1.5.14-1+cuda9.0_arm64.deb && \
- ARM_CUDNN_DEV_INSTALLER_PACKAGE=libcudnn7-dev_7.1.5.14-1+cuda9.0_arm64.deb
&& \
- ARM_LICENSE_INSTALLER=cuda-license-9-0_9.0.252-1_arm64.deb && \
- ARM_CUBLAS_INSTALLER=cuda-cublas-9-0_9.0.252-1_arm64.deb && \
- ARM_NVINFER_INSTALLER_PACKAGE=libnvinfer4_4.1.3-1+cuda9.0_arm64.deb && \
- ARM_NVINFER_DEV_INSTALLER_PACKAGE=libnvinfer-dev_4.1.3-1+cuda9.0_arm64.deb
&& \
- dpkg --add-architecture arm64 && \
- wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_CUDA_INSTALLER_PACKAGE && \
- wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_CUDNN_INSTALLER_PACKAGE && \
- wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_CUDNN_DEV_INSTALLER_PACKAGE && \
- wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_NVINFER_INSTALLER_PACKAGE && \
- wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_NVINFER_DEV_INSTALLER_PACKAGE && \
- dpkg -i --force-architecture $ARM_CUDA_INSTALLER_PACKAGE && \
- apt-key add $CUDA_REPO_PREFIX/7fa2af80.pub && \
- dpkg -i --force-architecture $ARM_CUDNN_INSTALLER_PACKAGE && \
- dpkg -i --force-architecture $ARM_CUDNN_DEV_INSTALLER_PACKAGE && \
- dpkg -i --force-architecture $CUDA_REPO_PREFIX/$ARM_LICENSE_INSTALLER && \
- dpkg -i --force-architecture $CUDA_REPO_PREFIX/$ARM_CUBLAS_INSTALLER && \
- dpkg -i --force-architecture $ARM_NVINFER_INSTALLER_PACKAGE && \
- dpkg -i --force-architecture $ARM_NVINFER_DEV_INSTALLER_PACKAGE && \
- apt update -y || true && apt install -y cuda-libraries-dev-9-0
libcudnn7-dev libnvinfer-dev
-RUN ln -s /usr/include/aarch64-linux-gnu/cudnn_v7.h
/usr/include/aarch64-linux-gnu/cudnn.h
-ENV PATH $PATH:/usr/local/cuda/bin
-ENV NVCCFLAGS "-m64"
-ENV CUDA_ARCH "-gencode arch=compute_53,code=sm_53 -gencode
arch=compute_62,code=sm_62"
-ENV NVCC /usr/local/cuda/bin/nvcc
+# Install aarch64 cross dependencies based on Jetpack 4.3
+# Manually downloaded using SDK Manager tool and placed in a private S3 bucket.
+# We're not allowed to redistribute these files and there is no public version.
+RUN aws s3 cp
s3://mxnet-ci-prod-private-slave-data/nvidia/sdkm_downloads/cuda-repo-ubuntu1804-10-0-local-10.0.326-410.108_1.0-1_amd64.deb
. && \
+ dpkg -i cuda-repo-ubuntu1804-10-0-local-10.0.326-410.108_1.0-1_amd64.deb
&& \
+ rm cuda-repo-ubuntu1804-10-0-local-10.0.326-410.108_1.0-1_amd64.deb && \
+ apt-key add /var/cuda-repo-10-0-local-10.0.326-410.108/7fa2af80.pub && \
+ aws s3 cp
s3://mxnet-ci-prod-private-slave-data/nvidia/sdkm_downloads/cuda-repo-cross-aarch64-10-0-local-10.0.326_1.0-1_all.deb
. && \
+ dpkg -i cuda-repo-cross-aarch64-10-0-local-10.0.326_1.0-1_all.deb && \
+ rm cuda-repo-cross-aarch64-10-0-local-10.0.326_1.0-1_all.deb && \
+ apt-get update && \
+ apt-get install -y -f && \
+ apt-get install -y cuda-cross-aarch64 cuda-cross-aarch64-10-0 && \
+ rm -rf /var/lib/apt/lists/*
ARG USER_ID=0
ARG GROUP_ID=0
diff --git a/ci/docker/install/centos7_python.sh
b/ci/docker/install/centos7_python.sh
index 686cf14..5ca08b8 100755
--- a/ci/docker/install/centos7_python.sh
+++ b/ci/docker/install/centos7_python.sh
@@ -23,7 +23,7 @@
set -ex
# Python 2.7 is installed by default, install 3.6 on top
-yum -y install https://centos7.iuscommunity.org/ius-release.rpm
+yum -y install https://repo.ius.io/ius-release-el7.rpm
yum -y install python36u
# Install PIP
diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index d5de024..d8f0cbe 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -249,15 +249,22 @@ build_dynamic_libmxnet() {
build_jetson() {
set -ex
- pushd .
-
- #build_ccache_wrappers
-
- cp make/crosscompile.jetson.mk ./config.mk
- make -j$(nproc)
-
- build_wheel /work/mxnet/python /work/mxnet/lib
- popd
+ cd /work/build
+ cmake \
+ -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} \
+ -DUSE_CUDA=ON \
+ -DMXNET_CUDA_ARCH="5.2" \
+ -DENABLE_CUDA_RTC=OFF \
+ -DSUPPORT_F16C=OFF \
+ -DUSE_OPENCV=OFF \
+ -DUSE_OPENMP=ON \
+ -DUSE_LAPACK=OFF \
+ -DUSE_SIGNAL_HANDLER=ON \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DUSE_MKL_IF_AVAILABLE=OFF \
+ -G Ninja /work/mxnet
+ ninja
+ build_wheel
}
#
@@ -772,7 +779,7 @@ build_ubuntu_gpu_mkldnn() {
USE_CUDA=1 \
USE_CUDA_PATH=/usr/local/cuda \
USE_CUDNN=1 \
- USE_TVM_OP=1 \
+ USE_TVM_OP=0 \
CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
USE_SIGNAL_HANDLER=1 \
-j$(nproc)
@@ -789,7 +796,7 @@ build_ubuntu_gpu_mkldnn_nocudnn() {
USE_CUDA=1 \
USE_CUDA_PATH=/usr/local/cuda \
USE_CUDNN=0 \
- USE_TVM_OP=1 \
+ USE_TVM_OP=0 \
CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
USE_SIGNAL_HANDLER=1 \
-j$(nproc)
@@ -805,27 +812,6 @@ build_ubuntu_gpu_cuda101_cudnn7() {
USE_CUDA=1 \
USE_CUDA_PATH=/usr/local/cuda \
USE_CUDNN=1 \
- USE_TVM_OP=1 \
- USE_CPP_PACKAGE=1 \
- USE_DIST_KVSTORE=1 \
- CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
- USE_SIGNAL_HANDLER=1 \
- -j$(nproc)
-
- make cython PYTHON=python2
- make cython PYTHON=python3
-}
-
-build_ubuntu_gpu_cuda101_cudnn7_no_tvm_op() {
- set -ex
- build_ccache_wrappers
- make \
- DEV=1 \
- USE_BLAS=openblas \
- USE_MKLDNN=0 \
- USE_CUDA=1 \
- USE_CUDA_PATH=/usr/local/cuda \
- USE_CUDNN=1 \
USE_TVM_OP=0 \
USE_CPP_PACKAGE=1 \
USE_DIST_KVSTORE=1 \
@@ -867,7 +853,7 @@ build_ubuntu_gpu_cmake_mkldnn() {
-DUSE_SIGNAL_HANDLER=ON \
-DUSE_CUDA=1 \
-DUSE_CUDNN=1 \
- -DUSE_TVM_OP=1 \
+ -DUSE_TVM_OP=0 \
-DPython3_EXECUTABLE=/usr/bin/python3 \
-DUSE_MKLML_MKL=1 \
-DCMAKE_BUILD_TYPE=Release \
@@ -892,7 +878,7 @@ build_ubuntu_gpu_cmake() {
-DUSE_SIGNAL_HANDLER=ON \
-DUSE_CUDA=ON \
-DUSE_CUDNN=ON \
- -DUSE_TVM_OP=ON \
+ -DUSE_TVM_OP=OFF \
-DPython3_EXECUTABLE=/usr/bin/python3 \
-DUSE_MKL_IF_AVAILABLE=OFF \
-DUSE_MKLML_MKL=OFF \
@@ -904,17 +890,14 @@ build_ubuntu_gpu_cmake() {
-G Ninja \
/work/mxnet
- ninja -v
+ ninja
}
-build_ubuntu_gpu_cmake_no_tvm_op() {
+build_ubuntu_gpu_cmake_no_rtc() {
set -ex
cd /work/build
build_ccache_wrappers
cmake \
- -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
- -DCMAKE_C_COMPILER_LAUNCHER=ccache \
- -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
-DUSE_SIGNAL_HANDLER=ON \
-DUSE_CUDA=ON \
-DUSE_CUDNN=ON \
@@ -922,15 +905,16 @@ build_ubuntu_gpu_cmake_no_tvm_op() {
-DPython3_EXECUTABLE=/usr/bin/python3 \
-DUSE_MKL_IF_AVAILABLE=OFF \
-DUSE_MKLML_MKL=OFF \
- -DUSE_MKLDNN=OFF \
+ -DUSE_MKLDNN=ON \
-DUSE_DIST_KVSTORE=ON \
-DCMAKE_BUILD_TYPE=Release \
-DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
-DBUILD_CYTHON_MODULES=1 \
+ -DENABLE_CUDA_RTC=OFF \
-G Ninja \
/work/mxnet
- ninja -v
+ ninja
}
build_ubuntu_cpu_large_tensor() {
@@ -964,7 +948,7 @@ build_ubuntu_gpu_large_tensor() {
-DUSE_SIGNAL_HANDLER=ON \
-DUSE_CUDA=ON \
-DUSE_CUDNN=ON \
- -DUSE_TVM_OP=ON \
+ -DUSE_TVM_OP=OFF \
-DPython3_EXECUTABLE=/usr/bin/python3 \
-DUSE_MKL_IF_AVAILABLE=OFF \
-DUSE_MKLML_MKL=OFF \
diff --git a/ci/docker/install/centos7_python.sh
b/ci/docker/toolchains/aarch64-linux-gnu-toolchain.cmake
old mode 100755
new mode 100644
similarity index 57%
copy from ci/docker/install/centos7_python.sh
copy to ci/docker/toolchains/aarch64-linux-gnu-toolchain.cmake
index 686cf14..3780415
--- a/ci/docker/install/centos7_python.sh
+++ b/ci/docker/toolchains/aarch64-linux-gnu-toolchain.cmake
@@ -1,5 +1,3 @@
-#!/usr/bin/env bash
-
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -17,19 +15,14 @@
# specific language governing permissions and limitations
# under the License.
-# build and install are separated so changes to build don't invalidate
-# the whole docker cache for the image
-
-set -ex
-
- # Python 2.7 is installed by default, install 3.6 on top
-yum -y install https://centos7.iuscommunity.org/ius-release.rpm
-yum -y install python36u
-
-# Install PIP
-curl "https://bootstrap.pypa.io/get-pip.py" -o "get-pip.py"
-python2.7 get-pip.py
-python3.6 get-pip.py
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR "aarch64")
+set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc)
+set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++)
+set(CMAKE_CUDA_HOST_COMPILER aarch64-linux-gnu-gcc)
+set(CMAKE_FIND_ROOT_PATH "/usr/aarch64-linux-gnu")
-pip2 install nose pylint numpy nose-timer requests h5py scipy==1.2.1
decorator==4.4.0
-pip3 install nose pylint numpy nose-timer requests h5py scipy==1.2.1
decorator==4.4.0
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
diff --git a/ci/docker/install/centos7_python.sh
b/ci/docker/toolchains/arm-linux-gnueabihf-toolchain.cmake
old mode 100755
new mode 100644
similarity index 57%
copy from ci/docker/install/centos7_python.sh
copy to ci/docker/toolchains/arm-linux-gnueabihf-toolchain.cmake
index 686cf14..62038ec
--- a/ci/docker/install/centos7_python.sh
+++ b/ci/docker/toolchains/arm-linux-gnueabihf-toolchain.cmake
@@ -1,5 +1,3 @@
-#!/usr/bin/env bash
-
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -17,19 +15,13 @@
# specific language governing permissions and limitations
# under the License.
-# build and install are separated so changes to build don't invalidate
-# the whole docker cache for the image
-
-set -ex
-
- # Python 2.7 is installed by default, install 3.6 on top
-yum -y install https://centos7.iuscommunity.org/ius-release.rpm
-yum -y install python36u
-
-# Install PIP
-curl "https://bootstrap.pypa.io/get-pip.py" -o "get-pip.py"
-python2.7 get-pip.py
-python3.6 get-pip.py
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR "armv7l")
+set(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc)
+set(CMAKE_CXX_COMPILER arm-linux-gnueabihf-g++)
+set(CMAKE_FIND_ROOT_PATH "/usr/arm-linux-gnueabihf"
"/usr/local/arm-linux-gnueabihf")
-pip2 install nose pylint numpy nose-timer requests h5py scipy==1.2.1
decorator==4.4.0
-pip3 install nose pylint numpy nose-timer requests h5py scipy==1.2.1
decorator==4.4.0
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index be66350..5345c78 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -261,14 +261,14 @@ def compile_unix_full_gpu() {
}]
}
-def compile_unix_full_gpu_no_tvm_op() {
- return ['GPU: CUDA10.1+cuDNN7 TVM_OP OFF': {
+def compile_unix_full_gpu_mkldnn_cpp_test() {
+ return ['GPU: CUDA10.1+cuDNN7+MKLDNN+CPPTEST': {
node(NODE_LINUX_CPU) {
- ws('workspace/build-gpu-no-tvm-op') {
+ ws('workspace/build-gpu-mkldnn-cpp') {
timeout(time: max_time, unit: 'MINUTES') {
utils.init_git()
- utils.docker_run('ubuntu_build_cuda',
'build_ubuntu_gpu_cuda101_cudnn7_no_tvm_op', false)
- utils.pack_lib('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op)
+ utils.docker_run('ubuntu_build_cuda',
'build_ubuntu_gpu_cuda101_cudnn7_mkldnn_cpp_test', false)
+ utils.pack_lib('gpu_mkldnn_cpp_test', mx_lib_cpp_capi)
}
}
}
@@ -303,16 +303,16 @@ def compile_unix_cmake_gpu() {
}]
}
-def compile_unix_cmake_gpu_no_tvm_op() {
- return ['GPU: CMake TVM_OP OFF': {
- node(NODE_LINUX_CPU) {
- ws('workspace/build-cmake-gpu-no-tvm-op') {
- timeout(time: max_time, unit: 'MINUTES') {
- utils.init_git()
- utils.docker_run('ubuntu_gpu_cu101',
'build_ubuntu_gpu_cmake_no_tvm_op', false)
- }
+def compile_unix_cmake_gpu_no_rtc() {
+ return ['GPU: CMake CUDA RTC OFF': {
+ node(NODE_LINUX_CPU) {
+ ws('workspace/build-cmake-gpu-no-rtc') {
+ timeout(time: max_time, unit: 'MINUTES') {
+ utils.init_git()
+ utils.docker_run('ubuntu_gpu_cu101',
'build_ubuntu_gpu_cmake_no_rtc', false)
+ }
+ }
}
- }
}]
}
@@ -799,22 +799,6 @@ def test_unix_python3_gpu() {
}]
}
-def test_unix_python3_gpu_no_tvm_op() {
- return ['Python3: GPU TVM_OP OFF': {
- node(NODE_LINUX_GPU) {
- ws('workspace/ut-python3-gpu-no-tvm-op') {
- try {
- utils.unpack_and_init('gpu_no_tvm_op',
mx_lib_cpp_examples_no_tvm_op)
- python3_gpu_ut_cython('ubuntu_gpu_cu101')
- utils.publish_test_coverage()
- } finally {
- utils.collect_test_results_unix('nosetests_gpu.xml',
'nosetests_python3_gpu.xml')
- }
- }
- }
- }]
-}
-
def test_unix_python3_quantize_gpu() {
return ['Python3: Quantize GPU': {
node(NODE_LINUX_GPU_P3) {
diff --git a/ci/jenkins/Jenkinsfile_unix_gpu b/ci/jenkins/Jenkinsfile_unix_gpu
index 18e2719..e3ff319 100644
--- a/ci/jenkins/Jenkinsfile_unix_gpu
+++ b/ci/jenkins/Jenkinsfile_unix_gpu
@@ -41,8 +41,8 @@ core_logic: {
custom_steps.compile_unix_cmake_gpu(),
custom_steps.compile_unix_tensorrt_gpu(),
custom_steps.compile_unix_int64_gpu(),
- custom_steps.compile_unix_full_gpu_no_tvm_op(),
- custom_steps.compile_unix_cmake_gpu_no_tvm_op(),
+ custom_steps.compile_unix_cmake_gpu_no_rtc(),
+ custom_steps.compile_unix_full_gpu_mkldnn_cpp_test()
])
utils.parallel_stage('Tests', [
@@ -63,7 +63,8 @@ core_logic: {
custom_steps.test_unix_scala_gpu(),
custom_steps.test_unix_distributed_kvstore_gpu(),
custom_steps.test_static_python_gpu(),
- custom_steps.test_unix_python3_gpu_no_tvm_op(),
+ custom_steps.test_static_python_gpu_cmake(),
+ custom_steps.test_unix_capi_cpp_package(),
// Disabled due to: https://github.com/apache/incubator-mxnet/issues/11407
//custom_steps.test_unix_caffe_gpu()
diff --git a/cmake/Modules/FindCUDAToolkit.cmake
b/cmake/Modules/FindCUDAToolkit.cmake
index d37c44d..fee4f3f 100644
--- a/cmake/Modules/FindCUDAToolkit.cmake
+++ b/cmake/Modules/FindCUDAToolkit.cmake
@@ -132,6 +132,7 @@ of the following libraries that are part of the CUDAToolkit:
- :ref:`cuRAND<cuda_toolkit_cuRAND>`
- :ref:`cuSOLVER<cuda_toolkit_cuSOLVER>`
- :ref:`cuSPARSE<cuda_toolkit_cuSPARSE>`
+- :ref:`cuPTI<cuda_toolkit_cupti>`
- :ref:`NPP<cuda_toolkit_NPP>`
- :ref:`nvBLAS<cuda_toolkit_nvBLAS>`
- :ref:`nvGRAPH<cuda_toolkit_nvGRAPH>`
@@ -149,7 +150,6 @@ CUDA Runtime Library
The CUDA Runtime library (cudart) are what most applications will typically
need to link against to make any calls such as `cudaMalloc`, and `cudaFree`.
-They are an explicit dependency of almost every library.
Targets Created:
@@ -230,6 +230,18 @@ Targets Created:
- ``CUDA::cusparse``
- ``CUDA::cusparse_static``
+.. _`cuda_toolkit_cupti`:
+
+cupti
+"""""
+
+The `NVIDIA CUDA Profiling Tools Interface
<https://developer.nvidia.com/CUPTI>`_.
+
+Targets Created:
+
+- ``CUDA::cupti``
+- ``CUDA::cupti_static``
+
.. _`cuda_toolkit_NPP`:
NPP
@@ -361,8 +373,6 @@ Targets Created:
- ``CUDA::nvml``
-.. _`cuda_toolkit_opencl`:
-
.. _`cuda_toolkit_nvToolsExt`:
nvToolsExt
@@ -375,6 +385,8 @@ Targets Created:
- ``CUDA::nvToolsExt``
+.. _`cuda_toolkit_opencl`:
+
OpenCL
""""""
@@ -436,6 +448,11 @@ Result variables
The path to the CUDA Toolkit library directory that contains the CUDA
Runtime library ``cudart``.
+``CUDAToolkit_TARGET_DIR``
+ The path to the CUDA Toolkit directory including the target architecture
+ when cross-compiling. When not cross-compiling this will be equivalent to
+ ``CUDAToolkit_ROOT_DIR``.
+
``CUDAToolkit_NVCC_EXECUTABLE``
The path to the NVIDIA CUDA compiler ``nvcc``. Note that this path may
**not** be the same as
@@ -487,6 +504,7 @@ if(CMAKE_CUDA_COMPILER_LOADED AND NOT CUDAToolkit_BIN_DIR)
get_filename_component(cuda_dir "${CMAKE_CUDA_COMPILER}" DIRECTORY)
# use the already detected cuda compiler
set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "")
+ mark_as_advanced(CUDAToolkit_BIN_DIR)
unset(cuda_dir)
endif()
@@ -641,6 +659,7 @@ endif()
if(NOT CUDAToolkit_BIN_DIR AND CUDAToolkit_NVCC_EXECUTABLE)
get_filename_component(cuda_dir "${CUDAToolkit_NVCC_EXECUTABLE}" DIRECTORY)
set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "" FORCE)
+ mark_as_advanced(CUDAToolkit_BIN_DIR)
unset(cuda_dir)
endif()
@@ -669,8 +688,47 @@ endif()
get_filename_component(CUDAToolkit_ROOT_DIR ${CUDAToolkit_BIN_DIR} DIRECTORY
ABSOLUTE)
-# Now that we have the real ROOT_DIR, find components inside it.
-list(APPEND CMAKE_PREFIX_PATH ${CUDAToolkit_ROOT_DIR})
+# Handle cross compilation
+if(CMAKE_CROSSCOMPILING)
+ if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7-a")
+ # Support for NVPACK
+ set (CUDAToolkit_TARGET_NAME "armv7-linux-androideabi")
+ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
+ # Support for arm cross compilation
+ set(CUDAToolkit_TARGET_NAME "armv7-linux-gnueabihf")
+ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
+ # Support for aarch64 cross compilation
+ if (ANDROID_ARCH_NAME STREQUAL "arm64")
+ set(CUDAToolkit_TARGET_NAME "aarch64-linux-androideabi")
+ else()
+ set(CUDAToolkit_TARGET_NAME "aarch64-linux")
+ endif (ANDROID_ARCH_NAME STREQUAL "arm64")
+ elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ set(CUDAToolkit_TARGET_NAME "x86_64-linux")
+ endif()
+
+ if (EXISTS "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}")
+ set(CUDAToolkit_TARGET_DIR
"${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}")
+ # add known CUDA target root path to the set of directories we search for
programs, libraries and headers
+ list(PREPEND CMAKE_FIND_ROOT_PATH "${CUDAToolkit_TARGET_DIR}")
+
+ # Mark that we need to pop the root search path changes after we have
+ # found all cuda libraries so that searches for our cross-compilation
+ # libraries work when another cuda sdk is in CMAKE_PREFIX_PATH or
+ # PATH
+ set(_CUDAToolkit_Pop_ROOT_PATH True)
+ endif()
+else()
+ # Not cross compiling
+ set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}")
+ # Now that we have the real ROOT_DIR, find components inside it.
+ list(APPEND CMAKE_PREFIX_PATH ${CUDAToolkit_ROOT_DIR})
+
+ # Mark that we need to pop the prefix path changes after we have
+ # found the cudart library.
+ set(_CUDAToolkit_Pop_Prefix True)
+endif()
+
# Find the include/ directory
find_path(CUDAToolkit_INCLUDE_DIR
@@ -680,14 +738,17 @@ find_path(CUDAToolkit_INCLUDE_DIR
# And find the CUDA Runtime Library libcudart
find_library(CUDA_CUDART
NAMES cudart
- PATH_SUFFIXES lib64 lib/x64
+ PATH_SUFFIXES lib64 lib64/stubs lib/x64
)
if (NOT CUDA_CUDART AND NOT CUDAToolkit_FIND_QUIETLY)
message(STATUS "Unable to find cudart library.")
endif()
unset(CUDAToolkit_ROOT_DIR)
-list(REMOVE_AT CMAKE_PREFIX_PATH -1)
+if(_CUDAToolkit_Pop_Prefix)
+ list(REMOVE_AT CMAKE_PREFIX_PATH -1)
+ unset(_CUDAToolkit_Pop_Prefix)
+endif()
#-----------------------------------------------------------------------------
# Perform version comparison and validate all required variables are set.
@@ -702,6 +763,10 @@ find_package_handle_standard_args(CUDAToolkit
VERSION_VAR
CUDAToolkit_VERSION
)
+mark_as_advanced(CUDA_CUDART
+ CUDAToolkit_INCLUDE_DIR
+ CUDAToolkit_NVCC_EXECUTABLE
+ )
#-----------------------------------------------------------------------------
# Construct result variables
@@ -714,78 +779,103 @@ endif()
# Construct import targets
if(CUDAToolkit_FOUND)
- function(find_and_add_cuda_import_lib lib_name)
+ function(_CUDAToolkit_find_and_add_import_lib lib_name)
+ cmake_parse_arguments(arg "" "" "ALT;DEPS;EXTRA_PATH_SUFFIXES" ${ARGN})
- if(ARGC GREATER 1)
- set(search_names ${ARGN})
- else()
- set(search_names ${lib_name})
- endif()
+ set(search_names ${lib_name} ${arg_ALT})
find_library(CUDA_${lib_name}_LIBRARY
NAMES ${search_names}
- PATHS ${CUDAToolkit_LIBRARY_DIR}
+ HINTS ${CUDAToolkit_LIBRARY_DIR}
ENV CUDA_PATH
- PATH_SUFFIXES nvidia/current lib64 lib/x64 lib
+ PATH_SUFFIXES nvidia/current lib64 lib64/stubs lib/x64 lib lib/stubs
stubs
+ ${arg_EXTRA_PATH_SUFFIXES}
)
+ mark_as_advanced(CUDA_${lib_name}_LIBRARY)
- if (NOT CUDA::${lib_name} AND CUDA_${lib_name}_LIBRARY)
+ if (NOT TARGET CUDA::${lib_name} AND CUDA_${lib_name}_LIBRARY)
add_library(CUDA::${lib_name} IMPORTED INTERFACE)
target_include_directories(CUDA::${lib_name} SYSTEM INTERFACE
"${CUDAToolkit_INCLUDE_DIRS}")
target_link_libraries(CUDA::${lib_name} INTERFACE
"${CUDA_${lib_name}_LIBRARY}")
+ foreach(dep ${arg_DEPS})
+ if(TARGET CUDA::${dep})
+ target_link_libraries(CUDA::${lib_name} INTERFACE CUDA::${dep})
+ endif()
+ endforeach()
endif()
endfunction()
- function(add_cuda_link_dependency lib_name)
- foreach(dependency IN LISTS ${ARGN})
- target_link_libraries(CUDA::${lib_name} INTERFACE CUDA::${dependency})
- endforeach()
- endfunction()
+ if(NOT TARGET CUDA::toolkit)
+ add_library(CUDA::toolkit IMPORTED INTERFACE)
+ target_include_directories(CUDA::toolkit SYSTEM INTERFACE
"${CUDAToolkit_INCLUDE_DIRS}")
+ target_link_directories(CUDA::toolkit INTERFACE
"${CUDAToolkit_LIBRARY_DIR}")
+ endif()
- add_library(CUDA::toolkit IMPORTED INTERFACE)
- target_include_directories(CUDA::toolkit SYSTEM INTERFACE
"${CUDAToolkit_INCLUDE_DIRS}")
- target_link_directories(CUDA::toolkit INTERFACE "${CUDAToolkit_LIBRARY_DIR}")
+ _CUDAToolkit_find_and_add_import_lib(cuda_driver ALT cuda)
+ _CUDAToolkit_find_and_add_import_lib(cudart)
+ _CUDAToolkit_find_and_add_import_lib(cudart_static)
- find_and_add_cuda_import_lib(cuda_driver cuda)
+ # setup dependencies that are required for cudart_static when building
+ # on linux. These are generally only required when using the CUDA toolkit
+ # when CUDA language is disabled
+ if(NOT TARGET CUDA::cudart_static_deps
+ AND TARGET CUDA::cudart_static)
- find_and_add_cuda_import_lib(cudart)
- find_and_add_cuda_import_lib(cudart_static)
+ add_library(CUDA::cudart_static_deps IMPORTED INTERFACE)
+ target_link_libraries(CUDA::cudart_static INTERFACE
CUDA::cudart_static_deps)
- foreach (cuda_lib cublas cufft cufftw curand cusolver cusparse nvgraph
nvjpeg)
- find_and_add_cuda_import_lib(${cuda_lib})
- add_cuda_link_dependency(${cuda_lib} cudart)
+ if(UNIX AND (CMAKE_C_COMPILER OR CMAKE_CXX_COMPILER))
+ find_package(Threads REQUIRED)
+ target_link_libraries(CUDA::cudart_static_deps INTERFACE
Threads::Threads ${CMAKE_DL_LIBS})
+ endif()
- find_and_add_cuda_import_lib(${cuda_lib}_static)
- add_cuda_link_dependency(${cuda_lib}_static cudart_static)
+ if(UNIX AND NOT APPLE)
+ # On Linux, you must link against librt when using the static cuda
runtime.
+ find_library(CUDAToolkit_rt_LIBRARY rt)
+ mark_as_advanced(CUDAToolkit_rt_LIBRARY)
+ if(NOT CUDAToolkit_rt_LIBRARY)
+ message(WARNING "Could not find librt library, needed by
CUDA::cudart_static")
+ else()
+ target_link_libraries(CUDA::cudart_static_deps INTERFACE
${CUDAToolkit_rt_LIBRARY})
+ endif()
+ endif()
+ endif()
+
+ _CUDAToolkit_find_and_add_import_lib(culibos) # it's a static library
+ foreach (cuda_lib cublas cufft curand cusparse nppc nvjpeg)
+ _CUDAToolkit_find_and_add_import_lib(${cuda_lib})
+ _CUDAToolkit_find_and_add_import_lib(${cuda_lib}_static DEPS culibos)
endforeach()
+ # cuFFTW depends on cuFFT
+ _CUDAToolkit_find_and_add_import_lib(cufftw DEPS cufft)
+ _CUDAToolkit_find_and_add_import_lib(cufftw DEPS cufft_static)
+
# cuSOLVER depends on cuBLAS, and cuSPARSE
- add_cuda_link_dependency(cusolver cublas cusparse)
- add_cuda_link_dependency(cusolver_static cublas_static cusparse)
+ _CUDAToolkit_find_and_add_import_lib(cusolver DEPS cublas cusparse)
+ _CUDAToolkit_find_and_add_import_lib(cusolver_static DEPS cublas_static
cusparse_static culibos)
# nvGRAPH depends on cuRAND, and cuSOLVER.
- add_cuda_link_dependency(nvgraph curand cusolver)
- add_cuda_link_dependency(nvgraph_static curand_static cusolver_static)
-
- find_and_add_cuda_import_lib(nppc)
- find_and_add_cuda_import_lib(nppc_static)
-
- add_cuda_link_dependency(nppc cudart)
- add_cuda_link_dependency(nppc_static cudart_static culibos)
+ _CUDAToolkit_find_and_add_import_lib(nvgraph DEPS curand cusolver)
+ _CUDAToolkit_find_and_add_import_lib(nvgraph_static DEPS curand_static
cusolver_static)
# Process the majority of the NPP libraries.
foreach (cuda_lib nppial nppicc nppidei nppif nppig nppim nppist nppitc npps
nppicom nppisu)
- find_and_add_cuda_import_lib(${cuda_lib})
- find_and_add_cuda_import_lib(${cuda_lib}_static)
- add_cuda_link_dependency(${cuda_lib} nppc)
- add_cuda_link_dependency(${cuda_lib}_static nppc_static)
+ _CUDAToolkit_find_and_add_import_lib(${cuda_lib} DEPS nppc)
+ _CUDAToolkit_find_and_add_import_lib(${cuda_lib}_static DEPS nppc_static)
endforeach()
- find_and_add_cuda_import_lib(nvrtc)
- add_cuda_link_dependency(nvrtc cuda_driver)
+ _CUDAToolkit_find_and_add_import_lib(cupti
+ EXTRA_PATH_SUFFIXES
../extras/CUPTI/lib64/
+
../extras/CUPTI/lib/)
+ _CUDAToolkit_find_and_add_import_lib(cupti_static
+ EXTRA_PATH_SUFFIXES
../extras/CUPTI/lib64/
+
../extras/CUPTI/lib/)
+
+ _CUDAToolkit_find_and_add_import_lib(nvrtc DEPS cuda_driver)
- find_and_add_cuda_import_lib(nvml nvidia-ml nvml)
+ _CUDAToolkit_find_and_add_import_lib(nvml ALT nvidia-ml nvml)
if(WIN32)
# nvtools can be installed outside the CUDA toolkit directory
@@ -798,17 +888,12 @@ if(CUDAToolkit_FOUND)
PATH_SUFFIXES lib/x64 lib
)
endif()
- find_and_add_cuda_import_lib(nvToolsExt nvToolsExt nvToolsExt64)
+ _CUDAToolkit_find_and_add_import_lib(nvToolsExt ALT nvToolsExt64)
- add_cuda_link_dependency(nvToolsExt cudart)
-
- find_and_add_cuda_import_lib(OpenCL)
-
- find_and_add_cuda_import_lib(culibos)
- if(TARGET CUDA::culibos)
- foreach (cuda_lib cublas cufft cusparse curand nvjpeg)
- add_cuda_link_dependency(${cuda_lib}_static culibos)
- endforeach()
- endif()
+ _CUDAToolkit_find_and_add_import_lib(OpenCL)
+endif()
+if(_CUDAToolkit_Pop_ROOT_PATH)
+ list(REMOVE_AT CMAKE_FIND_ROOT_PATH 0)
+ unset(_CUDAToolkit_Pop_ROOT_PATH)
endif()
diff --git a/tests/python/quantization/test_quantization.py
b/tests/python/quantization/test_quantization.py
index 7804f6d..3f0af2a 100644
--- a/tests/python/quantization/test_quantization.py
+++ b/tests/python/quantization/test_quantization.py
@@ -200,8 +200,9 @@ def test_quantized_conv():
if is_test_for_native_cpu():
print('skipped testing quantized_conv for native cpu since it is
not supported yet')
return
- elif qdtype == 'int8' and is_test_for_mkldnn():
- print('skipped testing quantized_conv for mkldnn cpu int8 since it
is not supported yet')
+ elif is_test_for_mkldnn():
+ # (TODO)Xinyu:
https://github.com/apache/incubator-mxnet/issues/16830
+ print('skipped testing quantized_conv for mkldnn cpu since it is a
flaky case')
return
elif qdtype == 'uint8' and is_test_for_gpu():
print('skipped testing quantized_conv for gpu uint8 since it is
not supported yet')