KellenSunderland closed pull request #12742: WIP: Jetson trt build
URL: https://github.com/apache/incubator-mxnet/pull/12742
This is a PR merged from a forked repository.
As GitHub hides the original diff of a foreign (forked) pull request once it
is merged, the diff is reproduced below for the sake of provenance:
diff --git a/Jenkinsfile b/Jenkinsfile
index af059c58e83..de6c7783157 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -30,7 +30,6 @@ mx_cmake_lib = 'build/libmxnet.so, build/libmxnet.a,
build/3rdparty/dmlc-core/li
mx_cmake_lib_debug = 'build/libmxnet.so, build/libmxnet.a,
build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests'
mx_cmake_mkldnn_lib = 'build/libmxnet.so, build/libmxnet.a,
build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests,
build/3rdparty/openmp/runtime/src/libomp.so,
build/3rdparty/mkldnn/src/libmkldnn.so.0'
mx_mkldnn_lib = 'lib/libmxnet.so, lib/libmxnet.a, lib/libiomp5.so,
lib/libmkldnn.so.0, lib/libmklml_intel.so, 3rdparty/dmlc-core/libdmlc.a,
3rdparty/tvm/nnvm/lib/libnnvm.a'
-mx_tensorrt_lib = 'lib/libmxnet.so, lib/libnvonnxparser_runtime.so.0,
lib/libnvonnxparser.so.0, lib/libonnx_proto.so, lib/libonnx.so'
mx_lib_cpp_examples = 'lib/libmxnet.so, lib/libmxnet.a,
3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a,
3rdparty/ps-lite/build/libps.a, deps/lib/libprotobuf-lite.a, deps/lib/libzmq.a,
build/cpp-package/example/lenet, build/cpp-package/example/alexnet,
build/cpp-package/example/googlenet,
build/cpp-package/example/lenet_with_mxdataiter,
build/cpp-package/example/resnet, build/cpp-package/example/mlp,
build/cpp-package/example/mlp_cpu, build/cpp-package/example/mlp_gpu,
build/cpp-package/example/test_score, build/cpp-package/example/test_optimizer'
mx_lib_cpp_examples_cpu = 'build/libmxnet.so,
build/cpp-package/example/mlp_cpu'
@@ -353,7 +352,7 @@ core_logic: {
timeout(time: max_time, unit: 'MINUTES') {
utils.init_git()
utils.docker_run('ubuntu_gpu_tensorrt',
'build_ubuntu_gpu_tensorrt', false)
- utils.pack_lib('tensorrt', mx_tensorrt_lib, true)
+ utils.pack_lib('tensorrt', mx_lib, true)
}
}
}
@@ -672,7 +671,7 @@ core_logic: {
ws('workspace/build-tensorrt') {
timeout(time: max_time, unit: 'MINUTES') {
try {
- utils.unpack_and_init('tensorrt', mx_tensorrt_lib, true)
+ utils.unpack_and_init('tensorrt', mx_lib)
utils.docker_run('ubuntu_gpu_tensorrt',
'unittest_ubuntu_tensorrt_gpu', true)
utils.publish_test_coverage()
} finally {
diff --git a/Makefile b/Makefile
index 1c8d70ecc69..6102a1d08d5 100644
--- a/Makefile
+++ b/Makefile
@@ -99,7 +99,7 @@ endif
ifeq ($(USE_TENSORRT), 1)
CFLAGS += -I$(ROOTDIR) -I$(TPARTYDIR)
-DONNX_NAMESPACE=$(ONNX_NAMESPACE) -DMXNET_USE_TENSORRT=1
- LDFLAGS += -lprotobuf -pthread -lonnx -lonnx_proto -lnvonnxparser
-lnvonnxparser_runtime -lnvinfer -lnvinfer_plugin
+ LDFLAGS += -lprotobuf -pthread -lnvinfer -lnvinfer_plugin
endif
# -L/usr/local/lib
diff --git a/ci/docker/Dockerfile.build.jetson
b/ci/docker/Dockerfile.build.jetson
index 4be011af068..28b4221e935 100755
--- a/ci/docker/Dockerfile.build.jetson
+++ b/ci/docker/Dockerfile.build.jetson
@@ -32,7 +32,6 @@ ENV TARGET ARMV8
RUN sed -i '\#deb http://cdn-fastly.deb.debian.org/debian-security
jessie/updates main#d' /etc/apt/sources.list
RUN sed -i 's/cdn-fastly.//' /etc/apt/sources.list
-
WORKDIR /work/deps
COPY install/ubuntu_arm.sh /work/
@@ -52,20 +51,35 @@ COPY --from=cudabuilder /usr/local/cuda /usr/local/cuda
ENV TARGET_ARCH aarch64
ENV TARGET_OS linux
-# Install ARM depedencies based on Jetpack 3.2.1
-RUN
JETPACK_DOWNLOAD_PREFIX=https://developer.download.nvidia.com/devzone/devcenter/mobile/jetpack_l4t/3.2.1/m8u2ki/JetPackL4T_321_b23
&& \
+COPY install/ubuntu_protobuf.sh /work/
+RUN /work/ubuntu_protobuf.sh
+
+# Install ARM depedencies based on Jetpack 3.3
+RUN
JETPACK_DOWNLOAD_PREFIX=https://developer.download.nvidia.com/devzone/devcenter/mobile/jetpack_l4t/3.3/lw.xd42/JetPackL4T_33_b39
&& \
+ CUDA_REPO_PREFIX=/var/cuda-repo-9-0-local && \
ARM_CUDA_INSTALLER_PACKAGE=cuda-repo-l4t-9-0-local_9.0.252-1_arm64.deb && \
- ARM_CUDNN_INSTALLER_PACKAGE=libcudnn7_7.0.5.15-1+cuda9.0_arm64.deb && \
- ARM_CUDNN_DEV_INSTALLER_PACKAGE=libcudnn7-dev_7.0.5.15-1+cuda9.0_arm64.deb
&& \
+ ARM_CUDNN_INSTALLER_PACKAGE=libcudnn7_7.1.5.14-1+cuda9.0_arm64.deb && \
+ ARM_CUDNN_DEV_INSTALLER_PACKAGE=libcudnn7-dev_7.1.5.14-1+cuda9.0_arm64.deb
&& \
+ ARM_LICENSE_INSTALLER=cuda-license-9-0_9.0.252-1_arm64.deb && \
+ ARM_CUBLAS_INSTALLER=cuda-cublas-9-0_9.0.252-1_arm64.deb && \
+ ARM_NVINFER_INSTALLER_PACKAGE=libnvinfer4_4.1.3-1+cuda9.0_arm64.deb && \
+ ARM_NVINFER_DEV_INSTALLER_PACKAGE=libnvinfer-dev_4.1.3-1+cuda9.0_arm64.deb
&& \
dpkg --add-architecture arm64 && \
wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_CUDA_INSTALLER_PACKAGE && \
wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_CUDNN_INSTALLER_PACKAGE && \
wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_CUDNN_DEV_INSTALLER_PACKAGE && \
+ wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_NVINFER_INSTALLER_PACKAGE && \
+ wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_NVINFER_DEV_INSTALLER_PACKAGE && \
dpkg -i --force-architecture $ARM_CUDA_INSTALLER_PACKAGE && \
- apt-key add /var/cuda-repo-9-0-local/7fa2af80.pub && \
+ apt-key add $CUDA_REPO_PREFIX/7fa2af80.pub && \
dpkg -i --force-architecture $ARM_CUDNN_INSTALLER_PACKAGE && \
dpkg -i --force-architecture $ARM_CUDNN_DEV_INSTALLER_PACKAGE && \
- apt update -y || true && apt install -y cuda-libraries-dev-9-0
libcudnn7-dev
+ dpkg -i --force-architecture $CUDA_REPO_PREFIX/$ARM_LICENSE_INSTALLER && \
+ dpkg -i --force-architecture $CUDA_REPO_PREFIX/$ARM_CUBLAS_INSTALLER && \
+ dpkg -i --force-architecture $ARM_NVINFER_INSTALLER_PACKAGE && \
+ dpkg -i --force-architecture $ARM_NVINFER_DEV_INSTALLER_PACKAGE && \
+ apt update -y || true && apt install -y cuda-libraries-dev-9-0
libcudnn7-dev libnvinfer-dev
+
ENV PATH $PATH:/usr/local/cuda/bin
ENV NVCCFLAGS "-m64"
ENV CUDA_ARCH "-gencode arch=compute_53,code=sm_53 -gencode
arch=compute_62,code=sm_62"
diff --git a/ci/docker/install/ubuntu_protobuf.sh
b/ci/docker/install/ubuntu_protobuf.sh
new file mode 100755
index 00000000000..012a482a912
--- /dev/null
+++ b/ci/docker/install/ubuntu_protobuf.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Install Protobuf
+pushd .
+cd ..
+apt-get update
+apt-get install -y automake libtool zip
+git clone --recursive -b 3.5.1.1 https://github.com/google/protobuf.git
+
+cd protobuf
+
+# Default AMD64 protobuf target.
+AMD64_PROTOBUF_TARGET=/usr/local
+
+# Custom ARM protobuf target.
+ARM_PROTOBUF_TARGET=/usr/aarch64-linux-gnu
+
+# Install protoc 3.5 and build protobuf here (for onnx and onnx-tensorrt)
+./autogen.sh
+./configure --disable-shared CXXFLAGS=-fPIC --host=amd64 CC=gcc CXX=g++
+make -j$(nproc)
+make install
+
+# Remove dynamic AMD64 protobuf libs to force linker to statically link
+rm -rf $AMD64_PROTOBUF_TARGET/lib/libprotobuf-lite.so*
+rm -rf $AMD64_PROTOBUF_TARGET/lib/libprotobuf.so*
+rm -rf $AMD64_PROTOBUF_TARGET/lib/libprotoc.so*
+
+mkdir -p /usr/local/protobuf/targets/aarch64-linux
+make clean
+./autogen.sh
+./configure --disable-shared CXXFLAGS=-fPIC --host=arm-linux
--with-protoc=/usr/local/bin/protoc --prefix=$ARM_PROTOBUF_TARGET
+make -j$(nproc)
+make install
+
+# Remove dynamic ARM protobuf libs to force linker to statically link
+rm -rf $ARM_PROTOBUF_TARGET/lib/libprotobuf-lite.so*
+rm -rf $ARM_PROTOBUF_TARGET/lib/libprotobuf.so*
+rm -rf $ARM_PROTOBUF_TARGET/lib/libprotoc.so*
+
+ldconfig
+popd
diff --git a/ci/docker/install/tensorrt.sh
b/ci/docker/install/ubuntu_tensorrt.sh
similarity index 73%
rename from ci/docker/install/tensorrt.sh
rename to ci/docker/install/ubuntu_tensorrt.sh
index 61e73ef9a62..8f2cd8d31bf 100755
--- a/ci/docker/install/tensorrt.sh
+++ b/ci/docker/install/ubuntu_tensorrt.sh
@@ -21,24 +21,6 @@
pip2 install gluoncv==0.2.0
pip3 install gluoncv==0.2.0
-# Install Protobuf
-# Install protoc 3.5 and build protobuf here (for onnx and onnx-tensorrt)
-pushd .
-cd ..
-apt-get update
-apt-get install -y automake libtool
-git clone --recursive -b 3.5.1.1 https://github.com/google/protobuf.git
-cd protobuf
-./autogen.sh
-./configure --disable-shared CXXFLAGS=-fPIC
-make -j$(nproc)
-make install
-rm -rf /usr/local/lib/libprotobuf-lite.so*
-rm -rf /usr/local/lib/libprotobuf.so*
-rm -rf /usr/local/lib/libprotoc.so*
-ldconfig
-popd
-
# Install TensorRT
echo "TensorRT build enabled. Installing TensorRT."
wget -qO tensorrt.deb
https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0_1-1_amd64.deb
diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index 96b1646eff9..4d69fb03b67 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -77,6 +77,8 @@ build_wheel() {
export MXNET_LIBRARY_PATH=${BUILD_DIR}/libmxnet.so
cd ${PYTHON_DIR}
+
+ # If building for redistribution edit the name in this setup.py
python setup.py bdist_wheel --universal
# repackage
@@ -94,6 +96,8 @@ build_wheel() {
rm -rf ${TMPDIR}
popd
+
+ # If redistributing you may now run `twine upload -r pypi *.whl`
}
# Build commands: Every platform in docker/Dockerfile.build.<platform> should
have a corresponding
@@ -103,8 +107,56 @@ build_jetson() {
set -ex
pushd .
+ # Build ONNX
+ pushd .
+ echo "Installing ONNX."
+ cd 3rdparty/onnx-tensorrt/third_party/onnx
+ rm -rf build
+ mkdir -p build
+ cd build
+ cmake \
+ -DCMAKE_CXX_FLAGS=-I/usr/include/python${PYVER} \
+ -DProtobuf_LIBRARY=/usr/aarch64-linux-gnu/lib/libprotobuf.a \
+ -DBUILD_SHARED_LIBS=OFF .. \
+ -G Ninja
+ ninja -j 1 -v onnx/onnx.proto
+ ninja -j 1 -v
+ export LIBRARY_PATH=`pwd`:`pwd`/onnx/:$LIBRARY_PATH
+ export CPLUS_INCLUDE_PATH=`pwd`:$CPLUS_INCLUDE_PATH
+ popd
+
+ # Build ONNX-TensorRT
+ pushd .
+ cd 3rdparty/onnx-tensorrt/
+ mkdir -p build
+ cd build
+
+ # Work around cmake findcuda issue by running twice.
+ cmake -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda/ \
+
-DCUDA_CUDART_LIBRARY=/usr/local/cuda-9.0/targets/aarch64-linux/lib/libcudart.so
\
+
-DCUDA_INCLUDE_DIRS=/usr/local/cuda-9.0/targets/aarch64-linux/include/ \
+
-DCUDA_TOOLKIT_TARGET_DIR=/usr/local/cuda-9.0/targets/aarch64-linux/ \
+ -DProtobuf_LIBRARY=/usr/aarch64-linux-gnu/lib/libprotobuf.a .. || \
+ cmake -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda/ \
+
-DCUDA_CUDART_LIBRARY=/usr/local/cuda-9.0/targets/aarch64-linux/lib/libcudart.so
\
+
-DCUDA_INCLUDE_DIRS=/usr/local/cuda-9.0/targets/aarch64-linux/include/ \
+
-DCUDA_TOOLKIT_TARGET_DIR=/usr/local/cuda-9.0/targets/aarch64-linux/ \
+ -DProtobuf_LIBRARY=/usr/aarch64-linux-gnu/lib/libprotobuf.a ..
+ make -j$(nproc) gen_onnx_proto
+ make -j$(nproc) nvonnxparser_plugin
+ make -j$(nproc) onnx_proto
+ make -j$(nproc) nvonnxparser_runtime_static
+ make -j$(nproc) nvonnxparser_static
+ make -j$(nproc) nvonnxparser
+ make -j$(nproc) trt_onnxify
+ export LIBRARY_PATH=`pwd`:$LIBRARY_PATH
+ export LIBRARY_PATH=$LIBRARY_PATH:`pwd`/third_party/onnx/
+ mv third_party/onnx/libonnx_proto.a third_party/onnx/libonnxtrt_proto.a
+ popd
+
cp make/crosscompile.jetson.mk ./config.mk
- make -j$(nproc)
+ echo $LIBRARY_PATH
+ make -j $(nproc)
build_wheel /work/mxnet/python /work/mxnet/lib
popd
@@ -495,7 +547,7 @@ build_ubuntu_gpu_tensorrt() {
cd build
cmake \
-DCMAKE_CXX_FLAGS=-I/usr/include/python${PYVER}\
- -DBUILD_SHARED_LIBS=ON ..\
+ -DBUILD_SHARED_LIBS=OFF ..\
-G Ninja
ninja -j 1 -v onnx/onnx.proto
ninja -j 1 -v
@@ -511,16 +563,15 @@ build_ubuntu_gpu_tensorrt() {
cmake ..
make -j$(nproc)
export LIBRARY_PATH=`pwd`:$LIBRARY_PATH
+ export LIBRARY_PATH=$LIBRARY_PATH:`pwd`/third_party/onnx/
+ mv third_party/onnx/libonnx_proto.a third_party/onnx/libonnxtrt_proto.a
popd
mkdir -p /work/mxnet/lib/
- cp 3rdparty/onnx-tensorrt/third_party/onnx/build/*.so /work/mxnet/lib/
- cp -L 3rdparty/onnx-tensorrt/build/libnvonnxparser_runtime.so.0
/work/mxnet/lib/
- cp -L 3rdparty/onnx-tensorrt/build/libnvonnxparser.so.0 /work/mxnet/lib/
rm -rf build
make \
- DEV=1 \
+ DEV=0 \
ENABLE_TESTCOVERAGE=1 \
USE_BLAS=openblas \
USE_CUDA=1 \
@@ -532,8 +583,10 @@ build_ubuntu_gpu_tensorrt() {
USE_JEMALLOC=0 \
USE_GPERFTOOLS=0 \
ONNX_NAMESPACE=onnx \
- CUDA_ARCH="-gencode arch=compute_70,code=compute_70"\
+ CUDA_ARCH="-gencode arch=compute_53,code=compute_53"\
-j$(nproc)
+
+ build_wheel /work/mxnet/python /work/mxnet/lib
}
build_ubuntu_gpu_mkldnn() {
diff --git a/make/crosscompile.jetson.mk b/make/crosscompile.jetson.mk
index a1468f4496d..f7df63d86c0 100644
--- a/make/crosscompile.jetson.mk
+++ b/make/crosscompile.jetson.mk
@@ -56,6 +56,12 @@ DEBUG = 0
# whether to turn on segfault signal handler to log the stack trace
USE_SIGNAL_HANDLER = 1
+# Enable TensorRT on Jetson devices (requires nvinfer library installed via
Jetpack)
+USE_TENSORRT = 1
+
+# Set a default namespace for MXNet's native ONNX converter
+ONNX_NAMESPACE=onnx
+
# the additional link flags you want to add
ADD_LDFLAGS = -L${CROSS_ROOT}/lib
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services