This is an automated email from the ASF dual-hosted git repository. cjolivier01 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push: new 35ceea7 use first class cuda with cmake 3.9 and cuda9.0 support (#8572) 35ceea7 is described below commit 35ceea73ccc1acfb2ec62cdaa841822e51c13456 Author: Hu Shiwen <yajiedes...@gmail.com> AuthorDate: Sat Nov 11 10:55:44 2017 +0800 use first class cuda with cmake 3.9 and cuda9.0 support (#8572) * use first class cuda with cmake 3.9 and cuda9.0 support fix lapack auto use with openblas * change name --- CMakeLists.txt | 133 ++++++++++++++++------- cmake/ChooseBlas.cmake | 58 ++++++++++ cmake/FirstClassLangCuda.cmake | 236 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 386 insertions(+), 41 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 539515b..af681d0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,17 +1,35 @@ cmake_minimum_required(VERSION 3.0.2) -project(mxnet C CXX) +if((${CMAKE_VERSION} VERSION_GREATER "3.9.0") OR (${CMAKE_VERSION} VERSION_EQUAL "3.9.0")) + set(FIRST_CUDA TRUE) +else() + set(FIRST_CUDA FALSE) +endif() +include(cmake/Utils.cmake) -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/build/private/local_config.cmake) - include(${CMAKE_CURRENT_SOURCE_DIR}/build/private/local_config.cmake) +#Some things have order. This must be put in front alone +mxnet_option(USE_CUDA "Build with CUDA support" ON) +mxnet_option(USE_OLDCMAKECUDA "Build with old cmake cuda" OFF) +if(USE_CUDA) + add_definitions(-DMSHADOW_USE_CUDA=1) + IF(FIRST_CUDA AND (NOT USE_OLDCMAKECUDA)) + set(__cuda_toolset "7.5" "8.0" "9.0") + set(CUDA_TOOLSET "8.0" CACHE STRING "Select CUDA Version.") + set_property( CACHE CUDA_TOOLSET PROPERTY STRINGS "" ${__cuda_toolset} ) + set(CMAKE_GENERATOR_TOOLSET "cuda=${CUDA_TOOLSET},host=x64") + project(mxnet C CXX CUDA) + else() + project(mxnet C CXX) + set(FIRST_CUDA FALSE) + endif() +else() + project(mxnet C CXX) + add_definitions(-DMSHADOW_USE_CUDA=0) endif() -set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Modules;${CMAKE_MODULE_PATH}") -include(cmake/Utils.cmake) mxnet_option(USE_OPENCV "Build with OpenCV support" ON) mxnet_option(USE_OPENMP "Build with Openmp support" ON) -mxnet_option(USE_CUDA "Build with CUDA support" ON) mxnet_option(USE_CUDNN "Build with cudnn support" ON) # one could set CUDNN_ROOT for search path mxnet_option(USE_LAPACK "Build with lapack support" ON IF NOT MSVC) mxnet_option(USE_MKL_IF_AVAILABLE "Use MKL if found" ON) @@ -29,6 +47,17 @@ mxnet_option(USE_GPROF "Compile with gprof (profiling) flag" OFF) mxnet_option(USE_VTUNE "Enable use of Intel Amplifier XE (VTune)" OFF) # one could set VTUNE_ROOT for search path mxnet_option(INSTALL_EXAMPLES "Install the example source files." OFF) + + +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/build/private/local_config.cmake) + include(${CMAKE_CURRENT_SOURCE_DIR}/build/private/local_config.cmake) +endif() + +set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Modules;${CMAKE_MODULE_PATH}") + + + + SET(EXTRA_OPERATORS "" CACHE PATH "EXTRA OPERATORS PATH") if("$ENV{VERBOSE}" STREQUAL "1") @@ -128,14 +157,20 @@ endif() include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src) -if(EXISTS ${PROJECT_SOURCE_DIR}/mshadow/cmake) - include(mshadow/cmake/mshadow.cmake) +if(FIRST_CUDA) + include(cmake/ChooseBlas.cmake) include(mshadow/cmake/Utils.cmake) - include(mshadow/cmake/Cuda.cmake) + include(cmake/FirstClassLangCuda.cmake) else() - include(mshadowUtils) - include(Cuda) - include(mshadow) + if(EXISTS ${PROJECT_SOURCE_DIR}/mshadow/cmake) + include(mshadow/cmake/mshadow.cmake) + include(mshadow/cmake/Utils.cmake) + include(mshadow/cmake/Cuda.cmake) + else() + include(mshadowUtils) + include(Cuda) + include(mshadow) + endif() endif() list(APPEND mxnet_LINKER_LIBS ${mshadow_LINKER_LIBS}) @@ -241,7 +276,7 @@ if(USE_LAPACK) list(APPEND mxnet_LINKER_LIBS lapack) else(USE_LAPACK) # Workaround for Windows until using new Jenkinsfile. - if(USE_BLAS STREQUAL "open") + if(BLAS STREQUAL "Open" OR BLAS STREQUAL "open") add_definitions(-DMXNET_USE_LAPACK=1) endif() endif() @@ -372,36 +407,46 @@ if(MSVC) endif() if(USE_CUDA) - list(APPEND CUDA_INCLUDE_DIRS ${INCLUDE_DIRECTORIES}) - # define preprocessor macro so that we will not include the generated forcelink header - mshadow_cuda_compile(cuda_objs ${CUDA}) - if(MSVC) - FIND_LIBRARY(CUDA_nvrtc_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32") - list(APPEND mxnet_LINKER_LIBS ${CUDA_nvrtc_LIBRARY}) - set(CUDA_cuda_LIBRARY "${CUDA_nvrtc_LIBRARY}/../cuda.lib") - list(APPEND mxnet_LINKER_LIBS ${CUDA_cuda_LIBRARY}) - FIND_LIBRARY(CUDA_cufft_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32") - list(APPEND mxnet_LINKER_LIBS "${CUDA_cufft_LIBRARY}/../cufft.lib") # For fft operator - FIND_LIBRARY(CUDA_cusolver_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32") - list(APPEND mxnet_LINKER_LIBS "${CUDA_cusolver_LIBRARY}/../cusolver.lib") # For cusolver - else(MSVC) - list(APPEND mxnet_LINKER_LIBS nvrtc cuda cufft cusolver) - link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") - endif() - list(APPEND SOURCE ${cuda_objs} ${CUDA}) - add_definitions(-DMXNET_USE_CUDA=1) - if(CUDA_LIBRARY_PATH) - if(IS_CONTAINER_BUILD) - # In case of building on a production-like build container which may not have Cuda installed - if(NOT CMAKE_SYSTEM_HAS_CUDA) - # Assuming building in a container that doesn't have CUDA installed (ie CPU-only build machine) - # so use the stub cuda driver shared library - if(EXISTS ${CUDA_LIBRARY_PATH}/stubs/libcuda.so) - link_directories(${CUDA_LIBRARY_PATH}/stubs) + if(FIRST_CUDA) + mshadow_select_nvcc_arch_flags(NVCC_FLAGS_ARCH) + string(REPLACE ";" " " NVCC_FLAGS_ARCH "${NVCC_FLAGS_ARCH}") + set(CMAKE_CUDA_FLAGS "${NVCC_FLAGS_ARCH}") + set(CMAKE_CUDA_FLAGS_RELEASE "${NVCC_FLAGS_ARCH} -use_fast_math") + list(APPEND mxnet_LINKER_LIBS nvrtc cuda cublas cufft cusolver curand) + list(APPEND SOURCE ${CUDA}) + add_definitions(-DMXNET_USE_CUDA=1) + else() + list(APPEND CUDA_INCLUDE_DIRS ${INCLUDE_DIRECTORIES}) + # define preprocessor macro so that we will not include the generated forcelink header + mshadow_cuda_compile(cuda_objs ${CUDA}) + if(MSVC) + FIND_LIBRARY(CUDA_nvrtc_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32") + list(APPEND mxnet_LINKER_LIBS ${CUDA_nvrtc_LIBRARY}) + set(CUDA_cuda_LIBRARY "${CUDA_nvrtc_LIBRARY}/../cuda.lib") + list(APPEND mxnet_LINKER_LIBS ${CUDA_cuda_LIBRARY}) + FIND_LIBRARY(CUDA_cufft_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32") + list(APPEND mxnet_LINKER_LIBS "${CUDA_cufft_LIBRARY}/../cufft.lib") # For fft operator + FIND_LIBRARY(CUDA_cusolver_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32") + list(APPEND mxnet_LINKER_LIBS "${CUDA_cusolver_LIBRARY}/../cusolver.lib") # For cusolver + else(MSVC) + list(APPEND mxnet_LINKER_LIBS nvrtc cuda cufft cusolver) + link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") + endif() + list(APPEND SOURCE ${cuda_objs} ${CUDA}) + add_definitions(-DMXNET_USE_CUDA=1) + if(CUDA_LIBRARY_PATH) + if(IS_CONTAINER_BUILD) + # In case of building on a production-like build container which may not have Cuda installed + if(NOT CMAKE_SYSTEM_HAS_CUDA) + # Assuming building in a container that doesn't have CUDA installed (ie CPU-only build machine) + # so use the stub cuda driver shared library + if(EXISTS ${CUDA_LIBRARY_PATH}/stubs/libcuda.so) + link_directories(${CUDA_LIBRARY_PATH}/stubs) + endif() + endif() endif() - endif() endif() - endif() + endif() endif() # unsupported: if caffe is a subdirectory of mxnet, load its CMakeLists.txt as well @@ -444,6 +489,12 @@ else() endif() endif() +if(USE_CUDA) + if(FIRST_CUDA) + target_compile_options(mxnet PUBLIC "$<$<CONFIG:DEBUG>:-Xcompiler=-MTd>") + target_compile_options(mxnet PUBLIC "$<$<CONFIG:RELEASE>:-Xcompiler=-MT>") + endif() +endif() if(USE_DIST_KVSTORE) if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/ps-lite/CMakeLists.txt) add_subdirectory("ps-lite") diff --git a/cmake/ChooseBlas.cmake b/cmake/ChooseBlas.cmake new file mode 100644 index 0000000..3a8723a --- /dev/null +++ b/cmake/ChooseBlas.cmake @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set(BLAS "Open" CACHE STRING "Selected BLAS library") +set_property(CACHE BLAS PROPERTY STRINGS "Atlas;Open;MKL") + +if(USE_MKL_IF_AVAILABLE) + if(NOT MKL_FOUND) + find_package(MKL) + endif() + if(MKL_FOUND) + if(USE_MKLML_MKL) + set(BLAS "open") + else() + set(BLAS "MKL") + endif() + endif() +endif() + +if(BLAS STREQUAL "Atlas" OR BLAS STREQUAL "atlas") + find_package(Atlas REQUIRED) + include_directories(SYSTEM ${Atlas_INCLUDE_DIR}) + list(APPEND mshadow_LINKER_LIBS ${Atlas_LIBRARIES}) + add_definitions(-DMSHADOW_USE_CBLAS=1) + add_definitions(-DMSHADOW_USE_MKL=0) +elseif(BLAS STREQUAL "Open" OR BLAS STREQUAL "open") + find_package(OpenBLAS REQUIRED) + include_directories(SYSTEM ${OpenBLAS_INCLUDE_DIR}) + list(APPEND mshadow_LINKER_LIBS ${OpenBLAS_LIB}) + add_definitions(-DMSHADOW_USE_CBLAS=1) + add_definitions(-DMSHADOW_USE_MKL=0) +elseif(BLAS STREQUAL "MKL" OR BLAS STREQUAL "mkl") + find_package(MKL REQUIRED) + include_directories(SYSTEM ${MKL_INCLUDE_DIR}) + list(APPEND mshadow_LINKER_LIBS ${MKL_LIBRARIES}) + add_definitions(-DMSHADOW_USE_CBLAS=0) + add_definitions(-DMSHADOW_USE_MKL=1) +elseif(BLAS STREQUAL "apple") + find_package(Accelerate REQUIRED) + include_directories(SYSTEM ${Accelerate_INCLUDE_DIR}) + list(APPEND mshadow_LINKER_LIBS ${Accelerate_LIBRARIES}) + add_definitions(-DMSHADOW_USE_MKL=0) + add_definitions(-DMSHADOW_USE_CBLAS=1) +endif() \ No newline at end of file diff --git a/cmake/FirstClassLangCuda.cmake b/cmake/FirstClassLangCuda.cmake new file mode 100644 index 0000000..73f0758 --- /dev/null +++ b/cmake/FirstClassLangCuda.cmake @@ -0,0 +1,236 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#this file is CUDA help function with CMAKE first class CUDA + +include(CheckCXXCompilerFlag) +check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11) + +################################################################################################ +# Short command for cuDNN detection. Believe it soon will be a part of CUDA toolkit distribution. +# That's why not FindcuDNN.cmake file, but just the macro +# Usage: +# detect_cuDNN() +function(detect_cuDNN) + set(CUDNN_ROOT "" CACHE PATH "CUDNN root folder") + + find_path(CUDNN_INCLUDE cudnn.h + PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} + DOC "Path to cuDNN include directory." ) + + + find_library(CUDNN_LIBRARY NAMES libcudnn.so cudnn.lib # libcudnn_static.a + PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDNN_INCLUDE} + DOC "Path to cuDNN library.") + + if(CUDNN_INCLUDE AND CUDNN_LIBRARY) + set(HAVE_CUDNN TRUE PARENT_SCOPE) + set(CUDNN_FOUND TRUE PARENT_SCOPE) + + mark_as_advanced(CUDNN_INCLUDE CUDNN_LIBRARY CUDNN_ROOT) + message(STATUS "Found cuDNN (include: ${CUDNN_INCLUDE}, library: ${CUDNN_LIBRARY})") + endif() +endfunction() + + + +################################################################################################ +# A function for automatic detection of GPUs installed (if autodetection is enabled) +# Usage: +# mshadow_detect_installed_gpus(out_variable) +function(mshadow_detect_installed_gpus out_variable) + if(NOT CUDA_gpu_detect_output) + set(__cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu) + + file(WRITE ${__cufile} "" + "#include <cstdio>\n" + "int main()\n" + "{\n" + " int count = 0;\n" + " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n" + " if (count == 0) return -1;\n" + " for (int device = 0; device < count; ++device)\n" + " {\n" + " cudaDeviceProp prop;\n" + " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n" + " std::printf(\"%d.%d \", prop.major, prop.minor);\n" + " }\n" + " return 0;\n" + "}\n") + enable_language(CUDA) + + try_run(__nvcc_res __compile_result ${PROJECT_BINARY_DIR} ${file} + COMPILE_OUTPUT_VARIABLE __compile_out + RUN_OUTPUT_VARIABLE __nvcc_out) + + if(__nvcc_res EQUAL 0 AND __compile_result) + # nvcc outputs text containing line breaks when building with MSVC. + # The line below prevents CMake from inserting a variable with line + # breaks in the cache + string(REGEX MATCH "([1-9].[0-9])" __nvcc_out "${__nvcc_out}") + string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}") + set(CUDA_gpu_detect_output ${__nvcc_out}) + else() + message(WARNING "Running GPU detection script with nvcc failed: ${__nvcc_out} ${__compile_out}") + endif() + endif() + + if(NOT CUDA_gpu_detect_output) + message(WARNING "Automatic GPU detection failed. Building for all known architectures (${mshadow_known_gpu_archs}).") + set(${out_variable} ${mshadow_known_gpu_archs} PARENT_SCOPE) + else() + set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE) + endif() +endfunction() + + +# This list will be used for CUDA_ARCH_NAME = All option +set(CUDA_KNOWN_GPU_ARCHITECTURES "Fermi" "Kepler" "Maxwell") + +# This list will be used for CUDA_ARCH_NAME = Common option (enabled by default) +set(CUDA_COMMON_GPU_ARCHITECTURES "3.0" "3.5" "5.0") + +if (CUDA_TOOLSET VERSION_GREATER "6.5") + list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Kepler+Tegra" "Kepler+Tesla" "Maxwell+Tegra") + list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.2") +endif () + +if (CUDA_TOOLSET VERSION_GREATER "7.5") + list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Pascal") + list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "6.0" "6.1" "6.1+PTX") +else() + list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.2+PTX") +endif () + +################################################################################################ +# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME +# Usage: +# mshadow_select_nvcc_arch_flags(out_variable) +function(mshadow_select_nvcc_arch_flags out_variable) + + set(CUDA_ARCH_LIST "All" CACHE STRING "Select target NVIDIA GPU achitecture.") + set_property( CACHE CUDA_ARCH_LIST PROPERTY STRINGS "" "All" ${CUDA_KNOWN_GPU_ARCHITECTURES} ) + mark_as_advanced(CUDA_ARCH_NAME) + + + if("X${CUDA_ARCH_LIST}" STREQUAL "X" ) + set(CUDA_ARCH_LIST "All") + endif() + + set(cuda_arch_bin) + set(cuda_arch_ptx) + + if("${CUDA_ARCH_LIST}" STREQUAL "All") + set(CUDA_ARCH_LIST ${CUDA_KNOWN_GPU_ARCHITECTURES}) + elseif("${CUDA_ARCH_LIST}" STREQUAL "Common") + set(CUDA_ARCH_LIST ${CUDA_COMMON_GPU_ARCHITECTURES}) + elseif("${CUDA_ARCH_LIST}" STREQUAL "Auto") + mshadow_detect_installed_gpus(CUDA_ARCH_LIST) + message(STATUS "Autodetected CUDA architecture(s): ${CUDA_ARCH_LIST}") + endif() + + # Now process the list and look for names + string(REGEX REPLACE "[ \t]+" ";" CUDA_ARCH_LIST "${CUDA_ARCH_LIST}") + list(REMOVE_DUPLICATES CUDA_ARCH_LIST) + foreach(arch_name ${CUDA_ARCH_LIST}) + set(arch_bin) + set(arch_ptx) + set(add_ptx FALSE) + # Check to see if we are compiling PTX + if(arch_name MATCHES "(.*)\\+PTX$") + set(add_ptx TRUE) + set(arch_name ${CMAKE_MATCH_1}) + endif() + if(arch_name MATCHES "^([0-9]\\.[0-9](\\([0-9]\\.[0-9]\\))?)$") + set(arch_bin ${CMAKE_MATCH_1}) + set(arch_ptx ${arch_bin}) + else() + # Look for it in our list of known architectures + if(${arch_name} STREQUAL "Fermi") + if (CUDA_TOOLSET VERSION_LESS "8.0") + set(arch_bin 2.0 "2.1(2.0)") + endif() + elseif(${arch_name} STREQUAL "Kepler+Tegra") + set(arch_bin 3.2) + elseif(${arch_name} STREQUAL "Kepler+Tesla") + set(arch_bin 3.7) + elseif(${arch_name} STREQUAL "Kepler") + set(arch_bin 3.0 3.5) + set(arch_ptx 3.5) + elseif(${arch_name} STREQUAL "Maxwell+Tegra") + set(arch_bin 5.3) + elseif(${arch_name} STREQUAL "Maxwell") + set(arch_bin 5.0 5.2) + set(arch_ptx 5.2) + elseif(${arch_name} STREQUAL "Pascal") + set(arch_bin 6.0 6.1) + set(arch_ptx 6.1) + else() + message(SEND_ERROR "Unknown CUDA Architecture Name ${arch_name} in CUDA_SELECT_NVCC_ARCH_FLAGS") + endif() + endif() + list(APPEND cuda_arch_bin ${arch_bin}) + if(add_ptx) + if (NOT arch_ptx) + set(arch_ptx ${arch_bin}) + endif() + list(APPEND cuda_arch_ptx ${arch_ptx}) + endif() + endforeach() + + # remove dots and convert to lists + string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}") + string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}") + string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}") + string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}") + + if(cuda_arch_bin) + list(REMOVE_DUPLICATES cuda_arch_bin) + endif() + if(cuda_arch_ptx) + list(REMOVE_DUPLICATES cuda_arch_ptx) + endif() + + message(STATUS "cuda arch bin: ${cuda_arch_bin}") + message(STATUS "cuda arch ptx: ${cuda_arch_ptx}") + set(nvcc_flags "") + set(nvcc_archs_readable "") + + # Tell NVCC to add binaries for the specified GPUs + foreach(arch ${cuda_arch_bin}) + if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)") + # User explicitly specified ARCH for the concrete CODE + list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}) + list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1}) + else() + # User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE + list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch}) + list(APPEND nvcc_archs_readable sm_${arch}) + endif() + endforeach() + + # Tell NVCC to add PTX intermediate code for the specified architectures + foreach(arch ${cuda_arch_ptx}) + list(APPEND nvcc_flags -gencode arch=compute_${arch},code=compute_${arch}) + list(APPEND nvcc_archs_readable compute_${arch}) + endforeach() + + string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}") + set(${out_variable} ${nvcc_flags} PARENT_SCOPE) + set(${out_variable}_readable ${nvcc_archs_readable} PARENT_SCOPE) +endfunction() + -- To stop receiving notification emails like this one, please contact ['"comm...@mxnet.apache.org" <comm...@mxnet.apache.org>'].