[incubator-mxnet] branch master updated: use first class cuda with cmake 3.9 and cuda9.0 support (#8572)

cjolivier01 Fri, 10 Nov 2017 18:56:29 -0800

This is an automated email from the ASF dual-hosted git repository.

cjolivier01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git



The following commit(s) were added to refs/heads/master by this push:
     new 35ceea7  use first class cuda with cmake 3.9 and cuda9.0 support 
(#8572)
35ceea7 is described below

commit 35ceea73ccc1acfb2ec62cdaa841822e51c13456
Author: Hu Shiwen <yajiedes...@gmail.com>
AuthorDate: Sat Nov 11 10:55:44 2017 +0800

    use first class cuda with cmake 3.9 and cuda9.0 support (#8572)
    
    * use first class cuda with cmake 3.9 and cuda9.0 support
    fix lapack auto use with openblas
    
    * change name
---
 CMakeLists.txt                 | 133 ++++++++++++++++-------
 cmake/ChooseBlas.cmake         |  58 ++++++++++
 cmake/FirstClassLangCuda.cmake | 236 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 386 insertions(+), 41 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 539515b..af681d0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,17 +1,35 @@
 cmake_minimum_required(VERSION 3.0.2)
 
-project(mxnet C CXX)
+if((${CMAKE_VERSION} VERSION_GREATER "3.9.0") OR (${CMAKE_VERSION} 
VERSION_EQUAL "3.9.0"))
+  set(FIRST_CUDA TRUE)
+else()
+  set(FIRST_CUDA FALSE)
+endif()
+include(cmake/Utils.cmake)
 
-if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/build/private/local_config.cmake)
-  include(${CMAKE_CURRENT_SOURCE_DIR}/build/private/local_config.cmake)
+#Some things have order. This must be put in front alone
+mxnet_option(USE_CUDA             "Build with CUDA support"   ON)
+mxnet_option(USE_OLDCMAKECUDA           "Build with old cmake cuda" OFF)
+if(USE_CUDA)
+  add_definitions(-DMSHADOW_USE_CUDA=1)
+  IF(FIRST_CUDA AND (NOT USE_OLDCMAKECUDA))
+    set(__cuda_toolset "7.5" "8.0" "9.0")
+    set(CUDA_TOOLSET "8.0" CACHE STRING "Select CUDA Version.")
+    set_property( CACHE CUDA_TOOLSET PROPERTY STRINGS "" ${__cuda_toolset} )
+    set(CMAKE_GENERATOR_TOOLSET "cuda=${CUDA_TOOLSET},host=x64")
+    project(mxnet C CXX CUDA)
+  else()
+    project(mxnet C CXX)
+    set(FIRST_CUDA FALSE)
+  endif()
+else()
+  project(mxnet C CXX)
+  add_definitions(-DMSHADOW_USE_CUDA=0)
 endif()
 
-set(CMAKE_MODULE_PATH 
"${PROJECT_SOURCE_DIR}/cmake/Modules;${CMAKE_MODULE_PATH}")
 
-include(cmake/Utils.cmake)
 mxnet_option(USE_OPENCV           "Build with OpenCV support" ON)
 mxnet_option(USE_OPENMP           "Build with Openmp support" ON)
-mxnet_option(USE_CUDA             "Build with CUDA support"   ON)
 mxnet_option(USE_CUDNN            "Build with cudnn support"  ON) # one could 
set CUDNN_ROOT for search path
 mxnet_option(USE_LAPACK           "Build with lapack support" ON IF NOT MSVC)
 mxnet_option(USE_MKL_IF_AVAILABLE "Use MKL if found" ON)
@@ -29,6 +47,17 @@ mxnet_option(USE_GPROF            "Compile with gprof 
(profiling) flag" OFF)
 mxnet_option(USE_VTUNE            "Enable use of Intel Amplifier XE (VTune)" 
OFF) # one could set VTUNE_ROOT for search path
 mxnet_option(INSTALL_EXAMPLES     "Install the example source files." OFF)
 
+
+
+if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/build/private/local_config.cmake)
+  include(${CMAKE_CURRENT_SOURCE_DIR}/build/private/local_config.cmake)
+endif()
+
+set(CMAKE_MODULE_PATH 
"${PROJECT_SOURCE_DIR}/cmake/Modules;${CMAKE_MODULE_PATH}")
+
+
+
+
 SET(EXTRA_OPERATORS "" CACHE PATH "EXTRA OPERATORS PATH")
 
 if("$ENV{VERBOSE}" STREQUAL "1")
@@ -128,14 +157,20 @@ endif()
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)
 
-if(EXISTS ${PROJECT_SOURCE_DIR}/mshadow/cmake)
-  include(mshadow/cmake/mshadow.cmake)
+if(FIRST_CUDA)
+  include(cmake/ChooseBlas.cmake)
   include(mshadow/cmake/Utils.cmake)
-  include(mshadow/cmake/Cuda.cmake)
+  include(cmake/FirstClassLangCuda.cmake)
 else()
-  include(mshadowUtils)
-  include(Cuda)
-  include(mshadow)
+  if(EXISTS ${PROJECT_SOURCE_DIR}/mshadow/cmake)
+    include(mshadow/cmake/mshadow.cmake)
+    include(mshadow/cmake/Utils.cmake)
+    include(mshadow/cmake/Cuda.cmake)
+  else()
+    include(mshadowUtils)
+    include(Cuda)
+    include(mshadow)
+  endif()
 endif()
 
 list(APPEND mxnet_LINKER_LIBS ${mshadow_LINKER_LIBS})
@@ -241,7 +276,7 @@ if(USE_LAPACK)
   list(APPEND mxnet_LINKER_LIBS lapack)
 else(USE_LAPACK)
   # Workaround for Windows until using new Jenkinsfile.
-  if(USE_BLAS STREQUAL "open")
+  if(BLAS STREQUAL "Open" OR BLAS STREQUAL "open")
     add_definitions(-DMXNET_USE_LAPACK=1)
   endif()
 endif()
@@ -372,36 +407,46 @@ if(MSVC)
 endif()
 
 if(USE_CUDA)
-  list(APPEND CUDA_INCLUDE_DIRS ${INCLUDE_DIRECTORIES})
-  # define preprocessor macro so that we will not include the generated 
forcelink header
-  mshadow_cuda_compile(cuda_objs ${CUDA})
-  if(MSVC)
-    FIND_LIBRARY(CUDA_nvrtc_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64"  
"${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
-    list(APPEND mxnet_LINKER_LIBS ${CUDA_nvrtc_LIBRARY})
-    set(CUDA_cuda_LIBRARY "${CUDA_nvrtc_LIBRARY}/../cuda.lib")
-    list(APPEND mxnet_LINKER_LIBS ${CUDA_cuda_LIBRARY})
-    FIND_LIBRARY(CUDA_cufft_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64"  
"${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
-    list(APPEND mxnet_LINKER_LIBS "${CUDA_cufft_LIBRARY}/../cufft.lib") # For 
fft operator
-    FIND_LIBRARY(CUDA_cusolver_LIBRARY nvrtc 
"${CUDA_TOOLKIT_ROOT_DIR}/lib/x64"  "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
-    list(APPEND mxnet_LINKER_LIBS "${CUDA_cusolver_LIBRARY}/../cusolver.lib") 
# For cusolver
-  else(MSVC)
-    list(APPEND mxnet_LINKER_LIBS nvrtc cuda cufft cusolver)
-    link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
-  endif()
-  list(APPEND SOURCE ${cuda_objs} ${CUDA})
-  add_definitions(-DMXNET_USE_CUDA=1)
-  if(CUDA_LIBRARY_PATH)
-    if(IS_CONTAINER_BUILD)
-      # In case of building on a production-like build container which may not 
have Cuda installed
-      if(NOT CMAKE_SYSTEM_HAS_CUDA)
-        # Assuming building in a container that doesn't have CUDA installed 
(ie CPU-only build machine)
-        # so use the stub cuda driver shared library
-        if(EXISTS ${CUDA_LIBRARY_PATH}/stubs/libcuda.so)
-          link_directories(${CUDA_LIBRARY_PATH}/stubs)
+  if(FIRST_CUDA)
+    mshadow_select_nvcc_arch_flags(NVCC_FLAGS_ARCH)
+    string(REPLACE ";" " " NVCC_FLAGS_ARCH "${NVCC_FLAGS_ARCH}")
+    set(CMAKE_CUDA_FLAGS "${NVCC_FLAGS_ARCH}")
+    set(CMAKE_CUDA_FLAGS_RELEASE "${NVCC_FLAGS_ARCH} -use_fast_math")
+    list(APPEND mxnet_LINKER_LIBS nvrtc cuda cublas cufft cusolver curand)
+    list(APPEND SOURCE ${CUDA})
+    add_definitions(-DMXNET_USE_CUDA=1)
+  else()
+    list(APPEND CUDA_INCLUDE_DIRS ${INCLUDE_DIRECTORIES})
+    # define preprocessor macro so that we will not include the generated 
forcelink header
+    mshadow_cuda_compile(cuda_objs ${CUDA})
+    if(MSVC)
+        FIND_LIBRARY(CUDA_nvrtc_LIBRARY nvrtc 
"${CUDA_TOOLKIT_ROOT_DIR}/lib/x64"  "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
+        list(APPEND mxnet_LINKER_LIBS ${CUDA_nvrtc_LIBRARY})
+        set(CUDA_cuda_LIBRARY "${CUDA_nvrtc_LIBRARY}/../cuda.lib")
+        list(APPEND mxnet_LINKER_LIBS ${CUDA_cuda_LIBRARY})
+        FIND_LIBRARY(CUDA_cufft_LIBRARY nvrtc 
"${CUDA_TOOLKIT_ROOT_DIR}/lib/x64"  "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
+        list(APPEND mxnet_LINKER_LIBS "${CUDA_cufft_LIBRARY}/../cufft.lib") # 
For fft operator
+        FIND_LIBRARY(CUDA_cusolver_LIBRARY nvrtc 
"${CUDA_TOOLKIT_ROOT_DIR}/lib/x64"  "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
+        list(APPEND mxnet_LINKER_LIBS 
"${CUDA_cusolver_LIBRARY}/../cusolver.lib") # For cusolver
+    else(MSVC)
+        list(APPEND mxnet_LINKER_LIBS nvrtc cuda cufft cusolver)
+        link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
+    endif()
+    list(APPEND SOURCE ${cuda_objs} ${CUDA})
+    add_definitions(-DMXNET_USE_CUDA=1)
+    if(CUDA_LIBRARY_PATH)
+        if(IS_CONTAINER_BUILD)
+        # In case of building on a production-like build container which may 
not have Cuda installed
+        if(NOT CMAKE_SYSTEM_HAS_CUDA)
+            # Assuming building in a container that doesn't have CUDA 
installed (ie CPU-only build machine)
+            # so use the stub cuda driver shared library
+            if(EXISTS ${CUDA_LIBRARY_PATH}/stubs/libcuda.so)
+            link_directories(${CUDA_LIBRARY_PATH}/stubs)
+            endif()
+        endif()
         endif()
-      endif()
     endif()
-  endif()
+ endif()
 endif()
 
 # unsupported: if caffe is a subdirectory of mxnet, load its CMakeLists.txt as 
well
@@ -444,6 +489,12 @@ else()
   endif()
 endif()
 
+# Ask nvcc to forward the static C runtime switch to the host compiler.
+# -MT / -MTd are MSVC runtime-library switches, so they are only added for MSVC
+# builds; they must not be handed to other host compilers.
+if(USE_CUDA)
+  if(FIRST_CUDA AND MSVC)
+    target_compile_options(mxnet PUBLIC "$<$<CONFIG:DEBUG>:-Xcompiler=-MTd>")
+    target_compile_options(mxnet PUBLIC "$<$<CONFIG:RELEASE>:-Xcompiler=-MT>")
+  endif()
+endif()
 if(USE_DIST_KVSTORE)
   if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/ps-lite/CMakeLists.txt)
     add_subdirectory("ps-lite")
diff --git a/cmake/ChooseBlas.cmake b/cmake/ChooseBlas.cmake
new file mode 100644
index 0000000..3a8723a
--- /dev/null
+++ b/cmake/ChooseBlas.cmake
@@ -0,0 +1,58 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Choose the BLAS implementation used by mshadow. The selection is kept in BLAS;
+# each branch below adds the include directory, appends the libraries to
+# mshadow_LINKER_LIBS and defines MSHADOW_USE_CBLAS / MSHADOW_USE_MKL to match.
+set(BLAS "Open" CACHE STRING "Selected BLAS library")
+set_property(CACHE BLAS PROPERTY STRINGS "Atlas;Open;MKL")
+
+# When MKL is requested and can be found, override the selection with a normal
+# (non-cache) variable: "open" if USE_MKLML_MKL is set, otherwise "MKL".
+if(USE_MKL_IF_AVAILABLE)
+  if(NOT MKL_FOUND)
+    find_package(MKL)
+  endif()
+  if(MKL_FOUND)
+    if(USE_MKLML_MKL)
+      set(BLAS "open")
+    else()
+      set(BLAS "MKL")
+    endif()
+  endif()
+endif()
+
+# Atlas, Open and MKL are accepted in capitalized or all-lowercase spelling.
+# A BLAS value that matches none of the branches configures nothing.
+if(BLAS STREQUAL "Atlas" OR BLAS STREQUAL "atlas")
+  find_package(Atlas REQUIRED)
+  include_directories(SYSTEM ${Atlas_INCLUDE_DIR})
+  list(APPEND mshadow_LINKER_LIBS ${Atlas_LIBRARIES})
+  add_definitions(-DMSHADOW_USE_CBLAS=1)
+  add_definitions(-DMSHADOW_USE_MKL=0)
+elseif(BLAS STREQUAL "Open" OR BLAS STREQUAL "open")
+  find_package(OpenBLAS REQUIRED)
+  include_directories(SYSTEM ${OpenBLAS_INCLUDE_DIR})
+  list(APPEND mshadow_LINKER_LIBS ${OpenBLAS_LIB})
+  add_definitions(-DMSHADOW_USE_CBLAS=1)
+  add_definitions(-DMSHADOW_USE_MKL=0)
+elseif(BLAS STREQUAL "MKL" OR BLAS STREQUAL "mkl")
+  find_package(MKL REQUIRED)
+  include_directories(SYSTEM ${MKL_INCLUDE_DIR})
+  list(APPEND mshadow_LINKER_LIBS ${MKL_LIBRARIES})
+  add_definitions(-DMSHADOW_USE_CBLAS=0)
+  add_definitions(-DMSHADOW_USE_MKL=1)
+# "apple" (the Accelerate package) is accepted here although it is not offered
+# in the STRINGS list above, and only in this lowercase spelling.
+elseif(BLAS STREQUAL "apple")
+  find_package(Accelerate REQUIRED)
+  include_directories(SYSTEM ${Accelerate_INCLUDE_DIR})
+  list(APPEND mshadow_LINKER_LIBS ${Accelerate_LIBRARIES})
+  add_definitions(-DMSHADOW_USE_MKL=0)
+  add_definitions(-DMSHADOW_USE_CBLAS=1)
+endif()
\ No newline at end of file
diff --git a/cmake/FirstClassLangCuda.cmake b/cmake/FirstClassLangCuda.cmake
new file mode 100644
index 0000000..73f0758
--- /dev/null
+++ b/cmake/FirstClassLangCuda.cmake
@@ -0,0 +1,236 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#this file is CUDA help function with CMAKE first class CUDA
+
+include(CheckCXXCompilerFlag)
+check_cxx_compiler_flag("-std=c++11"   SUPPORT_CXX11)
+
+################################################################################################
+# Short command for cuDNN detection. We believe cuDNN will soon be part of the
+# CUDA toolkit distribution, which is why this is a small helper function
+# rather than a FindcuDNN.cmake module.
+# Usage:
+#   detect_cuDNN()
+function(detect_cuDNN)
+  # Locates the cuDNN header (cudnn.h) and library and reports the result.
+  # Search roots are the CUDNN_ROOT cache entry and the CUDNN_ROOT environment
+  # variable; the library search additionally looks in the detected include
+  # directory. On success HAVE_CUDNN and CUDNN_FOUND are set to TRUE in the
+  # caller's scope; otherwise neither variable is touched.
+  set(CUDNN_ROOT "" CACHE PATH "CUDNN root folder")
+
+  find_path(CUDNN_INCLUDE cudnn.h
+            PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT}
+            DOC "Path to cuDNN include directory." )
+
+
+  find_library(CUDNN_LIBRARY NAMES libcudnn.so cudnn.lib # libcudnn_static.a
+                             PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} 
${CUDNN_INCLUDE}
+                             DOC "Path to cuDNN library.")
+
+  # Both pieces must be found before cuDNN is reported as available.
+  if(CUDNN_INCLUDE AND CUDNN_LIBRARY)
+    set(HAVE_CUDNN  TRUE PARENT_SCOPE)
+    set(CUDNN_FOUND TRUE PARENT_SCOPE)
+
+    # Hide the resolved entries from the default cache view once detection worked.
+    mark_as_advanced(CUDNN_INCLUDE CUDNN_LIBRARY CUDNN_ROOT)
+    message(STATUS "Found cuDNN (include: ${CUDNN_INCLUDE}, library: 
${CUDNN_LIBRARY})")
+  endif()
+endfunction()
+
+
+
+################################################################################################
+# A function for automatic detection of the installed GPUs (used when
+# CUDA_ARCH_LIST is set to "Auto")
+# Usage:
+#   mshadow_detect_installed_gpus(out_variable)
+function(mshadow_detect_installed_gpus out_variable)
+  # Builds and runs a tiny CUDA program that prints "<major>.<minor> " for every
+  # device it can query, and returns the detected compute capabilities as a list
+  # in ${out_variable} (caller's scope). When the probe cannot be compiled or run
+  # successfully, ${out_variable} falls back to CUDA_KNOWN_GPU_ARCHITECTURES.
+  if(NOT CUDA_gpu_detect_output)
+    set(__cufile "${PROJECT_BINARY_DIR}/detect_cuda_archs.cu")
+
+    file(WRITE "${__cufile}" ""
+      "#include <cstdio>\n"
+      "int main()\n"
+      "{\n"
+      "  int count = 0;\n"
+      "  if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
+      "  if (count == 0) return -1;\n"
+      "  for (int device = 0; device < count; ++device)\n"
+      "  {\n"
+      "    cudaDeviceProp prop;\n"
+      "    if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
+      "      std::printf(\"%d.%d \", prop.major, prop.minor);\n"
+      "  }\n"
+      "  return 0;\n"
+      "}\n")
+    # NOTE(review): CMake documents enable_language() as a file-scope command;
+    # kept here unchanged, presumably a no-op once project() enabled CUDA - confirm.
+    enable_language(CUDA)
+
+    # The source argument must be the probe written above (__cufile).
+    # __nvcc_res receives the program's exit code, __compile_result the build status.
+    try_run(__nvcc_res __compile_result "${PROJECT_BINARY_DIR}" "${__cufile}"
+            COMPILE_OUTPUT_VARIABLE __compile_out
+            RUN_OUTPUT_VARIABLE __nvcc_out)
+
+    if(__nvcc_res EQUAL 0 AND __compile_result)
+      # Keep only the "major.minor" tokens: this drops line breaks and any other
+      # stray text, and MATCHALL keeps every reported device, not just the first.
+      string(REGEX MATCHALL "[1-9]\\.[0-9]" __nvcc_out "${__nvcc_out}")
+      # Devices of capability 2.1 are built with compute_20 code (see the
+      # "code(arch)" handling in mshadow_select_nvcc_arch_flags).
+      string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}")
+      set(CUDA_gpu_detect_output ${__nvcc_out})
+    else()
+      message(WARNING "Running GPU detection script with nvcc failed: ${__nvcc_out} ${__compile_out}")
+    endif()
+  endif()
+
+  if(NOT CUDA_gpu_detect_output)
+    # Fall back to the architecture names this file defines.
+    message(WARNING "Automatic GPU detection failed. Building for all known architectures (${CUDA_KNOWN_GPU_ARCHITECTURES}).")
+    set(${out_variable} ${CUDA_KNOWN_GPU_ARCHITECTURES} PARENT_SCOPE)
+  else()
+    set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE)
+  endif()
+endfunction()
+
+
+# Architecture names built when CUDA_ARCH_LIST is "All" (the default value of
+# that cache entry).
+set(CUDA_KNOWN_GPU_ARCHITECTURES "Fermi" "Kepler" "Maxwell")
+
+# Compute capabilities built when CUDA_ARCH_LIST is "Common".
+set(CUDA_COMMON_GPU_ARCHITECTURES "3.0" "3.5" "5.0")
+
+# Both lists grow with the selected CUDA_TOOLSET version.
+# Newer than 6.5: add the Tegra/Tesla variants and compute capability 5.2.
+if (CUDA_TOOLSET VERSION_GREATER "6.5")
+  list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Kepler+Tegra" "Kepler+Tesla" 
"Maxwell+Tegra")
+  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.2")
+endif ()
+
+# Newer than 7.5: add Pascal and emit PTX for 6.1; otherwise emit PTX for 5.2,
+# the newest capability in the common list at that point.
+if (CUDA_TOOLSET VERSION_GREATER "7.5")
+  list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Pascal")
+  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "6.0" "6.1" "6.1+PTX")
+else()
+  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.2+PTX")
+endif ()
+
+################################################################################################
+# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME
+# Usage:
+#   mshadow_select_nvcc_arch_flags(out_variable)
+function(mshadow_select_nvcc_arch_flags out_variable)
+  # Translates the CUDA_ARCH_LIST cache entry into nvcc "-gencode" flags.
+  #
+  # CUDA_ARCH_LIST may be "All", "Common", "Auto", or a list (separated by
+  # spaces, tabs or semicolons) of architecture names (e.g. "Kepler") and/or
+  # compute capabilities (e.g. "5.2", "2.1(2.0)"). A "+PTX" suffix on an entry
+  # additionally requests PTX for it. An empty value is treated as "All".
+  #
+  # Results, set in the caller's scope:
+  #   ${out_variable}          - list of nvcc flags
+  #   ${out_variable}_readable - space separated summary, e.g. "sm_35 compute_35"
+
+  set(CUDA_ARCH_LIST "All" CACHE STRING "Select target NVIDIA GPU architecture.")
+  # Offer every keyword handled below, plus the known architecture names.
+  set_property(CACHE CUDA_ARCH_LIST PROPERTY STRINGS "" "All" "Common" "Auto" ${CUDA_KNOWN_GPU_ARCHITECTURES})
+  # The cache entry is named CUDA_ARCH_LIST, so that is the entry to mark.
+  mark_as_advanced(CUDA_ARCH_LIST)
+
+  if("X${CUDA_ARCH_LIST}" STREQUAL "X" )
+    set(CUDA_ARCH_LIST "All")
+  endif()
+
+  set(cuda_arch_bin)
+  set(cuda_arch_ptx)
+
+  # Expand the keyword selections into a concrete list.
+  if("${CUDA_ARCH_LIST}" STREQUAL "All")
+    set(CUDA_ARCH_LIST ${CUDA_KNOWN_GPU_ARCHITECTURES})
+  elseif("${CUDA_ARCH_LIST}" STREQUAL "Common")
+    set(CUDA_ARCH_LIST ${CUDA_COMMON_GPU_ARCHITECTURES})
+  elseif("${CUDA_ARCH_LIST}" STREQUAL "Auto")
+    mshadow_detect_installed_gpus(CUDA_ARCH_LIST)
+    message(STATUS "Autodetected CUDA architecture(s): ${CUDA_ARCH_LIST}")
+  endif()
+
+  # Now process the list and look for names
+  string(REGEX REPLACE "[ \t]+" ";" CUDA_ARCH_LIST "${CUDA_ARCH_LIST}")
+  list(REMOVE_DUPLICATES CUDA_ARCH_LIST)
+  foreach(arch_name ${CUDA_ARCH_LIST})
+    set(arch_bin)
+    set(arch_ptx)
+    set(add_ptx FALSE)
+    # Check to see if we are compiling PTX
+    if(arch_name MATCHES "(.*)\\+PTX$")
+      set(add_ptx TRUE)
+      set(arch_name ${CMAKE_MATCH_1})
+    endif()
+    if(arch_name MATCHES "^([0-9]\\.[0-9](\\([0-9]\\.[0-9]\\))?)$")
+      # A plain compute capability, optionally in "code(arch)" form.
+      set(arch_bin ${CMAKE_MATCH_1})
+      set(arch_ptx ${arch_bin})
+    else()
+      # Look for it in our list of known architectures.
+      # The variable name is used unquoted so its value is compared exactly once.
+      if(arch_name STREQUAL "Fermi")
+        # Fermi binaries are only generated for toolsets older than 8.0.
+        if (CUDA_TOOLSET VERSION_LESS "8.0")
+          set(arch_bin 2.0 "2.1(2.0)")
+        endif()
+      elseif(arch_name STREQUAL "Kepler+Tegra")
+        set(arch_bin 3.2)
+      elseif(arch_name STREQUAL "Kepler+Tesla")
+        set(arch_bin 3.7)
+      elseif(arch_name STREQUAL "Kepler")
+        set(arch_bin 3.0 3.5)
+        set(arch_ptx 3.5)
+      elseif(arch_name STREQUAL "Maxwell+Tegra")
+        set(arch_bin 5.3)
+      elseif(arch_name STREQUAL "Maxwell")
+        set(arch_bin 5.0 5.2)
+        set(arch_ptx 5.2)
+      elseif(arch_name STREQUAL "Pascal")
+        set(arch_bin 6.0 6.1)
+        set(arch_ptx 6.1)
+      else()
+        message(SEND_ERROR "Unknown CUDA Architecture Name ${arch_name} in mshadow_select_nvcc_arch_flags")
+      endif()
+    endif()
+    list(APPEND cuda_arch_bin ${arch_bin})
+    if(add_ptx)
+      # Without a dedicated PTX choice, emit PTX for the binary architectures.
+      if (NOT arch_ptx)
+        set(arch_ptx ${arch_bin})
+      endif()
+      list(APPEND cuda_arch_ptx ${arch_ptx})
+    endif()
+  endforeach()
+
+  # remove dots and convert to lists
+  string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
+  string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}")
+  string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}")
+  string(REGEX MATCHALL "[0-9]+"   cuda_arch_ptx "${cuda_arch_ptx}")
+
+  if(cuda_arch_bin)
+    list(REMOVE_DUPLICATES cuda_arch_bin)
+  endif()
+  if(cuda_arch_ptx)
+    list(REMOVE_DUPLICATES cuda_arch_ptx)
+  endif()
+
+  message(STATUS "cuda arch bin: ${cuda_arch_bin}")
+  message(STATUS "cuda arch ptx: ${cuda_arch_ptx}")
+  set(nvcc_flags "")
+  set(nvcc_archs_readable "")
+
+  # Tell NVCC to add binaries for the specified GPUs
+  foreach(arch ${cuda_arch_bin})
+    if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
+      # User explicitly specified ARCH for the concrete CODE
+      list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
+      list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1})
+    else()
+      # User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE
+      list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch})
+      list(APPEND nvcc_archs_readable sm_${arch})
+    endif()
+  endforeach()
+
+  # Tell NVCC to add PTX intermediate code for the specified architectures
+  foreach(arch ${cuda_arch_ptx})
+    list(APPEND nvcc_flags -gencode arch=compute_${arch},code=compute_${arch})
+    list(APPEND nvcc_archs_readable compute_${arch})
+  endforeach()
+
+  string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
+  set(${out_variable}          ${nvcc_flags}          PARENT_SCOPE)
+  set(${out_variable}_readable ${nvcc_archs_readable} PARENT_SCOPE)
+endfunction()
+

-- 
To stop receiving notification emails like this one, please contact
['"comm...@mxnet.apache.org" <comm...@mxnet.apache.org>'].

[incubator-mxnet] branch master updated: use first class cuda with cmake 3.9 and cuda9.0 support (#8572)

Reply via email to