grokos created this revision.
grokos added a project: OpenMP.
Herald added a subscriber: mgorny.
The current implementation of the nvptx runtime (to be upstreamed shortly) uses
the `atomicMax` operation on 64-bit integers. This is only supported in compute
capabilities 3.5 and later. I've changed the clang default to `sm_35`.
Repository:
rC Clang
https://reviews.llvm.org/D40977
Files:
CMakeLists.txt
Index: CMakeLists.txt
===================================================================
--- CMakeLists.txt
+++ CMakeLists.txt
@@ -241,14 +241,15 @@
set(CLANG_DEFAULT_OPENMP_RUNTIME "libomp" CACHE STRING
"Default OpenMP runtime used by -fopenmp.")
-# OpenMP offloading requires at least sm_30 because we use shuffle instructions
-# to generate efficient code for reductions.
-set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
+# OpenMP offloading requires at least sm_35 because we use shuffle instructions
+# to generate efficient code for reductions and the atomicMax instruction on
+# 64-bit integers in the implementation of conditional lastprivate.
+set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
"Default architecture for OpenMP offloading to Nvidia GPUs.")
string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH
"${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
-if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 30)
- message(WARNING "Resetting default architecture for OpenMP offloading to
Nvidia GPUs to sm_30")
- set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
+if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
+ message(WARNING "Resetting default architecture for OpenMP offloading to
Nvidia GPUs to sm_35")
+ set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
"Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
endif()
Index: CMakeLists.txt
===================================================================
--- CMakeLists.txt
+++ CMakeLists.txt
@@ -241,14 +241,15 @@
set(CLANG_DEFAULT_OPENMP_RUNTIME "libomp" CACHE STRING
"Default OpenMP runtime used by -fopenmp.")
-# OpenMP offloading requires at least sm_30 because we use shuffle instructions
-# to generate efficient code for reductions.
-set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
+# OpenMP offloading requires at least sm_35 because we use shuffle instructions
+# to generate efficient code for reductions and the atomicMax instruction on
+# 64-bit integers in the implementation of conditional lastprivate.
+set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
"Default architecture for OpenMP offloading to Nvidia GPUs.")
string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
-if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 30)
- message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_30")
- set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
+if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
+ message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_35")
+ set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
"Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
endif()
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits