llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Mészáros Gergely (Maetveis) <details> <summary>Changes</summary> `#include <gpuintrin.h>` was leading to `llvm_unreachable(Invalid address space)` in `HandleAddressSpaceTypeAttribute` (SemaType.cpp) when compiling SYCL code. This happens when parsing the `[[clang::opencl_generic]]` attribute in `__gpu_is_ptr_(local|private)` functions. As far as I can tell that attribute and cast is there to allow the code to compile in OpenCL mode without generic address space support. This patch instead explicitly checks for OpenCL without generic address spaces and always returns true/false from `__gpu_is_ptr_(local|private)`. In that case unqualified pointers belong to the private address space by definition as per the OpenCL spec: > If the generic address space is supported i.e. for OpenCL C 2.0 > or OpenCL C 3.0 with __opencl_c_generic_address_space feature, > pointers that are declared without pointing to a named address space > point to the generic address space. > ... > For all other cases that are not listed above the address space is > inferred to __private. This includes: > - All function arguments as well as return values are in the private > address space. --- Full diff: https://github.com/llvm/llvm-project/pull/152314.diff 2 Files Affected: - (modified) clang/lib/Headers/amdgpuintrin.h (+12-4) - (modified) clang/test/Headers/gpuintrin_lang.c (+17) ``````````diff diff --git a/clang/lib/Headers/amdgpuintrin.h b/clang/lib/Headers/amdgpuintrin.h index f7fb8e2814180..827ad682a782f 100644 --- a/clang/lib/Headers/amdgpuintrin.h +++ b/clang/lib/Headers/amdgpuintrin.h @@ -165,14 +165,22 @@ __gpu_match_all_u64(uint64_t __lane_mask, uint64_t __x) { // Returns true if the flat pointer points to AMDGPU 'shared' memory. _DEFAULT_FN_ATTRS static __inline__ bool __gpu_is_ptr_local(void *ptr) { - return __builtin_amdgcn_is_shared((void [[clang::address_space(0)]] *)(( - void [[clang::opencl_generic]] *)ptr)); +#if (!defined(__OPENCL_C_VERSION__) && !defined(__OPENCL_CPP_VERSION__)) || \ + defined(__opencl_c_generic_address_space) + return __builtin_amdgcn_is_shared((void [[clang::address_space(0)]] *)(ptr)); +#else + return false; +#endif } // Returns true if the flat pointer points to AMDGPU 'private' memory. _DEFAULT_FN_ATTRS static __inline__ bool __gpu_is_ptr_private(void *ptr) { - return __builtin_amdgcn_is_private((void [[clang::address_space(0)]] *)(( - void [[clang::opencl_generic]] *)ptr)); +#if (!defined(__OPENCL_C_VERSION__) && !defined(__OPENCL_CPP_VERSION__)) || \ + defined(__opencl_c_generic_address_space) + return __builtin_amdgcn_is_shared((void [[clang::address_space(0)]] *)(ptr)); +#else + return true; +#endif } // Terminates execution of the associated wavefront. diff --git a/clang/test/Headers/gpuintrin_lang.c b/clang/test/Headers/gpuintrin_lang.c index 653f87aea2ce3..f4578ff781a7f 100644 --- a/clang/test/Headers/gpuintrin_lang.c +++ b/clang/test/Headers/gpuintrin_lang.c @@ -26,12 +26,19 @@ // RUN: -std=c89 -internal-isystem %S/../../lib/Headers/ \ // RUN: -triple amdgcn-amd-amdhsa -emit-llvm %s -o - \ // RUN: | FileCheck %s --check-prefix=C89 +// +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include \ +// RUN: -internal-isystem %S/../../lib/Headers/ \ +// RUN: -fsycl-is-device -triple amdgcn-amd-amdhsa -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefix=SYCL #define _DEFAULT_FN_ATTRS __attribute__((always_inline)) #include <gpuintrin.h> #ifdef __device__ __device__ int foo() { return __gpu_thread_id_x(); } +#elif defined(SYCL_EXTERNAL) +SYCL_EXTERNAL int foo() { return __gpu_thread_id_x(); } #else // CUDA-LABEL: define dso_local i32 @foo( // CUDA-SAME: ) #[[ATTR0:[0-9]+]] { @@ -71,6 +78,16 @@ __device__ int foo() { return __gpu_thread_id_x(); } // C89-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() // C89-NEXT: ret i32 [[TMP0]] // +// SYCL-LABEL: define dso_local i32 @foo( +// SYCL-SAME: ) #[[ATTR0:[0-9]+]] { +// SYCL-NEXT: [[ENTRY:.*:]] +// SYCL-NEXT: [[RETVAL_I:%.*]] = alloca i32, align 4, addrspace(5) +// SYCL-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) +// SYCL-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// SYCL-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr +// SYCL-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +// SYCL-NEXT: ret i32 [[TMP0]] +// int foo() { return __gpu_thread_id_x(); } #pragma omp declare target to(foo) #endif `````````` </details> https://github.com/llvm/llvm-project/pull/152314 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits