https://github.com/zeyi2 updated https://github.com/llvm/llvm-project/pull/173699
>From 1fe8fff7fdf129c117d86d4bac19c877032b5f3d Mon Sep 17 00:00:00 2001 From: mtx <[email protected]> Date: Sat, 27 Dec 2025 12:12:09 +0800 Subject: [PATCH 1/2] [clang-tidy] Add documentation and smoke test for CUDA --- clang-tools-extra/docs/clang-tidy/index.rst | 14 ++++++++++++++ .../test/clang-tidy/infrastructure/basic-cuda.cu | 9 +++++++++ 2 files changed, 23 insertions(+) create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu diff --git a/clang-tools-extra/docs/clang-tidy/index.rst b/clang-tools-extra/docs/clang-tidy/index.rst index 34da529902308..38aabc77540cf 100644 --- a/clang-tools-extra/docs/clang-tidy/index.rst +++ b/clang-tools-extra/docs/clang-tidy/index.rst @@ -349,6 +349,20 @@ An overview of all the command-line options: some-check.SomeOption: 'some value' ... +Running Clang-Tidy on CUDA Files +-------------------------------- + +:program:`clang-tidy` supports analyzing CUDA source files. +To correctly process host-side code, specify the CUDA toolkit path using +``--cuda-path`` and limit compilation to the host with ``--cuda-host-only``. + +.. code-block:: console + + $ clang-tidy source.cu -- --cuda-path=/path/to/cuda --cuda-host-only + +Using ``--cuda-host-only`` is recommended as it skips device-side compilation, +speeding up the analysis and avoiding potential device-specific errors. + Clang-Tidy Automation ===================== diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu b/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu new file mode 100644 index 0000000000000..3bc605d864461 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu @@ -0,0 +1,9 @@ +// RUN: clang-tidy %s -checks='-*,modernize-use-nullptr' -- -nocudainc -nocudalib --cuda-host-only | FileCheck %s + +#define __global__ __attribute__((global)) + +// CHECK: :[[@LINE+1]]:38: warning: use nullptr [modernize-use-nullptr] +__global__ void kernel(int *p) { p = 0; } + +// CHECK: :[[@LINE+1]]:11: warning: use nullptr [modernize-use-nullptr] +void *p = 0; >From 13e3f45d598d5a62e475c0256b629a6e4fad103c Mon Sep 17 00:00:00 2001 From: mtx <[email protected]> Date: Sat, 27 Dec 2025 17:53:47 +0800 Subject: [PATCH 2/2] Address review feedback --- clang-tools-extra/docs/clang-tidy/index.rst | 17 +- .../usr/local/cuda/include/cuda_runtime.h | 253 ++++++++++++++++++ .../clang-tidy/infrastructure/basic-cuda.cu | 11 +- 3 files changed, 273 insertions(+), 8 deletions(-) create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/usr/local/cuda/include/cuda_runtime.h diff --git a/clang-tools-extra/docs/clang-tidy/index.rst b/clang-tools-extra/docs/clang-tidy/index.rst index 38aabc77540cf..4a0bab8693878 100644 --- a/clang-tools-extra/docs/clang-tidy/index.rst +++ b/clang-tools-extra/docs/clang-tidy/index.rst @@ -352,16 +352,21 @@ An overview of all the command-line options: Running Clang-Tidy on CUDA Files -------------------------------- -:program:`clang-tidy` supports analyzing CUDA source files. -To correctly process host-side code, specify the CUDA toolkit path using -``--cuda-path`` and limit compilation to the host with ``--cuda-host-only``. +:program:`clang-tidy` supports analyzing CUDA source files. To ensure correct +header resolution, it is important to specify the CUDA toolkit path using +``--cuda-path``. For more details on how Clang handles CUDA, see +`Compiling CUDA with Clang <https://llvm.org/docs/CompileCudaWithLLVM.html>`_. .. code-block:: console - $ clang-tidy source.cu -- --cuda-path=/path/to/cuda --cuda-host-only + $ clang-tidy source.cu -- --cuda-path=/path/to/cuda -Using ``--cuda-host-only`` is recommended as it skips device-side compilation, -speeding up the analysis and avoiding potential device-specific errors. +By default, :program:`clang-tidy` will compile the code for the host. To +analyze device-side code, use the ``--cuda-device-only`` flag: + +.. code-block:: console + + $ clang-tidy source.cu -- --cuda-path=/path/to/cuda --cuda-device-only Clang-Tidy Automation ===================== diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/usr/local/cuda/include/cuda_runtime.h b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/usr/local/cuda/include/cuda_runtime.h new file mode 100644 index 0000000000000..421fa4dd7dbae --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/usr/local/cuda/include/cuda_runtime.h @@ -0,0 +1,253 @@ +/* Minimal declarations for CUDA support. Testing purposes only. */ + +#include <stddef.h> + +#if __HIP__ || __CUDA__ +#define __constant__ __attribute__((constant)) +#define __device__ __attribute__((device)) +#define __global__ __attribute__((global)) +#define __host__ __attribute__((host)) +#define __shared__ __attribute__((shared)) +#if __HIP__ +#define __managed__ __attribute__((managed)) +#endif +#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__))) +#define __grid_constant__ __attribute__((grid_constant)) +#define __cluster_dims__(...) __attribute__((cluster_dims(__VA_ARGS__))) +#define __no_cluster__ __attribute__((no_cluster)) +#else +#define __constant__ +#define __device__ +#define __global__ +#define __host__ +#define __shared__ +#define __managed__ +#define __launch_bounds__(...) +#define __grid_constant__ +#define __cluster_dims__(...) +#define __no_cluster__ +#endif + +struct dim3 { + unsigned x, y, z; + __host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {} +}; + +#if __HIP__ || HIP_PLATFORM +typedef struct hipStream *hipStream_t; +typedef enum hipError {} hipError_t; +int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0, + hipStream_t stream = 0); +extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize, + size_t sharedSize = 0, + hipStream_t stream = 0); +#ifndef __HIP_API_PER_THREAD_DEFAULT_STREAM__ +extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim, + dim3 blockDim, void **args, + size_t sharedMem, + hipStream_t stream); +#else +extern "C" hipError_t hipLaunchKernel_spt(const void *func, dim3 gridDim, + dim3 blockDim, void **args, + size_t sharedMem, + hipStream_t stream); +#endif // __HIP_API_PER_THREAD_DEFAULT_STREAM__ +#elif __OFFLOAD_VIA_LLVM__ +extern "C" unsigned __llvmPushCallConfiguration(dim3 gridDim, dim3 blockDim, + size_t sharedMem = 0, void *stream = 0); +extern "C" unsigned llvmLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, + void **args, size_t sharedMem = 0, void *stream = 0); +#else +typedef struct cudaStream *cudaStream_t; +typedef enum cudaError {} cudaError_t; +extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize, + size_t sharedSize = 0, + cudaStream_t stream = 0); +extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize, + size_t sharedSize = 0, + cudaStream_t stream = 0); +extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, + dim3 blockDim, void **args, + size_t sharedMem, cudaStream_t stream); +extern "C" cudaError_t cudaLaunchKernel_ptsz(const void *func, dim3 gridDim, + dim3 blockDim, void **args, + size_t sharedMem, cudaStream_t stream); +extern "C" __device__ cudaError_t cudaLaunchDevice(void *func, + void *parameterBuffer, + dim3 gridDim, dim3 blockDim, + unsigned int sharedMem, + cudaStream_t stream); +extern "C" __device__ void *cudaGetParameterBuffer(size_t alignment, + size_t size); +#endif + +extern "C" __device__ int printf(const char*, ...); + +struct char1 { + char x; + __host__ __device__ char1(char x = 0) : x(x) {} +}; +struct char2 { + char x, y; + __host__ __device__ char2(char x = 0, char y = 0) : x(x), y(y) {} +}; +struct char4 { + char x, y, z, w; + __host__ __device__ char4(char x = 0, char y = 0, char z = 0, char w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct uchar1 { + unsigned char x; + __host__ __device__ uchar1(unsigned char x = 0) : x(x) {} +}; +struct uchar2 { + unsigned char x, y; + __host__ __device__ uchar2(unsigned char x = 0, unsigned char y = 0) : x(x), y(y) {} +}; +struct uchar4 { + unsigned char x, y, z, w; + __host__ __device__ uchar4(unsigned char x = 0, unsigned char y = 0, unsigned char z = 0, unsigned char w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct short1 { + short x; + __host__ __device__ short1(short x = 0) : x(x) {} +}; +struct short2 { + short x, y; + __host__ __device__ short2(short x = 0, short y = 0) : x(x), y(y) {} +}; +struct short4 { + short x, y, z, w; + __host__ __device__ short4(short x = 0, short y = 0, short z = 0, short w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct ushort1 { + unsigned short x; + __host__ __device__ ushort1(unsigned short x = 0) : x(x) {} +}; +struct ushort2 { + unsigned short x, y; + __host__ __device__ ushort2(unsigned short x = 0, unsigned short y = 0) : x(x), y(y) {} +}; +struct ushort4 { + unsigned short x, y, z, w; + __host__ __device__ ushort4(unsigned short x = 0, unsigned short y = 0, unsigned short z = 0, unsigned short w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct int1 { + int x; + __host__ __device__ int1(int x = 0) : x(x) {} +}; +struct int2 { + int x, y; + __host__ __device__ int2(int x = 0, int y = 0) : x(x), y(y) {} +}; +struct int4 { + int x, y, z, w; + __host__ __device__ int4(int x = 0, int y = 0, int z = 0, int w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct uint1 { + unsigned x; + __host__ __device__ uint1(unsigned x = 0) : x(x) {} +}; +struct uint2 { + unsigned x, y; + __host__ __device__ uint2(unsigned x = 0, unsigned y = 0) : x(x), y(y) {} +}; +struct uint3 { + unsigned x, y, z; + __host__ __device__ uint3(unsigned x = 0, unsigned y = 0, unsigned z = 0) : x(x), y(y), z(z) {} +}; +struct uint4 { + unsigned x, y, z, w; + __host__ __device__ uint4(unsigned x = 0, unsigned y = 0, unsigned z = 0, unsigned w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct longlong1 { + long long x; + __host__ __device__ longlong1(long long x = 0) : x(x) {} +}; +struct longlong2 { + long long x, y; + __host__ __device__ longlong2(long long x = 0, long long y = 0) : x(x), y(y) {} +}; +struct longlong4 { + long long x, y, z, w; + __host__ __device__ longlong4(long long x = 0, long long y = 0, long long z = 0, long long w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct ulonglong1 { + unsigned long long x; + __host__ __device__ ulonglong1(unsigned long long x = 0) : x(x) {} +}; +struct ulonglong2 { + unsigned long long x, y; + __host__ __device__ ulonglong2(unsigned long long x = 0, unsigned long long y = 0) : x(x), y(y) {} +}; +struct ulonglong4 { + unsigned long long x, y, z, w; + __host__ __device__ ulonglong4(unsigned long long x = 0, unsigned long long y = 0, unsigned long long z = 0, unsigned long long w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct float1 { + float x; + __host__ __device__ float1(float x = 0) : x(x) {} +}; +struct float2 { + float x, y; + __host__ __device__ float2(float x = 0, float y = 0) : x(x), y(y) {} +}; +struct float4 { + float x, y, z, w; + __host__ __device__ float4(float x = 0, float y = 0, float z = 0, float w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct double1 { + double x; + __host__ __device__ double1(double x = 0) : x(x) {} +}; +struct double2 { + double x, y; + __host__ __device__ double2(double x = 0, double y = 0) : x(x), y(y) {} +}; +struct double4 { + double x, y, z, w; + __host__ __device__ double4(double x = 0, double y = 0, double z = 0, double w = 0) : x(x), y(y), z(z), w(w) {} +}; + +typedef unsigned long long cudaTextureObject_t; +typedef unsigned long long cudaSurfaceObject_t; + +enum cudaTextureReadMode { + cudaReadModeNormalizedFloat, + cudaReadModeElementType +}; + +enum cudaSurfaceBoundaryMode { + cudaBoundaryModeZero, + cudaBoundaryModeClamp, + cudaBoundaryModeTrap +}; + +enum { + cudaTextureType1D, + cudaTextureType2D, + cudaTextureType3D, + cudaTextureTypeCubemap, + cudaTextureType1DLayered, + cudaTextureType2DLayered, + cudaTextureTypeCubemapLayered +}; + +struct textureReference { }; +template <class T, int texType = cudaTextureType1D, + enum cudaTextureReadMode mode = cudaReadModeElementType> +struct __attribute__((device_builtin_texture_type)) texture + : public textureReference {}; + +struct surfaceReference { int desc; }; + +template <typename T, int dim = 1> +struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference {}; diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu b/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu index 3bc605d864461..37b3b5ab7ade6 100644 --- a/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu +++ b/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu @@ -1,6 +1,13 @@ -// RUN: clang-tidy %s -checks='-*,modernize-use-nullptr' -- -nocudainc -nocudalib --cuda-host-only | FileCheck %s +// RUN: clang-tidy %s -checks='-*,modernize-use-nullptr' -- \ +// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: -nocudalib -nocudainc -I %S/Inputs/CUDA/usr/local/cuda/include \ +// RUN: --cuda-host-only | FileCheck %s +// RUN: clang-tidy %s -checks='-*,modernize-use-nullptr' -- \ +// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: -nocudalib -nocudainc -I %S/Inputs/CUDA/usr/local/cuda/include \ +// RUN: --cuda-device-only | FileCheck %s -#define __global__ __attribute__((global)) +#include <cuda_runtime.h> // CHECK: :[[@LINE+1]]:38: warning: use nullptr [modernize-use-nullptr] __global__ void kernel(int *p) { p = 0; } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
