Author: Lewis Crawford Date: 2026-03-11T09:49:02Z New Revision: 380ac9e301ec63052795101a087c704645d569a5
URL: https://github.com/llvm/llvm-project/commit/380ac9e301ec63052795101a087c704645d569a5 DIFF: https://github.com/llvm/llvm-project/commit/380ac9e301ec63052795101a087c704645d569a5.diff LOG: [NVPTX][clang] Ensure CLZ(0) is defined on NVPTX (#185630) CUDA semantics specify that clz(0) = bitwidth, so clang should emit clz / ctz intrinsics for NVPTX with zero-is-poison = false. Added: clang/test/CodeGenCUDA/builtin-count-zeros-nvptx.cu Modified: clang/lib/Basic/Targets/NVPTX.h clang/test/Headers/gpuintrin.c Removed: ################################################################################ diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h index 6f8df323f379c..7921a042e9e9b 100644 --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -83,6 +83,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo { bool useFP16ConversionIntrinsics() const override { return false; } + bool isCLZForZeroUndef() const override { return false; } + bool initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, diff --git a/clang/test/CodeGenCUDA/builtin-count-zeros-nvptx.cu b/clang/test/CodeGenCUDA/builtin-count-zeros-nvptx.cu new file mode 100644 index 0000000000000..f003b32ca73b2 --- /dev/null +++ b/clang/test/CodeGenCUDA/builtin-count-zeros-nvptx.cu @@ -0,0 +1,12 @@ +// REQUIRES: nvptx-registered-target +// RUN: %clang_cc1 -x cuda -triple nvptx64-unknown-unknown -fcuda-is-device -emit-llvm %s -o - | FileCheck %s +// +// Ensure NVPTX uses isCLZForZeroUndef() = false (CUDA semantics: CLZ(i32 0) == 32). 
+ +#include "Inputs/cuda.h" + +__device__ int f(int x) { + return __builtin_ctz(x) + __builtin_clz(x); +} +// CHECK: call i32 @llvm.cttz.i32({{.*}}, i1 false) +// CHECK: call i32 @llvm.ctlz.i32({{.*}}, i1 false) diff --git a/clang/test/Headers/gpuintrin.c b/clang/test/Headers/gpuintrin.c index 04b50acc4a049..c6a20dec210bb 100644 --- a/clang/test/Headers/gpuintrin.c +++ b/clang/test/Headers/gpuintrin.c @@ -1109,7 +1109,7 @@ __gpu_kernel void foo() { // NVPTX-NEXT: br i1 [[TOBOOL]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] // NVPTX: [[COND_TRUE]]: // NVPTX-NEXT: [[TMP3:%.*]] = load i64, ptr [[__BELOW]], align 8 -// NVPTX-NEXT: [[TMP4:%.*]] = call i64 @llvm.ctlz.i64(i64 [[TMP3]], i1 true) +// NVPTX-NEXT: [[TMP4:%.*]] = call i64 @llvm.ctlz.i64(i64 [[TMP3]], i1 false) // NVPTX-NEXT: [[CAST:%.*]] = trunc i64 [[TMP4]] to i32 // NVPTX-NEXT: [[SUB2:%.*]] = sub nsw i32 63, [[CAST]] // NVPTX-NEXT: br label %[[COND_END:.*]] _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
