Author: Matt Arsenault Date: 2026-03-09T11:17:30+01:00 New Revision: d840396e20b09244ac3defcc1f2946102e29f030
URL: https://github.com/llvm/llvm-project/commit/d840396e20b09244ac3defcc1f2946102e29f030 DIFF: https://github.com/llvm/llvm-project/commit/d840396e20b09244ac3defcc1f2946102e29f030.diff LOG: clang: Simplify emission of uniform-work-group-size attribute (#185066) Added: Modified: clang/include/clang/Options/Options.td clang/lib/CodeGen/CGCall.cpp clang/test/CodeGenCUDA/convergent.cu clang/test/CodeGenCUDA/incomplete-func-ptr-type.cu clang/test/CodeGenHIP/default-attributes.hip clang/test/CodeGenHIP/hip_weak_alias.cpp clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl clang/test/CodeGenOpenCL/convergent.cl Removed: ################################################################################ diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index cc05fb71c84e4..45902aee92f72 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -1035,7 +1035,7 @@ def b : JoinedOrSeparate<["-"], "b">, Flags<[LinkerInput]>, Group<Link_Group>; defm offload_uniform_block : BoolFOption<"offload-uniform-block", - LangOpts<"OffloadUniformBlock">, Default<"LangOpts->CUDA">, + LangOpts<"OffloadUniformBlock">, Default<"LangOpts->CUDA || (LangOpts->OpenCL && LangOpts->OpenCLVersion <= 120)">, PosFlag<SetTrue, [], [ClangOption, CC1Option], "Assume">, NegFlag<SetFalse, [], [ClangOption, CC1Option], "Don't assume">, BothFlags<[], [ClangOption], " that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)">>; diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 04b27925bab8e..6dfd0f915190a 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2611,28 +2611,12 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, NumElemsParam); } - if (DeviceKernelAttr::isOpenCLSpelling( - TargetDecl->getAttr<DeviceKernelAttr>()) && - CallingConv != CallingConv::CC_C && - CallingConv != CallingConv::CC_SpirFunction) { - // Check CallingConv to avoid adding uniform-work-group-size attribute to - // OpenCL Kernel Stub - if (getLangOpts().OpenCLVersion <= 120) { - // OpenCL v1.2 Work groups are always uniform - FuncAttrs.addAttribute("uniform-work-group-size"); - } else { - // OpenCL v2.0 Work groups may be whether uniform or not. - // '-cl-uniform-work-group-size' compile option gets a hint - // to the compiler that the global work-size be a multiple of - // the work-group size specified to clEnqueueNDRangeKernel - // (i.e. work groups are uniform). - if (getLangOpts().OffloadUniformBlock) - FuncAttrs.addAttribute("uniform-work-group-size"); - } - } - - if (TargetDecl->hasAttr<CUDAGlobalAttr>() && - getLangOpts().OffloadUniformBlock) + // OpenCL v2.0 Work groups may be whether uniform or not. + // '-cl-uniform-work-group-size' compile option gets a hint + // to the compiler that the global work-size be a multiple of + // the work-group size specified to clEnqueueNDRangeKernel + // (i.e. work groups are uniform). + if (getLangOpts().OffloadUniformBlock) FuncAttrs.addAttribute("uniform-work-group-size"); if (TargetDecl->hasAttr<ArmLocallyStreamingAttr>()) diff --git a/clang/test/CodeGenCUDA/convergent.cu b/clang/test/CodeGenCUDA/convergent.cu index bb034ee4ff442..97a24dc20d841 100644 --- a/clang/test/CodeGenCUDA/convergent.cu +++ b/clang/test/CodeGenCUDA/convergent.cu @@ -36,27 +36,27 @@ __host__ __device__ [[clang::noconvergent]] float aliasf1(int) asm("somethingels // DEVICE-NEXT: call void @_Z3bazv() #[[ATTR4:[0-9]+]] // DEVICE-NEXT: [[TMP0:%.*]] = call i32 asm "trap", "=l"() #[[ATTR5:[0-9]+]], !srcloc [[META2:![0-9]+]] // DEVICE-NEXT: store i32 [[TMP0]], ptr [[X]], align 4 -// DEVICE-NEXT: call void asm sideeffect "trap", ""() #[[ATTR4]], !srcloc [[META3:![0-9]+]] -// DEVICE-NEXT: call void asm sideeffect "nop", ""() #[[ATTR6:[0-9]+]], !srcloc [[META4:![0-9]+]] +// DEVICE-NEXT: call void asm sideeffect "trap", ""() #[[ATTR6:[0-9]+]], !srcloc [[META3:![0-9]+]] +// DEVICE-NEXT: call void asm sideeffect "nop", ""() #[[ATTR7:[0-9]+]], !srcloc [[META4:![0-9]+]] // DEVICE-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4 // DEVICE-NEXT: [[CALL:%.*]] = call contract noundef float @something(i32 noundef [[TMP1]]) #[[ATTR4]] // DEVICE-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4 -// DEVICE-NEXT: [[CALL1:%.*]] = call contract noundef float @somethingelse(i32 noundef [[TMP2]]) #[[ATTR6]] +// DEVICE-NEXT: [[CALL1:%.*]] = call contract noundef float @somethingelse(i32 noundef [[TMP2]]) #[[ATTR8:[0-9]+]] // DEVICE-NEXT: ret void // // HOST-LABEL: define dso_local void @_Z3barv( // HOST-SAME: ) #[[ATTR0:[0-9]+]] { // HOST-NEXT: [[ENTRY:.*:]] // HOST-NEXT: [[X:%.*]] = alloca i32, align 4 -// HOST-NEXT: call void @_Z3bazv() -// HOST-NEXT: [[TMP0:%.*]] = call i32 asm "trap", "=l,~{dirflag},~{fpsr},~{flags}"() #[[ATTR2:[0-9]+]], !srcloc [[META1:![0-9]+]] +// HOST-NEXT: call void @_Z3bazv() #[[ATTR2:[0-9]+]] +// HOST-NEXT: [[TMP0:%.*]] = call i32 asm "trap", "=l,~{dirflag},~{fpsr},~{flags}"() #[[ATTR3:[0-9]+]], !srcloc [[META1:![0-9]+]] // HOST-NEXT: store i32 [[TMP0]], ptr [[X]], align 4 -// HOST-NEXT: call void asm sideeffect "trap", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR3:[0-9]+]], !srcloc [[META2:![0-9]+]] -// HOST-NEXT: call void asm sideeffect "nop", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR3]], !srcloc [[META3:![0-9]+]] +// HOST-NEXT: call void asm sideeffect "trap", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR4:[0-9]+]], !srcloc [[META2:![0-9]+]] +// HOST-NEXT: call void asm sideeffect "nop", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR4]], !srcloc [[META3:![0-9]+]] // HOST-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4 -// HOST-NEXT: [[CALL:%.*]] = call contract noundef float @something(i32 noundef [[TMP1]]) +// HOST-NEXT: [[CALL:%.*]] = call contract noundef float @something(i32 noundef [[TMP1]]) #[[ATTR2]] // HOST-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4 -// HOST-NEXT: [[CALL1:%.*]] = call contract noundef float @somethingelse(i32 noundef [[TMP2]]) +// HOST-NEXT: [[CALL1:%.*]] = call contract noundef float @somethingelse(i32 noundef [[TMP2]]) #[[ATTR2]] // HOST-NEXT: ret void // __host__ __device__ void bar() { @@ -71,27 +71,30 @@ __host__ __device__ void bar() { //. -// DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// DEVICE: attributes #[[ATTR1]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// DEVICE: attributes #[[ATTR2:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// DEVICE: attributes #[[ATTR3:[0-9]+]] = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// DEVICE: attributes #[[ATTR4]] = { convergent nounwind } +// DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" } +// DEVICE: attributes #[[ATTR1]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" } +// DEVICE: attributes #[[ATTR2:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" } +// DEVICE: attributes #[[ATTR3:[0-9]+]] = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" } +// DEVICE: attributes #[[ATTR4]] = { convergent nounwind "uniform-work-group-size" } // DEVICE: attributes #[[ATTR5]] = { convergent nounwind memory(none) } -// DEVICE: attributes #[[ATTR6]] = { nounwind } +// DEVICE: attributes #[[ATTR6]] = { convergent nounwind } +// DEVICE: attributes #[[ATTR7]] = { nounwind } +// DEVICE: attributes #[[ATTR8]] = { nounwind "uniform-work-group-size" } //. -// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } -// HOST: attributes #[[ATTR1:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } -// HOST: attributes #[[ATTR2]] = { nounwind memory(none) } -// HOST: attributes #[[ATTR3]] = { nounwind } +// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "uniform-work-group-size" } +// HOST: attributes #[[ATTR1:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "uniform-work-group-size" } +// HOST: attributes #[[ATTR2]] = { "uniform-work-group-size" } +// HOST: attributes #[[ATTR3]] = { nounwind memory(none) } +// HOST: attributes #[[ATTR4]] = { nounwind } //. // DEVICE: [[META0:![0-9]+]] = !{i32 4, !"nvvm-reflect-ftz", i32 0} // DEVICE: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -// DEVICE: [[META2]] = !{i64 3120} -// DEVICE: [[META3]] = !{i64 3155} -// DEVICE: [[META4]] = !{i64 3206} +// DEVICE: [[META2]] = !{i64 3174} +// DEVICE: [[META3]] = !{i64 3209} +// DEVICE: [[META4]] = !{i64 3260} //. // HOST: [[META0:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -// HOST: [[META1]] = !{i64 3120} -// HOST: [[META2]] = !{i64 3155} -// HOST: [[META3]] = !{i64 3206} +// HOST: [[META1]] = !{i64 3174} +// HOST: [[META2]] = !{i64 3209} +// HOST: [[META3]] = !{i64 3260} //. diff --git a/clang/test/CodeGenCUDA/incomplete-func-ptr-type.cu b/clang/test/CodeGenCUDA/incomplete-func-ptr-type.cu index bd1da1f05c1eb..f4641fd242b4d 100644 --- a/clang/test/CodeGenCUDA/incomplete-func-ptr-type.cu +++ b/clang/test/CodeGenCUDA/incomplete-func-ptr-type.cu @@ -16,7 +16,7 @@ int main(int argc, char ** argv) { // CHECK: store ptr @_Z4kern7TempValIjE, ptr %fptr, align 8 return 0; } -// CHECK: define dso_local void @_Z19__device_stub__kern7TempValIjE(i32 %in_val.coerce) #1 { +// CHECK: define dso_local void @_Z19__device_stub__kern7TempValIjE(i32 %in_val.coerce) #{{[0-9]+}} { // CHECK: %2 = call i32 @hipLaunchByPtr(ptr @_Z4kern7TempValIjE) // CHECK: define internal void @__hip_register_globals(ptr %0) { diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip index faccebbdad7e7..d5b5aa0c84ace 100644 --- a/clang/test/CodeGenHIP/default-attributes.hip +++ b/clang/test/CodeGenHIP/default-attributes.hip @@ -33,10 +33,10 @@ __global__ void kernel() { extern_func(); } //. -// OPTNONE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// OPTNONE: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// OPTNONE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" } +// OPTNONE: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" } // OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" } -// OPTNONE: attributes #[[ATTR3]] = { convergent nounwind } +// OPTNONE: attributes #[[ATTR3]] = { convergent nounwind "uniform-work-group-size" } //. // OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} // OPTNONE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} diff --git a/clang/test/CodeGenHIP/hip_weak_alias.cpp b/clang/test/CodeGenHIP/hip_weak_alias.cpp index 480a278dc55d3..33ff74a1cf995 100644 --- a/clang/test/CodeGenHIP/hip_weak_alias.cpp +++ b/clang/test/CodeGenHIP/hip_weak_alias.cpp @@ -119,9 +119,9 @@ __host__ __device__ float __Four(float f) { return 2.0f * f; } __host__ __device__ int Four(void) __attribute__((weak, alias("_Z6__Fourv"))); __host__ __device__ float Four(float f) __attribute__((weak, alias("_Z6__Fourf"))); //. -// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "uniform-work-group-size" } //. -// DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" } //. // HOST: [[META0:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} //. diff --git a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl index af6cc30fcd329..42630375aeb8f 100644 --- a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl +++ b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl @@ -198,7 +198,7 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) { //. // SPIR32: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" } // SPIR32: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -// SPIR32: attributes #[[ATTR2]] = { convergent noinline nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// SPIR32: attributes #[[ATTR2]] = { convergent noinline nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" } // SPIR32: attributes #[[ATTR3:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) } // SPIR32: attributes #[[ATTR4]] = { convergent nounwind denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // SPIR32: attributes #[[ATTR5]] = { convergent nounwind "uniform-work-group-size" } diff --git a/clang/test/CodeGenOpenCL/convergent.cl b/clang/test/CodeGenOpenCL/convergent.cl index 99d9ee74e669b..cefa4d7f8ebe9 100644 --- a/clang/test/CodeGenOpenCL/convergent.cl +++ b/clang/test/CodeGenOpenCL/convergent.cl @@ -127,18 +127,18 @@ void test_not_unroll() { // CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]] // CHECK-LABEL: @assume_convergent_asm -// CHECK: tail call void asm sideeffect "s_barrier", ""() #6 +// CHECK: tail call void asm sideeffect "s_barrier", ""() #8 kernel void assume_convergent_asm() { __asm__ volatile("s_barrier"); } // CHECK: attributes #0 = { nofree noinline norecurse nounwind memory(readwrite, target_mem0: none, target_mem1: none) " -// CHECK: attributes #1 = { {{[^}]*}}convergent{{[^}]*}} } -// CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} } -// CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} } -// CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} } -// CHECK: attributes #5 = { {{[^}]*}}alwaysinline convergent{{[^}]*}} } -// CHECK: attributes #6 = { {{[^}]*}}convergent{{[^}]*}} } -// CHECK: attributes #7 = { {{[^}]*}}nounwind{{[^}]*}} } -// CHECK: attributes #8 = { {{[^}]*}}convergent noduplicate nounwind{{[^}]*}} } +// CHECK: attributes #1 = { convergent norecurse nounwind{{[^}]*}} } +// CHECK: attributes #2 = { convergent nounwind{{[^}]*}} } +// CHECK: attributes #3 = { convergent noduplicate nounwind{{[^}]*}} } +// CHECK: attributes #4 = { alwaysinline convergent norecurse nounwind{{[^}]*}} } +// CHECK: attributes #5 = { convergent nounwind "uniform-work-group-size" } +// CHECK: attributes #6 = { nounwind "uniform-work-group-size" } +// CHECK: attributes #7 = { convergent noduplicate nounwind "uniform-work-group-size" } +// CHECK: attributes #8 = { convergent nounwind } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
