arsenm created this revision.
arsenm added reviewers: Anastasia, yaxunl, jdoerfert.
Herald added subscribers: kosarev, kerbowa, jvesely.
Herald added a project: All.
arsenm requested review of this revision.
Herald added a subscriber: wdng.

Yet another example of how `convergent` not being the default is dangerous
and backwards.


https://reviews.llvm.org/D141449

Files:
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
  clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl


Index: clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
===================================================================
--- clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -315,7 +315,7 @@
   };
 
   // Uses global block literal [[BLG8]] and invoke function [[INVG8]].
-  // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 
addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 
addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 
addrspace(4)*))
+  // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 
addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 
addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 
addrspace(4)*)) [[INVOKE_ATTR:#[0-9]+]]
   block_A();
 
   // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. 
[[INVGK8]] calls [[INVG8]].
@@ -411,7 +411,7 @@
 // COMMON:  ret void
 // COMMON: }
 // COMMON: define spir_kernel void [[INVLK2]](i8 addrspace(4)*{{.*}})
-// COMMON: define spir_kernel void [[INVGK1]](i8 addrspace(4)*{{.*}}, i8 
addrspace(3)*{{.*}})
+// COMMON: define spir_kernel void [[INVGK1]](i8 addrspace(4)*{{.*}}, i8 
addrspace(3)*{{.*}})  [[INVOKE_ATTR:#[0-9]+]]
 // COMMON: define spir_kernel void [[INVGK2]](i8 addrspace(4)*{{.*}}, i8 
addrspace(3)*{{.*}})
 // COMMON: define spir_kernel void [[INVGK3]](i8 addrspace(4)*{{.*}}, i8 
addrspace(3)*{{.*}})
 // COMMON: define spir_kernel void [[INVGK4]](i8 addrspace(4)*{{.*}}, i8 
addrspace(3)*{{.*}})
@@ -430,3 +430,5 @@
 // COMMON: define spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 
addrspace(3)*{{.*}})
 // COMMON: define spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}})
 // COMMON: define spir_kernel void [[INVGK11]](i8 addrspace(4)*{{.*}})
+
+// COMMON: attributes [[INVOKE_ATTR]] = { convergent nounwind }
Index: clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
===================================================================
--- clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -182,7 +182,7 @@
 // CHECK-NEXT:    ret void
 //
 //
-// CHECK: Function Attrs: nounwind
+// CHECK: Function Attrs: convergent nounwind
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_kernel
 // CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) 
#[[ATTR4:[0-9]+]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 
!kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
 // CHECK-NEXT:  entry:
@@ -216,7 +216,7 @@
 // CHECK-NEXT:    ret void
 //
 //
-// CHECK: Function Attrs: nounwind
+// CHECK: Function Attrs: convergent nounwind
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel
 // CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 
}> [[TMP0:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !7 !kernel_arg_access_qual 
!8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
 // CHECK-NEXT:  entry:
@@ -255,7 +255,7 @@
 // CHECK-NEXT:    ret void
 //
 //
-// CHECK: Function Attrs: nounwind
+// CHECK: Function Attrs: convergent nounwind
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel
 // CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 
}> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR4]] 
!kernel_arg_addr_space !11 !kernel_arg_access_qual !12 !kernel_arg_type !13 
!kernel_arg_base_type !13 !kernel_arg_type_qual !14 {
 // CHECK-NEXT:  entry:
@@ -282,7 +282,7 @@
 // CHECK-NEXT:    ret void
 //
 //
-// CHECK: Function Attrs: nounwind
+// CHECK: Function Attrs: convergent nounwind
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel
 // CHECK-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) 
#[[ATTR4]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 
!kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
 // CHECK-NEXT:  entry:
@@ -297,7 +297,7 @@
 // CHECK: attributes #1 = { convergent noinline norecurse nounwind optnone 
"amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-cpu"="gfx900" 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 "uniform-work-group-size"="false" }
 // CHECK: attributes #2 = { nocallback nofree nounwind willreturn 
memory(argmem: readwrite) }
 // CHECK: attributes #3 = { convergent noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-cpu"="gfx900" 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 }
-// CHECK: attributes #4 = { nounwind "enqueued-block" }
+// CHECK: attributes #4 = { convergent nounwind "enqueued-block" }
 // CHECK: attributes #5 = { convergent }
 //.
 // CHECK: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -12425,6 +12425,7 @@
 
   // FIXME: Apply default attributes
   F->addFnAttr(llvm::Attribute::NoUnwind);
+  F->addFnAttr(llvm::Attribute::Convergent);
 
   Builder.CreateRetVoid();
   Builder.restoreIP(IP);
@@ -12476,6 +12477,7 @@
                                    &CGF.CGM.getModule());
   // FIXME: Apply default attributes
   F->addFnAttr(llvm::Attribute::NoUnwind);
+  F->addFnAttr(llvm::Attribute::Convergent);
   F->addFnAttr("enqueued-block");
 
   auto IP = CGF.Builder.saveIP();


Index: clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
===================================================================
--- clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -315,7 +315,7 @@
   };
 
   // Uses global block literal [[BLG8]] and invoke function [[INVG8]].
-  // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) [[INVOKE_ATTR:#[0-9]+]]
   block_A();
 
   // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
@@ -411,7 +411,7 @@
 // COMMON:  ret void
 // COMMON: }
 // COMMON: define spir_kernel void [[INVLK2]](i8 addrspace(4)*{{.*}})
-// COMMON: define spir_kernel void [[INVGK1]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
+// COMMON: define spir_kernel void [[INVGK1]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})  [[INVOKE_ATTR:#[0-9]+]]
 // COMMON: define spir_kernel void [[INVGK2]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
 // COMMON: define spir_kernel void [[INVGK3]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
 // COMMON: define spir_kernel void [[INVGK4]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
@@ -430,3 +430,5 @@
 // COMMON: define spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
 // COMMON: define spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}})
 // COMMON: define spir_kernel void [[INVGK11]](i8 addrspace(4)*{{.*}})
+
+// COMMON: attributes [[INVOKE_ATTR]] = { convergent nounwind }
Index: clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
===================================================================
--- clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -182,7 +182,7 @@
 // CHECK-NEXT:    ret void
 //
 //
-// CHECK: Function Attrs: nounwind
+// CHECK: Function Attrs: convergent nounwind
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_kernel
 // CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
 // CHECK-NEXT:  entry:
@@ -216,7 +216,7 @@
 // CHECK-NEXT:    ret void
 //
 //
-// CHECK: Function Attrs: nounwind
+// CHECK: Function Attrs: convergent nounwind
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel
 // CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
 // CHECK-NEXT:  entry:
@@ -255,7 +255,7 @@
 // CHECK-NEXT:    ret void
 //
 //
-// CHECK: Function Attrs: nounwind
+// CHECK: Function Attrs: convergent nounwind
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel
 // CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !11 !kernel_arg_access_qual !12 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !14 {
 // CHECK-NEXT:  entry:
@@ -282,7 +282,7 @@
 // CHECK-NEXT:    ret void
 //
 //
-// CHECK: Function Attrs: nounwind
+// CHECK: Function Attrs: convergent nounwind
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel
 // CHECK-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
 // CHECK-NEXT:  entry:
@@ -297,7 +297,7 @@
 // CHECK: attributes #1 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size"="false" }
 // CHECK: attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
 // CHECK: attributes #3 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
-// CHECK: attributes #4 = { nounwind "enqueued-block" }
+// CHECK: attributes #4 = { convergent nounwind "enqueued-block" }
 // CHECK: attributes #5 = { convergent }
 //.
 // CHECK: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -12425,6 +12425,7 @@
 
   // FIXME: Apply default attributes
   F->addFnAttr(llvm::Attribute::NoUnwind);
+  F->addFnAttr(llvm::Attribute::Convergent);
 
   Builder.CreateRetVoid();
   Builder.restoreIP(IP);
@@ -12476,6 +12477,7 @@
                                    &CGF.CGM.getModule());
   // FIXME: Apply default attributes
   F->addFnAttr(llvm::Attribute::NoUnwind);
+  F->addFnAttr(llvm::Attribute::Convergent);
   F->addFnAttr("enqueued-block");
 
   auto IP = CGF.Builder.saveIP();
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to