[clang] [AMDGPU] Check wavefrontsize for GFX11 WMMA builtins (PR #79980)

2024-01-30 Thread Christudasan Devadasan via cfe-commits

https://github.com/cdevadas approved this pull request.


https://github.com/llvm/llvm-project/pull/79980
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Check wavefrontsize for GFX11 WMMA builtins (PR #79980)

2024-01-30 Thread Christudasan Devadasan via cfe-commits


@@ -21,14 +21,14 @@ void test_amdgcn_wmma_f32_16x16x16_bf16_w64(global v4f* 
out4f, v16h a16h, v16h b
 global v8s* out8s, v4i a4i, v4i 
b4i, v8s c8s,
 global v4i* out4i, v2i a2i, v2i 
b2i, v4i c4i)
 {
- *out4f = __builtin_amdgcn_wmma_f32_16x16x16_f16_w64(a16h, b16h, c4f);  // 
expected-error{{'__builtin_amdgcn_wmma_f32_16x16x16_f16_w64' needs target 
feature gfx11-insts}}
- *out4f = __builtin_amdgcn_wmma_f32_16x16x16_bf16_w64(a16s, b16s, c4f);  // 
expected-error{{'__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64' needs target 
feature gfx11-insts}}
- *out8h = __builtin_amdgcn_wmma_f16_16x16x16_f16_w64(a16h, b16h, c8h, true); 
// expected-error{{'__builtin_amdgcn_wmma_f16_16x16x16_f16_w64' needs target 
feature gfx11-insts}}
- *out8s = __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64(a16s, b16s, c8s, true); 
// expected-error{{'__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64' needs target 
feature gfx11-insts}}
- *out8h = __builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64(a16h, b16h, c8h, 
true); // expected-error{{'__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64' 
needs target feature gfx11-insts}}
- *out8s = __builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64(a16s, b16s, c8s, 
true); // expected-error{{'__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64' 
needs target feature gfx11-insts}}
- *out4i = __builtin_amdgcn_wmma_i32_16x16x16_iu8_w64(true, a4i, true, b4i, 
c4i, false); // expected-error{{'__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64' 
needs target feature gfx11-insts}}
- *out4i = __builtin_amdgcn_wmma_i32_16x16x16_iu4_w64(true, a2i, true, b2i, 
c4i, false); // expected-error{{'__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64' 
needs target feature gfx11-insts}}
+ *out4f = __builtin_amdgcn_wmma_f32_16x16x16_f16_w64(a16h, b16h, c4f);  // 
expected-error{{'__builtin_amdgcn_wmma_f32_16x16x16_f16_w64' needs target 
feature gfx11-insts,wavefrontsize64}}
+ *out4f = __builtin_amdgcn_wmma_f32_16x16x16_bf16_w64(a16s, b16s, c4f);  // 
expected-error{{'__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64' needs target 
feature gfx11-insts,wavefrontsize64}}
+ *out8h = __builtin_amdgcn_wmma_f16_16x16x16_f16_w64(a16h, b16h, c8h, true); 
// expected-error{{'__builtin_amdgcn_wmma_f16_16x16x16_f16_w64' needs target 
feature gfx11-insts,wavefrontsize64}}
+ *out8s = __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64(a16s, b16s, c8s, true); 
// expected-error{{'__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64' needs target 
feature gfx11-insts,wavefrontsize64}}
+ *out8h = __builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64(a16h, b16h, c8h, 
true); // expected-error{{'__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64' 
needs target feature gfx11-insts,wavefrontsize64}}
+ *out8s = __builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64(a16s, b16s, c8s, 
true); // expected-error{{'__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64' 
needs target feature gfx11-insts,wavefrontsize64}}
+ *out4i = __builtin_amdgcn_wmma_i32_16x16x16_iu8_w64(true, a4i, true, b4i, 
c4i, false); // expected-error{{'__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64' 
needs target feature gfx11-insts,wavefrontsize64}}
+ *out4i = __builtin_amdgcn_wmma_i32_16x16x16_iu4_w64(true, a2i, true, b2i, 
c4i, false); // expected-error{{'__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64' 
needs target feature gfx11-insts,wavefrontsize64}}
 }
 
-#endif
\ No newline at end of file
+#endif

cdevadas wrote:

Have you added a new line here?

https://github.com/llvm/llvm-project/pull/79980
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r366683 - Updated the signature for some stack related intrinsics (CLANG)

2019-07-22 Thread Christudasan Devadasan via cfe-commits
Author: cdevadas
Date: Mon Jul 22 05:50:30 2019
New Revision: 366683

URL: http://llvm.org/viewvc/llvm-project?rev=366683=rev
Log:
Updated the signature for some stack related intrinsics (CLANG)

Modified the intrinsics
int_addressofreturnaddress,
int_frameaddress & int_sponentry.
This commit depends on the changes in rL366679

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D64563

Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/CodeGen/CGException.cpp
cfe/trunk/test/CodeGen/builtin-sponentry.c
cfe/trunk/test/CodeGen/exceptions-seh.c
cfe/trunk/test/CodeGen/integer-overflow.c
cfe/trunk/test/CodeGen/ms-intrinsics.c
cfe/trunk/test/CodeGen/ms-setjmp.c
cfe/trunk/test/CodeGenOpenCL/builtins-generic-amdgcn.cl

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=366683=366682=366683=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Jul 22 05:50:30 2019
@@ -843,10 +843,12 @@ static RValue EmitMSVCRTSetJmp(CodeGenFu
 Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
 Arg1Ty = CGF.Int8PtrTy;
 if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
-  Arg1 = 
CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::sponentry));
+  Arg1 = CGF.Builder.CreateCall(
+  CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
 } else
-  Arg1 = 
CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress),
-llvm::ConstantInt::get(CGF.Int32Ty, 0));
+  Arg1 = CGF.Builder.CreateCall(
+  CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
+  llvm::ConstantInt::get(CGF.Int32Ty, 0));
   }
 
   // Mark the call site and declaration with ReturnsTwice.
@@ -2556,7 +2558,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(
   case Builtin::BI__builtin_frame_address: {
 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
getContext().UnsignedIntTy);
-Function *F = CGM.getIntrinsic(Intrinsic::frameaddress);
+Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
 return RValue::get(Builder.CreateCall(F, Depth));
   }
   case Builtin::BI__builtin_extract_return_addr: {
@@ -2637,9 +2639,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(
 Address Buf = EmitPointerWithAlignment(E->getArg(0));
 
 // Store the frame pointer to the setjmp buffer.
-Value *FrameAddr =
-  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
- ConstantInt::get(Int32Ty, 0));
+Value *FrameAddr = Builder.CreateCall(
+CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
+ConstantInt::get(Int32Ty, 0));
 Builder.CreateStore(FrameAddr, Buf);
 
 // Store the stack pointer to the setjmp buffer.
@@ -7293,12 +7295,13 @@ Value *CodeGenFunction::EmitAArch64Built
   }
 
   if (BuiltinID == AArch64::BI_AddressOfReturnAddress) {
-llvm::Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
+llvm::Function *F =
+CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
 return Builder.CreateCall(F);
   }
 
   if (BuiltinID == AArch64::BI__builtin_sponentry) {
-llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry);
+llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, 
AllocaInt8PtrTy);
 return Builder.CreateCall(F);
   }
 
@@ -12113,7 +12116,8 @@ Value *CodeGenFunction::EmitX86BuiltinEx
   }
 
   case X86::BI_AddressOfReturnAddress: {
-Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
+Function *F =
+CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
 return Builder.CreateCall(F);
   }
   case X86::BI__stosb: {

Modified: cfe/trunk/lib/CodeGen/CGException.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGException.cpp?rev=366683=366682=366683=diff
==
--- cfe/trunk/lib/CodeGen/CGException.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGException.cpp Mon Jul 22 05:50:30 2019
@@ -1774,7 +1774,8 @@ void CodeGenFunction::EmitCapturedLocals
 // EH registration is passed in as the EBP physical register.  We can
 // recover that with llvm.frameaddress(1).
 EntryFP = Builder.CreateCall(
-CGM.getIntrinsic(llvm::Intrinsic::frameaddress), 
{Builder.getInt32(1)});
+CGM.getIntrinsic(llvm::Intrinsic::frameaddress, AllocaInt8PtrTy),
+{Builder.getInt32(1)});
   } else {
 // Otherwise, for x64 and 32-bit finally functions, the parent FP is the
 // second parameter.

Modified: cfe/trunk/test/CodeGen/builtin-sponentry.c
URL: 

r365643 - [AMDGPU] Increased the number of implicit argument bytes for both OpenCL and HIP (CLANG).

2019-07-10 Thread Christudasan Devadasan via cfe-commits
Author: cdevadas
Date: Wed Jul 10 08:10:08 2019
New Revision: 365643

URL: http://llvm.org/viewvc/llvm-project?rev=365643=rev
Log:
[AMDGPU] Increased the number of implicit argument bytes for both OpenCL and 
HIP (CLANG).

To enable a new implicit kernel argument,
increased the number of argument bytes from 48 to 56.

Reviewed By: yaxunl

Differential Revision: https://reviews.llvm.org/D63756

Modified:
cfe/trunk/lib/CodeGen/TargetInfo.cpp
cfe/trunk/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu
cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl

Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=365643=365642=365643=diff
==
--- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Wed Jul 10 08:10:08 2019
@@ -7910,7 +7910,7 @@ void AMDGPUTargetCodeGenInfo::setTargetA
   if (((M.getLangOpts().OpenCL && FD->hasAttr()) ||
   (M.getLangOpts().HIP && FD->hasAttr())) &&
   (M.getTriple().getOS() == llvm::Triple::AMDHSA))
-F->addFnAttr("amdgpu-implicitarg-num-bytes", "48");
+F->addFnAttr("amdgpu-implicitarg-num-bytes", "56");
 
   const auto *FlatWGS = FD->getAttr();
   if (ReqdWGS || FlatWGS) {

Modified: cfe/trunk/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu?rev=365643=365642=365643=diff
==
--- cfe/trunk/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu Wed Jul 10 
08:10:08 2019
@@ -5,4 +5,4 @@
 __global__ void hip_kernel_temp() {
 }
 
-// CHECK: attributes {{.*}} = {{.*}} "amdgpu-implicitarg-num-bytes"="48"
+// CHECK: attributes {{.*}} = {{.*}} "amdgpu-implicitarg-num-bytes"="56"

Modified: cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl?rev=365643=365642=365643=diff
==
--- cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl Wed Jul 10 08:10:08 2019
@@ -158,30 +158,30 @@ void a_function() {
 // CHECK-NOT: "amdgpu-num-sgpr"="0"
 // CHECK-NOT: "amdgpu-num-vgpr"="0"
 
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent 
noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" 
"amdgpu-implicitarg-num-bytes"="48" 
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { convergent 
noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64" 
"amdgpu-implicitarg-num-bytes"="48" 
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { convergent 
noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128" 
"amdgpu-implicitarg-num-bytes"="48" 
-// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind 
optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { convergent noinline nounwind 
optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind 
optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" 
-// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind 
optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" 
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent 
noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" 
"amdgpu-implicitarg-num-bytes"="56" 
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { convergent 
noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64" 
"amdgpu-implicitarg-num-bytes"="56" 
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { convergent 
noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128" 
"amdgpu-implicitarg-num-bytes"="56" 
+// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind 
optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { convergent noinline nounwind 
optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind 
optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" 
+// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind 
optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" 
 
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { 
convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" 
"amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = {