[clang] [AMDGPU] Check wavefrontsize for GFX11 WMMA builtins (PR #79980)
https://github.com/cdevadas approved this pull request. https://github.com/llvm/llvm-project/pull/79980 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Check wavefrontsize for GFX11 WMMA builtins (PR #79980)
@@ -21,14 +21,14 @@ void test_amdgcn_wmma_f32_16x16x16_bf16_w64(global v4f* out4f, v16h a16h, v16h b global v8s* out8s, v4i a4i, v4i b4i, v8s c8s, global v4i* out4i, v2i a2i, v2i b2i, v4i c4i) { - *out4f = __builtin_amdgcn_wmma_f32_16x16x16_f16_w64(a16h, b16h, c4f); // expected-error{{'__builtin_amdgcn_wmma_f32_16x16x16_f16_w64' needs target feature gfx11-insts}} - *out4f = __builtin_amdgcn_wmma_f32_16x16x16_bf16_w64(a16s, b16s, c4f); // expected-error{{'__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64' needs target feature gfx11-insts}} - *out8h = __builtin_amdgcn_wmma_f16_16x16x16_f16_w64(a16h, b16h, c8h, true); // expected-error{{'__builtin_amdgcn_wmma_f16_16x16x16_f16_w64' needs target feature gfx11-insts}} - *out8s = __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64(a16s, b16s, c8s, true); // expected-error{{'__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64' needs target feature gfx11-insts}} - *out8h = __builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64(a16h, b16h, c8h, true); // expected-error{{'__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64' needs target feature gfx11-insts}} - *out8s = __builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64(a16s, b16s, c8s, true); // expected-error{{'__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64' needs target feature gfx11-insts}} - *out4i = __builtin_amdgcn_wmma_i32_16x16x16_iu8_w64(true, a4i, true, b4i, c4i, false); // expected-error{{'__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64' needs target feature gfx11-insts}} - *out4i = __builtin_amdgcn_wmma_i32_16x16x16_iu4_w64(true, a2i, true, b2i, c4i, false); // expected-error{{'__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64' needs target feature gfx11-insts}} + *out4f = __builtin_amdgcn_wmma_f32_16x16x16_f16_w64(a16h, b16h, c4f); // expected-error{{'__builtin_amdgcn_wmma_f32_16x16x16_f16_w64' needs target feature gfx11-insts,wavefrontsize64}} + *out4f = __builtin_amdgcn_wmma_f32_16x16x16_bf16_w64(a16s, b16s, c4f); // expected-error{{'__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64' needs target feature gfx11-insts,wavefrontsize64}} + *out8h = __builtin_amdgcn_wmma_f16_16x16x16_f16_w64(a16h, b16h, c8h, true); // expected-error{{'__builtin_amdgcn_wmma_f16_16x16x16_f16_w64' needs target feature gfx11-insts,wavefrontsize64}} + *out8s = __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64(a16s, b16s, c8s, true); // expected-error{{'__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64' needs target feature gfx11-insts,wavefrontsize64}} + *out8h = __builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64(a16h, b16h, c8h, true); // expected-error{{'__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64' needs target feature gfx11-insts,wavefrontsize64}} + *out8s = __builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64(a16s, b16s, c8s, true); // expected-error{{'__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64' needs target feature gfx11-insts,wavefrontsize64}} + *out4i = __builtin_amdgcn_wmma_i32_16x16x16_iu8_w64(true, a4i, true, b4i, c4i, false); // expected-error{{'__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64' needs target feature gfx11-insts,wavefrontsize64}} + *out4i = __builtin_amdgcn_wmma_i32_16x16x16_iu4_w64(true, a2i, true, b2i, c4i, false); // expected-error{{'__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64' needs target feature gfx11-insts,wavefrontsize64}} } -#endif \ No newline at end of file +#endif cdevadas wrote: Have you added a new line here? https://github.com/llvm/llvm-project/pull/79980 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r366683 - Updated the signature for some stack related intrinsics (CLANG)
Author: cdevadas Date: Mon Jul 22 05:50:30 2019 New Revision: 366683 URL: http://llvm.org/viewvc/llvm-project?rev=366683&view=rev Log: Updated the signature for some stack related intrinsics (CLANG) Modified the intrinsics int_addressofreturnaddress, int_frameaddress & int_sponentry. This commit depends on the changes in rL366679 Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D64563 Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/CodeGen/CGException.cpp cfe/trunk/test/CodeGen/builtin-sponentry.c cfe/trunk/test/CodeGen/exceptions-seh.c cfe/trunk/test/CodeGen/integer-overflow.c cfe/trunk/test/CodeGen/ms-intrinsics.c cfe/trunk/test/CodeGen/ms-setjmp.c cfe/trunk/test/CodeGenOpenCL/builtins-generic-amdgcn.cl Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=366683&r1=366682&r2=366683&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Jul 22 05:50:30 2019 @@ -843,10 +843,12 @@ static RValue EmitMSVCRTSetJmp(CodeGenFu Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex"; Arg1Ty = CGF.Int8PtrTy; if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) { - Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::sponentry)); + Arg1 = CGF.Builder.CreateCall( + CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy)); } else - Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress), -llvm::ConstantInt::get(CGF.Int32Ty, 0)); + Arg1 = CGF.Builder.CreateCall( + CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy), + llvm::ConstantInt::get(CGF.Int32Ty, 0)); } // Mark the call site and declaration with ReturnsTwice. @@ -2556,7 +2558,7 @@ RValue CodeGenFunction::EmitBuiltinExpr( case Builtin::BI__builtin_frame_address: { Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), getContext().UnsignedIntTy); -Function *F = CGM.getIntrinsic(Intrinsic::frameaddress); +Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy); return RValue::get(Builder.CreateCall(F, Depth)); } case Builtin::BI__builtin_extract_return_addr: { @@ -2637,9 +2639,9 @@ RValue CodeGenFunction::EmitBuiltinExpr( Address Buf = EmitPointerWithAlignment(E->getArg(0)); // Store the frame pointer to the setjmp buffer. -Value *FrameAddr = - Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), - ConstantInt::get(Int32Ty, 0)); +Value *FrameAddr = Builder.CreateCall( +CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy), +ConstantInt::get(Int32Ty, 0)); Builder.CreateStore(FrameAddr, Buf); // Store the stack pointer to the setjmp buffer. @@ -7293,12 +7295,13 @@ Value *CodeGenFunction::EmitAArch64Built } if (BuiltinID == AArch64::BI_AddressOfReturnAddress) { -llvm::Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); +llvm::Function *F = +CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy); return Builder.CreateCall(F); } if (BuiltinID == AArch64::BI__builtin_sponentry) { -llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry); +llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy); return Builder.CreateCall(F); } @@ -12113,7 +12116,8 @@ Value *CodeGenFunction::EmitX86BuiltinEx } case X86::BI_AddressOfReturnAddress: { -Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); +Function *F = +CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy); return Builder.CreateCall(F); } case X86::BI__stosb: { Modified: cfe/trunk/lib/CodeGen/CGException.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGException.cpp?rev=366683&r1=366682&r2=366683&view=diff == --- cfe/trunk/lib/CodeGen/CGException.cpp (original) +++ cfe/trunk/lib/CodeGen/CGException.cpp Mon Jul 22 05:50:30 2019 @@ -1774,7 +1774,8 @@ void CodeGenFunction::EmitCapturedLocals // EH registration is passed in as the EBP physical register. We can // recover that with llvm.frameaddress(1). EntryFP = Builder.CreateCall( -CGM.getIntrinsic(llvm::Intrinsic::frameaddress), {Builder.getInt32(1)}); +CGM.getIntrinsic(llvm::Intrinsic::frameaddress, AllocaInt8PtrTy), +{Builder.getInt32(1)}); } else { // Otherwise, for x64 and 32-bit finally functions, the parent FP is the // second parameter. Modified: cfe/trunk/test/CodeGen/builtin-sponentry.c URL: http://ll
r365643 - [AMDGPU] Increased the number of implicit argument bytes for both OpenCL and HIP (CLANG).
Author: cdevadas Date: Wed Jul 10 08:10:08 2019 New Revision: 365643 URL: http://llvm.org/viewvc/llvm-project?rev=365643&view=rev Log: [AMDGPU] Increased the number of implicit argument bytes for both OpenCL and HIP (CLANG). To enable a new implicit kernel argument, increased the number of argument bytes from 48 to 56. Reviewed By: yaxunl Differential Revision: https://reviews.llvm.org/D63756 Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp cfe/trunk/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=365643&r1=365642&r2=365643&view=diff == --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original) +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Wed Jul 10 08:10:08 2019 @@ -7910,7 +7910,7 @@ void AMDGPUTargetCodeGenInfo::setTargetA if (((M.getLangOpts().OpenCL && FD->hasAttr()) || (M.getLangOpts().HIP && FD->hasAttr())) && (M.getTriple().getOS() == llvm::Triple::AMDHSA)) -F->addFnAttr("amdgpu-implicitarg-num-bytes", "48"); +F->addFnAttr("amdgpu-implicitarg-num-bytes", "56"); const auto *FlatWGS = FD->getAttr(); if (ReqdWGS || FlatWGS) { Modified: cfe/trunk/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu?rev=365643&r1=365642&r2=365643&view=diff == --- cfe/trunk/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu (original) +++ cfe/trunk/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu Wed Jul 10 08:10:08 2019 @@ -5,4 +5,4 @@ __global__ void hip_kernel_temp() { } -// CHECK: attributes {{.*}} = {{.*}} "amdgpu-implicitarg-num-bytes"="48" +// CHECK: attributes {{.*}} = {{.*}} "amdgpu-implicitarg-num-bytes"="56" Modified: cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl?rev=365643&r1=365642&r2=365643&view=diff == --- cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl Wed Jul 10 08:10:08 2019 @@ -158,30 +158,30 @@ void a_function() { // CHECK-NOT: "amdgpu-num-sgpr"="0" // CHECK-NOT: "amdgpu-num-vgpr"="0" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64" "amdgpu-implicitarg-num-bytes"="48" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128" "amdgpu-implicitarg-num-bytes"="48" -// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2,4" -// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" -// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64" "amdgpu-implicitarg-num-bytes"="56" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128" "amdgpu-implicitarg-num-bytes"="56" +// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2" +// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2,4" +// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" +// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[FLAT_WORK_GRO