[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
vikramRH wrote: Updated this PR to be in sync with #89217. However, the plan is still to land this only after the changes in #89217 are accepted. https://github.com/llvm/llvm-project/pull/92725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
@@ -18479,6 +18479,28 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType()); return Builder.CreateCall(F, Args); } + case AMDGPU::BI__builtin_amdgcn_permlane16: + case AMDGPU::BI__builtin_amdgcn_permlanex16: { +llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); vikramRH wrote: added a new helper https://github.com/llvm/llvm-project/pull/92725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
@@ -18479,6 +18479,28 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType()); return Builder.CreateCall(F, Args); } + case AMDGPU::BI__builtin_amdgcn_permlane16: + case AMDGPU::BI__builtin_amdgcn_permlanex16: { +llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); arsenm wrote: If there's really not a helper to just EmitScalarExpr for N arguments, there should be one used here https://github.com/llvm/llvm-project/pull/92725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
vikramRH wrote: 1. Added/updated tests for permlanex16 and permlane64. 2. This needs https://github.com/llvm/llvm-project/pull/89217 to land first so that only incremental changes can be reviewed. https://github.com/llvm/llvm-project/pull/92725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/92725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
@@ -5433,7 +5450,16 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper, ? Src0 : B.buildBitcast(LLT::scalar(Size), Src0).getReg(0); Src0 = B.buildAnyExt(S32, Src0Cast).getReg(0); -if (Src2.isValid()) { + +if (IsPermLane16) { + Register Src1Cast = + MRI.getType(Src1).isScalar() + ? Src1 + : B.buildBitcast(LLT::scalar(Size), Src2).getReg(0); vikramRH wrote: Yes, I will take over the changes from https://github.com/llvm/llvm-project/pull/89217 once finalized, https://github.com/llvm/llvm-project/pull/92725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
@@ -18479,6 +18479,25 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType()); return Builder.CreateCall(F, Args); } + case AMDGPU::BI__builtin_amdgcn_permlane16: + case AMDGPU::BI__builtin_amdgcn_permlanex16: { +Intrinsic::ID IID; +IID = BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16 + ? Intrinsic::amdgcn_permlane16 + : Intrinsic::amdgcn_permlanex16; + +llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); +llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); +llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); vikramRH wrote: yes https://github.com/llvm/llvm-project/pull/92725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
@@ -5433,7 +5450,16 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper, ? Src0 : B.buildBitcast(LLT::scalar(Size), Src0).getReg(0); Src0 = B.buildAnyExt(S32, Src0Cast).getReg(0); -if (Src2.isValid()) { + +if (IsPermLane16) { + Register Src1Cast = + MRI.getType(Src1).isScalar() + ? Src1 + : B.buildBitcast(LLT::scalar(Size), Src2).getReg(0); arsenm wrote: Like the other patch, shouldn't need any bitcasts https://github.com/llvm/llvm-project/pull/92725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
@@ -18479,6 +18479,25 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType()); return Builder.CreateCall(F, Args); } + case AMDGPU::BI__builtin_amdgcn_permlane16: + case AMDGPU::BI__builtin_amdgcn_permlanex16: { +Intrinsic::ID IID; +IID = BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16 + ? Intrinsic::amdgcn_permlane16 + : Intrinsic::amdgcn_permlanex16; + +llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); +llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); +llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); arsenm wrote: I assume EmitScalarExpr handles the immargs correctly? https://github.com/llvm/llvm-project/pull/92725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
@@ -18479,6 +18479,25 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType()); return Builder.CreateCall(F, Args); } + case AMDGPU::BI__builtin_amdgcn_permlane16: + case AMDGPU::BI__builtin_amdgcn_permlanex16: { +Intrinsic::ID IID; +IID = BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16 arsenm wrote: combine declare + define, also can sink down to use https://github.com/llvm/llvm-project/pull/92725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
https://github.com/arsenm commented: On this and the previous, can you add a section to AMDGPUUsage for the intrinsics and the types they support? https://github.com/llvm/llvm-project/pull/92725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/92725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/92725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/92725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/92725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/92725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff e76b257483e6c6743de0fa6eca4d0cc60e08385d db1933033fd37bbbab0b845eed53405db365b0e6 -- clang/lib/CodeGen/CGBuiltin.cpp llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h llvm/lib/Target/AMDGPU/SIISelLowering.cpp `` View the diff from clang-format here. ``diff diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a0f949495e..9ce2f5b6c1 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18482,9 +18482,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_permlane16: case AMDGPU::BI__builtin_amdgcn_permlanex16: { Intrinsic::ID IID; -IID = BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16 - ? Intrinsic::amdgcn_permlane16 - : Intrinsic::amdgcn_permlanex16; +IID = BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16 + ? Intrinsic::amdgcn_permlane16 + : Intrinsic::amdgcn_permlanex16; llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index cc4797b42d..b28c3521d6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -5416,10 +5416,12 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper, Register Src3 = MI.getOperand(5).getReg(); Register Src4 = MI.getOperand(6).getImm(); Register Src5 = MI.getOperand(7).getImm(); - return LaneOp.addUse(Src1).addUse(Src2). -addUse(Src3). -addImm(Src4). 
-addImm(Src5).getReg(0); + return LaneOp.addUse(Src1) + .addUse(Src2) + .addUse(Src3) + .addImm(Src4) + .addImm(Src5) + .getReg(0); } default: llvm_unreachable("unhandled lane op"); @@ -5427,7 +5429,8 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper, }; Register Src1, Src2; - if (IID == Intrinsic::amdgcn_readlane || IID == Intrinsic::amdgcn_writelane || IsPermLane16) { + if (IID == Intrinsic::amdgcn_readlane || IID == Intrinsic::amdgcn_writelane || + IsPermLane16) { Src1 = MI.getOperand(3).getReg(); if (IID == Intrinsic::amdgcn_writelane || IsPermLane16) { Src2 = MI.getOperand(4).getReg(); @@ -5514,9 +5517,7 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper, Src0 = IsS16Vec ? B.buildBitcast(S32, Src0Parts.getReg(i)).getReg(0) : Src0Parts.getReg(i); PartialRes.push_back( -(B.buildIntrinsic(IID, {S32}) - .addUse(Src0) - .getReg(0))); +(B.buildIntrinsic(IID, {S32}).addUse(Src0).getReg(0))); } break; @@ -5526,7 +5527,7 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper, case Intrinsic::amdgcn_permlanex16: { Register Src1 = MI.getOperand(3).getReg(); Register Src2 = MI.getOperand(4).getReg(); - + Register SrcX = IsPermLane16 ? Src1 : Src2; MachineInstrBuilder SrcXParts; @@ -5547,9 +5548,8 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper, : Src0Parts.getReg(i); SrcX = IsS16Vec ? B.buildBitcast(S32, SrcXParts.getReg(i)).getReg(0) : SrcXParts.getReg(i); -PartialRes.push_back( IsPermLane16 ? -createLaneOp(Src0, SrcX, Src2) : -createLaneOp(Src0, Src1, SrcX)); +PartialRes.push_back(IsPermLane16 ? 
createLaneOp(Src0, SrcX, Src2) + : createLaneOp(Src0, Src1, SrcX)); } break; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 9e77d20813..5d34ed089f 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6092,35 +6092,36 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N, unsigned ValSize = VT.getSizeInBits(); unsigned IntrinsicID = N->getConstantOperandVal(0); bool IsPermLane16 = IntrinsicID == Intrinsic::amdgcn_permlane16 || -IntrinsicID == Intrinsic::amdgcn_permlanex16; + IntrinsicID == Intrinsic::amdgcn_permlanex16; bool IsPermLane64 = IntrinsicID == Intrinsic::amdgcn_permlane64; SDValue Src0 = N->getOperand(1); SDLoc SL(N); MVT IntVT =