[PATCH] D112285: [PowerPC] PPC backend optimization to lower int_ppc_tdw/int_ppc_tw intrinsics to TDI/TWI machine instructions
This revision was automatically updated to reflect the committed changes. NeHuang marked 5 inline comments as done. Closed by commit rG18fe0a0d9eb1: [PowerPC] PPC backend optimization to lower int_ppc_tdw/int_ppc_tw intrinsics… (authored by NeHuang). Changed prior to commit: https://reviews.llvm.org/D112285?vs=384768&id=386517#toc Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D112285/new/ https://reviews.llvm.org/D112285 Files: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp llvm/lib/Target/PowerPC/PPCInstr64Bit.td llvm/lib/Target/PowerPC/PPCInstrInfo.td llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap-64bit-only.ll llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll Index: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll === --- llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll +++ llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll @@ -127,6 +127,213 @@ ret void } +; tw -> twi +define dso_local void @test__twi_boundary_reg_imm(i32 %a) { +; CHECK-LABEL: test__twi_boundary_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:twi 3, r3, 32767 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 32767, i32 3) + ret void +} + +define dso_local void @test__twi_boundary_imm_reg(i32 %a) { +; CHECK-LABEL: test__twi_boundary_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:twi 3, r3, 32767 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 32767, i32 %a, i32 3) + ret void +} + +define dso_local void @test__twi_boundary1_reg_imm(i32 %a) { +; CHECK-LABEL: test__twi_boundary1_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:twi 3, r3, -32768 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 -32768, i32 3) + ret void +} + +define dso_local void @test__twi_boundary1_imm_reg(i32 %a) { +; CHECK-LABEL: test__twi_boundary1_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:twi 3, r3, -32768 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 -32768, i32 %a, i32 3) + ret void +} + +define dso_local void @test__tw_boundary2_reg_imm(i32 %a) { +; CHECK-LABEL: 
test__tw_boundary2_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:lis r4, 0 +; CHECK-NEXT:ori r4, r4, 32768 +; CHECK-NEXT:tw 3, r3, r4 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 32768, i32 3) + ret void +} + +define dso_local void @test__tw_boundary2_imm_reg(i32 %a) { +; CHECK-LABEL: test__tw_boundary2_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:lis r4, 0 +; CHECK-NEXT:ori r4, r4, 32768 +; CHECK-NEXT:tw 3, r4, r3 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 32768, i32 %a, i32 3) + ret void +} + +define dso_local void @test__tw_boundary3_reg_imm(i32 %a) { +; CHECK-LABEL: test__tw_boundary3_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:lis r4, -1 +; CHECK-NEXT:ori r4, r4, 32767 +; CHECK-NEXT:tw 3, r3, r4 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 -32769, i32 3) + ret void +} + +define dso_local void @test__tw_boundary3_imm_reg(i32 %a) { +; CHECK-LABEL: test__tw_boundary3_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:lis r4, -1 +; CHECK-NEXT:ori r4, r4, 32767 +; CHECK-NEXT:tw 3, r4, r3 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 -32769, i32 %a, i32 3) + ret void +} + +define dso_local void @test__twlgti_reg_imm(i32 %a) { +; CHECK-LABEL: test__twlgti_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:twlgti r3, 0 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 0, i32 1) + ret void +} + +define dso_local void @test__twllti_imm_reg(i32 %a) { +; CHECK-LABEL: test__twllti_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:twllti r3, 0 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 0, i32 %a, i32 1) + ret void +} + +define dso_local void @test__twllti_reg_imm(i32 %a) { +; CHECK-LABEL: test__twllti_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:twllti r3, 1 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 1, i32 2) + ret void +} + +define dso_local void @test__twlgti_imm_reg(i32 %a) { +; CHECK-LABEL: test__twlgti_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:twlgti r3, 1 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 1, i32 %a, i32 2) + ret void +} + +define dso_local void 
@test__tweqi_reg_imm(i32 %a) { +; CHECK-LABEL: test__tweqi_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:tweqi r3, 2 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 2, i32 4) + ret void +} + +define dso_local void @test__tweqi_imm_reg(i32 %a) { +; CHECK-LABEL: test__tweqi_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:tweqi r3, 2 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 2, i32 %a, i32 4) + ret void +} + +define dso_local void @test__twgti_reg_imm(i32 %a) { +; CHECK-LABEL: test__twgti_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:twgti r3, 16 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 16, i32 8) + ret void +} + +define
[PATCH] D112285: [PowerPC] PPC backend optimization to lower int_ppc_tdw/int_ppc_tw intrinsics to TDI/TWI machine instructions
amyk accepted this revision. amyk added a comment. Aside from Nemanja's comments, this patch LGTM. Thanks for addressing the comments! Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D112285/new/ https://reviews.llvm.org/D112285 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D112285: [PowerPC] PPC backend optimization to lower int_ppc_tdw/int_ppc_tw intrinsics to TDI/TWI machine instructions
nemanjai accepted this revision. nemanjai added a comment. This revision is now accepted and ready to land. LGTM other than a number of stylistic changes. Feel free to address those on the commit. You also might want to give @amyk a bit of time to ensure her comments were adequately addressed. Comment at: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:5017 +Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW; + } + // We will emit PPC::TDI or PPC::TWI if the 2nd and 3rd operands are reg + Nit: no braces for a single statement. Comment at: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:5020 + // imm or imm + reg. + else { +Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TDI : PPC::TWI; Nit: keep the `else` on the line immediately following the end of the `if` block so it is visually easy to match them up (the comments for `else/else if` go into the block). Also, rather than the structure: ``` if (something) { } else { if (something else) ... else ... } ``` Opt for a more flat structure of ``` if (something) else if (something else) else ``` Comment at: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:5021 + else { +Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TDI : PPC::TWI; +// The 2nd and 3rd operands are reg + imm. If you initialize `Opcode` this way at the declaration, you won't need this here and can flatten this as per my above comment. Comment at: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:5025 + Ops[2] = getI32Imm(int(SImmOperand3) & 0xFFFF, dl); +} +// The 2nd and 3rd operands are imm + reg. Nit: no braces with a single statement. Comment at: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:5037 +TO = (TO & 0x1) ? TO + 1 : TO - 1; + // We swap the fourth and fifthy bit of TO if they are not same. 
+ if ((TO & 0x8) != ((TO & 0x10) >> 1)) s/fifthy/fifth Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D112285/new/ https://reviews.llvm.org/D112285 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D112285: [PowerPC] PPC backend optimization to lower int_ppc_tdw/int_ppc_tw intrinsics to TDI/TWI machine instructions
NeHuang updated this revision to Diff 384768. NeHuang marked 3 inline comments as done. NeHuang added a comment. Address review comments from @nemanjai Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D112285/new/ https://reviews.llvm.org/D112285 Files: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp llvm/lib/Target/PowerPC/PPCInstr64Bit.td llvm/lib/Target/PowerPC/PPCInstrInfo.td llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap-64bit-only.ll llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll Index: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll === --- llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll +++ llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll @@ -127,6 +127,213 @@ ret void } +; tw -> twi +define dso_local void @test__twi_boundary_reg_imm(i32 %a) { +; CHECK-LABEL: test__twi_boundary_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:twi 3, r3, 32767 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 32767, i32 3) + ret void +} + +define dso_local void @test__twi_boundary_imm_reg(i32 %a) { +; CHECK-LABEL: test__twi_boundary_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:twi 3, r3, 32767 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 32767, i32 %a, i32 3) + ret void +} + +define dso_local void @test__twi_boundary1_reg_imm(i32 %a) { +; CHECK-LABEL: test__twi_boundary1_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:twi 3, r3, -32768 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 -32768, i32 3) + ret void +} + +define dso_local void @test__twi_boundary1_imm_reg(i32 %a) { +; CHECK-LABEL: test__twi_boundary1_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:twi 3, r3, -32768 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 -32768, i32 %a, i32 3) + ret void +} + +define dso_local void @test__tw_boundary2_reg_imm(i32 %a) { +; CHECK-LABEL: test__tw_boundary2_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:lis r4, 0 +; CHECK-NEXT:ori r4, r4, 32768 +; CHECK-NEXT:tw 3, r3, r4 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, 
i32 32768, i32 3) + ret void +} + +define dso_local void @test__tw_boundary2_imm_reg(i32 %a) { +; CHECK-LABEL: test__tw_boundary2_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:lis r4, 0 +; CHECK-NEXT:ori r4, r4, 32768 +; CHECK-NEXT:tw 3, r4, r3 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 32768, i32 %a, i32 3) + ret void +} + +define dso_local void @test__tw_boundary3_reg_imm(i32 %a) { +; CHECK-LABEL: test__tw_boundary3_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:lis r4, -1 +; CHECK-NEXT:ori r4, r4, 32767 +; CHECK-NEXT:tw 3, r3, r4 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 -32769, i32 3) + ret void +} + +define dso_local void @test__tw_boundary3_imm_reg(i32 %a) { +; CHECK-LABEL: test__tw_boundary3_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:lis r4, -1 +; CHECK-NEXT:ori r4, r4, 32767 +; CHECK-NEXT:tw 3, r4, r3 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 -32769, i32 %a, i32 3) + ret void +} + +define dso_local void @test__twlgti_reg_imm(i32 %a) { +; CHECK-LABEL: test__twlgti_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:twlgti r3, 0 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 0, i32 1) + ret void +} + +define dso_local void @test__twllti_imm_reg(i32 %a) { +; CHECK-LABEL: test__twllti_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:twllti r3, 0 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 0, i32 %a, i32 1) + ret void +} + +define dso_local void @test__twllti_reg_imm(i32 %a) { +; CHECK-LABEL: test__twllti_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:twllti r3, 1 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 1, i32 2) + ret void +} + +define dso_local void @test__twlgti_imm_reg(i32 %a) { +; CHECK-LABEL: test__twlgti_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:twlgti r3, 1 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 1, i32 %a, i32 2) + ret void +} + +define dso_local void @test__tweqi_reg_imm(i32 %a) { +; CHECK-LABEL: test__tweqi_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:tweqi r3, 2 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 2, i32 4) + 
ret void +} + +define dso_local void @test__tweqi_imm_reg(i32 %a) { +; CHECK-LABEL: test__tweqi_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:tweqi r3, 2 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 2, i32 %a, i32 4) + ret void +} + +define dso_local void @test__twgti_reg_imm(i32 %a) { +; CHECK-LABEL: test__twgti_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:twgti r3, 16 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 16, i32 8) + ret void +} + +define dso_local void @test__twlti_imm_reg(i32 %a) { +; CHECK-LABEL: test__twlti_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:twlti r3, 16 +; CHECK-NEXT:blr + call void
[PATCH] D112285: [PowerPC] PPC backend optimization to lower int_ppc_tdw/int_ppc_tw intrinsics to TDI/TWI machine instructions
NeHuang updated this revision to Diff 384479. NeHuang marked 5 inline comments as done. NeHuang added a comment. Addressed review comments from @amy Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D112285/new/ https://reviews.llvm.org/D112285 Files: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp llvm/lib/Target/PowerPC/PPCInstr64Bit.td llvm/lib/Target/PowerPC/PPCInstrInfo.td llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap-64bit-only.ll llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll Index: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll === --- llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll +++ llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll @@ -127,6 +127,213 @@ ret void } +; tw -> twi +define dso_local void @test__twi_boundary_reg_imm(i32 %a) { +; CHECK-LABEL: test__twi_boundary_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:twi 3, r3, 32767 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 32767, i32 3) + ret void +} + +define dso_local void @test__twi_boundary_imm_reg(i32 %a) { +; CHECK-LABEL: test__twi_boundary_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:twi 3, r3, 32767 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 32767, i32 %a, i32 3) + ret void +} + +define dso_local void @test__twi_boundary1_reg_imm(i32 %a) { +; CHECK-LABEL: test__twi_boundary1_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:twi 3, r3, -32768 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 -32768, i32 3) + ret void +} + +define dso_local void @test__twi_boundary1_imm_reg(i32 %a) { +; CHECK-LABEL: test__twi_boundary1_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:twi 3, r3, -32768 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 -32768, i32 %a, i32 3) + ret void +} + +define dso_local void @test__tw_boundary2_reg_imm(i32 %a) { +; CHECK-LABEL: test__tw_boundary2_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:lis r4, 0 +; CHECK-NEXT:ori r4, r4, 32768 +; CHECK-NEXT:tw 3, r3, r4 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 
32768, i32 3) + ret void +} + +define dso_local void @test__tw_boundary2_imm_reg(i32 %a) { +; CHECK-LABEL: test__tw_boundary2_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:lis r4, 0 +; CHECK-NEXT:ori r4, r4, 32768 +; CHECK-NEXT:tw 3, r4, r3 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 32768, i32 %a, i32 3) + ret void +} + +define dso_local void @test__tw_boundary3_reg_imm(i32 %a) { +; CHECK-LABEL: test__tw_boundary3_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:lis r4, -1 +; CHECK-NEXT:ori r4, r4, 32767 +; CHECK-NEXT:tw 3, r3, r4 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 -32769, i32 3) + ret void +} + +define dso_local void @test__tw_boundary3_imm_reg(i32 %a) { +; CHECK-LABEL: test__tw_boundary3_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:lis r4, -1 +; CHECK-NEXT:ori r4, r4, 32767 +; CHECK-NEXT:tw 3, r4, r3 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 -32769, i32 %a, i32 3) + ret void +} + +define dso_local void @test__twlgti_reg_imm(i32 %a) { +; CHECK-LABEL: test__twlgti_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:twlgti r3, 0 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 0, i32 1) + ret void +} + +define dso_local void @test__twllti_imm_reg(i32 %a) { +; CHECK-LABEL: test__twllti_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:twllti r3, 0 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 0, i32 %a, i32 1) + ret void +} + +define dso_local void @test__twllti_reg_imm(i32 %a) { +; CHECK-LABEL: test__twllti_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:twllti r3, 1 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 1, i32 2) + ret void +} + +define dso_local void @test__twlgti_imm_reg(i32 %a) { +; CHECK-LABEL: test__twlgti_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:twlgti r3, 1 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 1, i32 %a, i32 2) + ret void +} + +define dso_local void @test__tweqi_reg_imm(i32 %a) { +; CHECK-LABEL: test__tweqi_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:tweqi r3, 2 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 2, i32 4) + ret 
void +} + +define dso_local void @test__tweqi_imm_reg(i32 %a) { +; CHECK-LABEL: test__tweqi_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:tweqi r3, 2 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 2, i32 %a, i32 4) + ret void +} + +define dso_local void @test__twgti_reg_imm(i32 %a) { +; CHECK-LABEL: test__twgti_reg_imm: +; CHECK: # %bb.0: +; CHECK-NEXT:twgti r3, 16 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 16, i32 8) + ret void +} + +define dso_local void @test__twlti_imm_reg(i32 %a) { +; CHECK-LABEL: test__twlti_imm_reg: +; CHECK: # %bb.0: +; CHECK-NEXT:twlti r3, 16 +; CHECK-NEXT:blr + call void
[PATCH] D112285: [PowerPC] PPC backend optimization to lower int_ppc_tdw/int_ppc_tw intrinsics to TDI/TWI machine instructions
nemanjai requested changes to this revision. nemanjai added inline comments. This revision now requires changes to proceed. Comment at: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:5001 + case ISD::INTRINSIC_VOID: { +if (N->getConstantOperandVal(1) == Intrinsic::ppc_tdw || +N->getConstantOperandVal(1) == Intrinsic::ppc_tw) { amyk wrote: > Might be a good idea to save `N->getConstantOperandVal(1)` since it is being > accessed quite a few times here. +1 Comment at: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:5003 +N->getConstantOperandVal(1) == Intrinsic::ppc_tw) { + unsigned Opcode = 0; + int16_t SImmOperand2; amyk wrote: > I see we emit TDI/TWI in 2/3 cases, so I was wondering if it make sense pull > out setting the opcode in the second and third case to have the default > opcode be: > ``` > Opcode = N->getConstantOperandVal(1) == Intrinsic::ppc_tdw ? PPC::TDI >: PPC::TWI; > ``` > And then we just set the opcode to TD/TW in the first case? +1 Same thing with the `Ops` vector. Pre-populate it and then only change the operand that needs to be changed. Comment at: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:5012 + isIntS16Immediate(N->getOperand(3), SImmOperand3); + // Will emit TD/TW if 2nd and 3rd operands are reg + reg or imm + imm + if (isOperand2IntS16Immediate == isOperand3IntS16Immediate) { Nit: complete sentences please. Here and in other comments. Also, please add a comment stating that the `imm + imm` form will be optimized to either an unconditional trap or a nop in a later pass. Comment at: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:5036 + "4th operand is not an Immediate"); +int16_t TO = int(SImmOperand4) & 0x1F; +// when first and second bit of TO not same, swap them This will be an uninitialized variable when compiled without asserts. 
Comment at: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap-64bit-only.ll:3-7 +; RUN: --ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ -; RUN: -mcpu=pwr7 < %s | FileCheck %s +; RUN: --ppc-asm-full-reg-names -mcpu=pwr7 < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \ +; RUN: --ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s This change should just be pre-committed Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D112285/new/ https://reviews.llvm.org/D112285 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D112285: [PowerPC] PPC backend optimization to lower int_ppc_tdw/int_ppc_tw intrinsics to TDI/TWI machine instructions
NeHuang added inline comments. Comment at: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap-64bit-only.ll:131 +; CHECK: # %bb.0: +; CHECK-NEXT:tdi 3, 3, 32767 +; CHECK-NEXT:blr amyk wrote: > amyk wrote: > > nemanjai wrote: > > > Can we add `-ppc-asm-full-reg-names` to the RUN lines so it is more clear > > > which operand is a register and which is an immediate. This works on AIX > > > now since https://reviews.llvm.org/D94282 landed. > > Maybe it would be good to pre-commit the change with > > `-ppc-asm-full-reg-names` added to the run lines so then this patch can > > only contain the pertinent `td`/`tdi`/`tw`/`twi` changes. > I meant, maybe it is a better idea to commit the test cases with > `-ppc-asm-full-reg-names` first, so then this revision does not contain the > additional updates of adding the registers in places that is not affected by > your patch. However, perhaps if Nemanja thinks adding the option to this > patch is OK, then that's fine with me, too. Good catch. Let me rebase this patch with ToT. The NFC patch was committed at 40cad47fd82ecaf253ba9b11fcd34f67dd557e9d. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D112285/new/ https://reviews.llvm.org/D112285 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D112285: [PowerPC] PPC backend optimization to lower int_ppc_tdw/int_ppc_tw intrinsics to TDI/TWI machine instructions
amyk added inline comments. Comment at: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:5001 + case ISD::INTRINSIC_VOID: { +if (N->getConstantOperandVal(1) == Intrinsic::ppc_tdw || +N->getConstantOperandVal(1) == Intrinsic::ppc_tw) { Might be a good idea to save `N->getConstantOperandVal(1)` since it is being accessed quite a few times here. Comment at: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:5003 +N->getConstantOperandVal(1) == Intrinsic::ppc_tw) { + unsigned Opcode = 0; + int16_t SImmOperand2; I see we emit TDI/TWI in 2/3 cases, so I was wondering if it makes sense to pull out setting the opcode in the second and third case to have the default opcode be: ``` Opcode = N->getConstantOperandVal(1) == Intrinsic::ppc_tdw ? PPC::TDI : PPC::TWI; ``` And then we just set the opcode to TD/TW in the first case? Comment at: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:5038 +// when first and second bit of TO not same, swap them +if ((TO & 0x1) != ((TO & 0x2) >> 1)) { + TO = (TO & 0x1) ? TO + 1 : TO - 1; nit: Curly braces can be removed. Comment at: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:5042 +// when third and fourth bit of TO not same, swap them +if ((TO & 0x8) != ((TO & 0x10) >> 1)) { + TO = (TO & 0x8) ? TO + 8 : TO - 8; nit: Curly braces can be removed. Comment at: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap-64bit-only.ll:131 +; CHECK: # %bb.0: +; CHECK-NEXT:tdi 3, 3, 32767 +; CHECK-NEXT:blr amyk wrote: > nemanjai wrote: > > Can we add `-ppc-asm-full-reg-names` to the RUN lines so it is more clear > > which operand is a register and which is an immediate. This works on AIX > > now since https://reviews.llvm.org/D94282 landed. > Maybe it would be good to pre-commit the change with > `-ppc-asm-full-reg-names` added to the run lines so then this patch can only > contain the pertinent `td`/`tdi`/`tw`/`twi` changes. 
I meant, maybe it is a better idea to commit the test cases with `-ppc-asm-full-reg-names` first, so then this revision does not contain the additional updates of adding the registers in places that are not affected by your patch. However, perhaps if Nemanja thinks adding the option to this patch is OK, then that's fine with me, too. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D112285/new/ https://reviews.llvm.org/D112285 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D112285: [PowerPC] PPC backend optimization to lower int_ppc_tdw/int_ppc_tw intrinsics to TDI/TWI machine instructions
NeHuang updated this revision to Diff 383506. NeHuang added a comment. Addressed review comments from @nemanjai and @amyk Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D112285/new/ https://reviews.llvm.org/D112285 Files: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp llvm/lib/Target/PowerPC/PPCInstr64Bit.td llvm/lib/Target/PowerPC/PPCInstrInfo.td llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap-64bit-only.ll llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll Index: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll === --- llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll +++ llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll @@ -1,19 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -mcpu=pwr8 < %s | FileCheck %s +; RUN: --ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ -; RUN: -mcpu=pwr7 < %s | FileCheck %s +; RUN: --ppc-asm-full-reg-names -mcpu=pwr7 < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \ -; RUN: -mcpu=pwr8 < %s | FileCheck %s +; RUN: --ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \ -; RUN: -mcpu=pwr8 < %s | FileCheck %s +; RUN: --ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s ; tw declare void @llvm.ppc.tw(i32 %a, i32 %b, i32 %c) define dso_local void @test__twlgt(i32 %a, i32 %b) { ; CHECK-LABEL: test__twlgt: ; CHECK: # %bb.0: -; CHECK-NEXT:twlgt 3, 4 +; CHECK-NEXT:twlgt r3, r4 ; CHECK-NEXT:blr call void @llvm.ppc.tw(i32 %a, i32 %b, i32 1) ret void @@ -22,7 +22,7 @@ define dso_local void @test__twllt(i32 %a, i32 %b) { ; CHECK-LABEL: test__twllt: ; CHECK: # %bb.0: -; CHECK-NEXT:twllt 3, 4 +; CHECK-NEXT:twllt r3, r4 ; CHECK-NEXT:blr call void @llvm.ppc.tw(i32 %a, i32 %b, i32 2) ret void 
@@ -31,7 +31,7 @@ define dso_local void @test__tw3(i32 %a, i32 %b) { ; CHECK-LABEL: test__tw3: ; CHECK: # %bb.0: -; CHECK-NEXT:tw 3, 3, 4 +; CHECK-NEXT:tw 3, r3, r4 ; CHECK-NEXT:blr call void @llvm.ppc.tw(i32 %a, i32 %b, i32 3) ret void @@ -40,7 +40,7 @@ define dso_local void @test__tweq(i32 %a, i32 %b) { ; CHECK-LABEL: test__tweq: ; CHECK: # %bb.0: -; CHECK-NEXT:tweq 3, 4 +; CHECK-NEXT:tweq r3, r4 ; CHECK-NEXT:blr call void @llvm.ppc.tw(i32 %a, i32 %b, i32 4) ret void @@ -49,7 +49,7 @@ define dso_local void @test__twlge(i32 %a, i32 %b) { ; CHECK-LABEL: test__twlge: ; CHECK: # %bb.0: -; CHECK-NEXT:tw 5, 3, 4 +; CHECK-NEXT:tw 5, r3, r4 ; CHECK-NEXT:blr call void @llvm.ppc.tw(i32 %a, i32 %b, i32 5) ret void @@ -58,7 +58,7 @@ define dso_local void @test__twlle(i32 %a, i32 %b) { ; CHECK-LABEL: test__twlle: ; CHECK: # %bb.0: -; CHECK-NEXT:tw 6, 3, 4 +; CHECK-NEXT:tw 6, r3, r4 ; CHECK-NEXT:blr call void @llvm.ppc.tw(i32 %a, i32 %b, i32 6) ret void @@ -67,7 +67,7 @@ define dso_local void @test__twgt(i32 %a, i32 %b) { ; CHECK-LABEL: test__twgt: ; CHECK: # %bb.0: -; CHECK-NEXT:twgt 3, 4 +; CHECK-NEXT:twgt r3, r4 ; CHECK-NEXT:blr call void @llvm.ppc.tw(i32 %a, i32 %b, i32 8) ret void @@ -76,7 +76,7 @@ define dso_local void @test__twge(i32 %a, i32 %b) { ; CHECK-LABEL: test__twge: ; CHECK: # %bb.0: -; CHECK-NEXT:tw 12, 3, 4 +; CHECK-NEXT:tw 12, r3, r4 ; CHECK-NEXT:blr call void @llvm.ppc.tw(i32 %a, i32 %b, i32 12) ret void @@ -85,7 +85,7 @@ define dso_local void @test__twlt(i32 %a, i32 %b) { ; CHECK-LABEL: test__twlt: ; CHECK: # %bb.0: -; CHECK-NEXT:twlt 3, 4 +; CHECK-NEXT:twlt r3, r4 ; CHECK-NEXT:blr call void @llvm.ppc.tw(i32 %a, i32 %b, i32 16) ret void @@ -94,7 +94,7 @@ define dso_local void @test__twle(i32 %a, i32 %b) { ; CHECK-LABEL: test__twle: ; CHECK: # %bb.0: -; CHECK-NEXT:tw 20, 3, 4 +; CHECK-NEXT:tw 20, r3, r4 ; CHECK-NEXT:blr call void @llvm.ppc.tw(i32 %a, i32 %b, i32 20) ret void @@ -103,7 +103,7 @@ define dso_local void @test__twne24(i32 %a, i32 %b) { ; 
CHECK-LABEL: test__twne24: ; CHECK: # %bb.0: -; CHECK-NEXT:twne 3, 4 +; CHECK-NEXT:twne r3, r4 ; CHECK-NEXT:blr call void @llvm.ppc.tw(i32 %a, i32 %b, i32 24) ret void @@ -112,7 +112,7 @@ define dso_local void @test__twu(i32 %a, i32 %b) { ; CHECK-LABEL: test__twu: ; CHECK: # %bb.0: -; CHECK-NEXT:twu 3, 4 +; CHECK-NEXT:twu r3, r4 ; CHECK-NEXT:blr call void @llvm.ppc.tw(i32 %a, i32 %b, i32 31) ret void @@ -121,18 +121,225 @@ define dso_local void @test__tw_no_match(i32 %a, i32 %b) { ; CHECK-LABEL: test__tw_no_match: ; CHECK: # %bb.0: -;
[PATCH] D112285: [PowerPC] PPC backend optimization to lower int_ppc_tdw/int_ppc_tw intrinsics to TDI/TWI machine instructions
amyk added inline comments. Comment at: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap-64bit-only.ll:131 +; CHECK: # %bb.0: +; CHECK-NEXT:tdi 3, 3, 32767 +; CHECK-NEXT:blr nemanjai wrote: > Can we add `-ppc-asm-full-reg-names` to the RUN lines so it is more clear > which operand is a register and which is an immediate. This works on AIX now > since https://reviews.llvm.org/D94282 landed. Maybe it would be good to pre-commit the change with `-ppc-asm-full-reg-names` added to the run lines so then this patch can only contain the pertinent `td`/`tdi`/`tw`/`twi` changes. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D112285/new/ https://reviews.llvm.org/D112285 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D112285: [PowerPC] PPC backend optimization to lower int_ppc_tdw/int_ppc_tw intrinsics to TDI/TWI machine instructions
nemanjai added inline comments. Comment at: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:5003 +N->getConstantOperandVal(1) == Intrinsic::ppc_tw) { + int16_t SImm; + if (isIntS16Immediate(N->getOperand(3), SImm)) { I think a couple of improvements can be made here: 1. Populate the `Opcode` and `Ops` array based on various conditions and add a single call to `SelectNodeTo()` 2. Handle the first input being constant (and then flip the condition) Comment at: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap-64bit-only.ll:131 +; CHECK: # %bb.0: +; CHECK-NEXT:tdi 3, 3, 32767 +; CHECK-NEXT:blr Can we add `-ppc-asm-full-reg-names` to the RUN lines so it is more clear which operand is a register and which is an immediate. This works on AIX now since https://reviews.llvm.org/D94282 landed. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D112285/new/ https://reviews.llvm.org/D112285 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D112285: [PowerPC] PPC backend optimization to lower int_ppc_tdw/int_ppc_tw intrinsics to TDI/TWI machine instructions
NeHuang created this revision. NeHuang added reviewers: nemanjai, stefanp, PowerPC. NeHuang added a project: LLVM. Herald added subscribers: shchenz, kbarton, hiraditya. NeHuang requested review of this revision. This patch adds a backend optimization to match XL behavior for the two builtins `__tdw` and `__tw`: when the second input argument is an immediate, `tdi`/`twi` instructions are emitted instead of `td`/`tw`. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D112285 Files: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp llvm/lib/Target/PowerPC/PPCInstr64Bit.td llvm/lib/Target/PowerPC/PPCInstrInfo.td llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap-64bit-only.ll llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll Index: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll === --- llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll +++ llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll @@ -127,6 +127,110 @@ ret void } +; tw -> twi +define dso_local void @test__twi_boundary(i32 %a) { +; CHECK-LABEL: test__twi_boundary: +; CHECK: # %bb.0: +; CHECK-NEXT:twi 3, 3, 32767 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 32767, i32 3) + ret void +} + +define dso_local void @test__twi_boundary1(i32 %a) { +; CHECK-LABEL: test__twi_boundary1: +; CHECK: # %bb.0: +; CHECK-NEXT:twi 3, 3, -32768 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 -32768, i32 3) + ret void +} + +define dso_local void @test__tw_boundary2(i32 %a) { +; CHECK-LABEL: test__tw_boundary2: +; CHECK: # %bb.0: +; CHECK-NEXT:lis 4, 0 +; CHECK-NEXT:ori 4, 4, 32768 +; CHECK-NEXT:tw 3, 3, 4 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 32768, i32 3) + ret void +} + +define dso_local void @test__tw_boundary3(i32 %a) { +; CHECK-LABEL: test__tw_boundary3: +; CHECK: # %bb.0: +; CHECK-NEXT:lis 4, -1 +; CHECK-NEXT:ori 4, 4, 32767 +; CHECK-NEXT:tw 3, 3, 4 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 -32769, i32 3) + ret void +} + +define dso_local void 
@test__twlgti(i32 %a) { +; CHECK-LABEL: test__twlgti: +; CHECK: # %bb.0: +; CHECK-NEXT:twlgti 3, 0 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 0, i32 1) + ret void +} + +define dso_local void @test__twllti(i32 %a) { +; CHECK-LABEL: test__twllti: +; CHECK: # %bb.0: +; CHECK-NEXT:twllti 3, 1 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 1, i32 2) + ret void +} + +define dso_local void @test__tweqi(i32 %a) { +; CHECK-LABEL: test__tweqi: +; CHECK: # %bb.0: +; CHECK-NEXT:tweqi 3, 2 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 2, i32 4) + ret void +} + +define dso_local void @test__twgti(i32 %a) { +; CHECK-LABEL: test__twgti: +; CHECK: # %bb.0: +; CHECK-NEXT:twgti 3, 16 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 16, i32 8) + ret void +} + +define dso_local void @test__twlti(i32 %a) { +; CHECK-LABEL: test__twlti: +; CHECK: # %bb.0: +; CHECK-NEXT:twlti 3, 64 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 64, i32 16) + ret void +} + +define dso_local void @test__twnei(i32 %a) { +; CHECK-LABEL: test__twnei: +; CHECK: # %bb.0: +; CHECK-NEXT:twnei 3, 256 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 256, i32 24) + ret void +} + +define dso_local void @test__twui(i32 %a) { +; CHECK-LABEL: test__twui: +; CHECK: # %bb.0: +; CHECK-NEXT:twui 3, 512 +; CHECK-NEXT:blr + call void @llvm.ppc.tw(i32 %a, i32 512, i32 31) + ret void +} + ; trap declare void @llvm.ppc.trap(i32 %a) define dso_local void @test__trap(i32 %a) { Index: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap-64bit-only.ll === --- llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap-64bit-only.ll +++ llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap-64bit-only.ll @@ -124,6 +124,110 @@ ret void } +; tdw -> tdi +define dso_local void @test__tdi_boundary(i64 %a) { +; CHECK-LABEL: test__tdi_boundary: +; CHECK: # %bb.0: +; CHECK-NEXT:tdi 3, 3, 32767 +; CHECK-NEXT:blr + call void @llvm.ppc.tdw(i64 %a, i64 32767, i32 3) + ret void +} + +define dso_local void 
@test__tdi_boundary1(i64 %a) { +; CHECK-LABEL: test__tdi_boundary1: +; CHECK: # %bb.0: +; CHECK-NEXT:tdi 3, 3, -32768 +; CHECK-NEXT:blr + call void @llvm.ppc.tdw(i64 %a, i64 -32768, i32 3) + ret void +} + +define dso_local void @test__td_boundary2(i64 %a) { +; CHECK-LABEL: test__td_boundary2: +; CHECK: # %bb.0: +; CHECK-NEXT:li 4, 0 +; CHECK-NEXT:ori 4, 4, 32768 +; CHECK-NEXT:td 3, 3, 4 +; CHECK-NEXT:blr + call void @llvm.ppc.tdw(i64 %a, i64 32768, i32 3) + ret void +} + +define dso_local void @test__td_boundary3(i64 %a) { +; CHECK-LABEL: test__td_boundary3: +; CHECK: # %bb.0: +;