================
@@ -5189,10 +5196,54 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr 
&MI,
                 .addReg(NewAccumulator->getOperand(0).getReg())
                 .addImm(1)
                 .setOperandDead(3); // Dead scc
-            BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
-                .addReg(SrcReg)
-                .addReg(ParityRegister);
-            break;
+            if (is32BitOpc) {
+              BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
+                  .addReg(SrcReg)
+                  .addReg(ParityRegister);
+              break;
+            } else {
+              Register DestSub0 =
+                  MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+              Register DestSub1 =
+                  MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+              Register Op1H_Op0L_Reg =
+                  MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+              Register CarryReg =
----------------
jmmartinez wrote:

> Re: s_bfe_i32 : either ways I will be emitting 2 instrs. I can try this tho.

Don't worry. It if was a `v_mul` there could be a difference according to the 
sched-model, but I just checked and it seems be the same for all scalar 
operations.

https://github.com/llvm/llvm-project/pull/151310
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to