================
@@ -3589,6 +3596,42 @@ 
tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
     Sub = VecOp->getDefiningRecipe();
     VecOp = Tmp;
   }
+
+  // If ValB is a constant and can be safely extended, truncate it to the same
+  // type as ExtA's operand, then extend it to the same type as ExtA. This
+  // creates two uniform extends that can more easily be matched by the rest of
+  // the bundling code. The ExtB reference, ValB and operand 1 of Mul are all
+  // replaced with the new extend of the constant.
+  auto ExtendAndReplaceConstantOp = [&Ctx](VPWidenCastRecipe *ExtA,
+                                           VPWidenCastRecipe *&ExtB,
+                                           VPValue *&ValB, VPWidenRecipe *Mul) 
{
+    if (ExtA && !ExtB && ValB->isLiveIn()) {
+      Type *NarrowTy = Ctx.Types.inferScalarType(ExtA->getOperand(0));
+      Type *WideTy = Ctx.Types.inferScalarType(ExtA);
+      Instruction::CastOps ExtOpc = ExtA->getOpcode();
+      auto *Const = dyn_cast<ConstantInt>(ValB->getLiveInIRValue());
+      if (Const &&
+          llvm::canConstantBeExtended(
+              Const, NarrowTy, TTI::getPartialReductionExtendKind(ExtOpc))) {
+        // The truncate ensures that the type of each extended operand is the
+        // same, and it's been proven that the constant can be extended from
+        // NarrowTy safely. Necessary since ExtA's extended operand would be
+        // e.g. an i8, while the const will likely be an i32. This will be
+        // elided by later optimisations.
+        auto *Trunc =
+            new VPWidenCastRecipe(Instruction::CastOps::Trunc, ValB, NarrowTy);
+        Trunc->insertBefore(*ExtA->getParent(), 
std::next(ExtA->getIterator()));
+
+        VPWidenCastRecipe *NewCast =
+            new VPWidenCastRecipe(ExtOpc, Trunc, WideTy);
+        NewCast->insertAfter(Trunc);
+        ExtB = NewCast;
+        ValB = NewCast;
+        Mul->setOperand(1, NewCast);
----------------
sdesmalen-arm wrote:

The insertion point can simply be `Mul`, because that is the point where the
extended input is actually needed. You can also use `VPBuilder` to avoid having
to create each recipe and then insert it separately, i.e.
```suggestion
        VPBuilder Builder(Mul);
        auto *Trunc =
            Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, 
NarrowTy);
        Type *WideTy = Ctx.Types.inferScalarType(ExtA);
        ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
        Mul->setOperand(1, ExtB);
```

https://github.com/llvm/llvm-project/pull/162503
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to