================
@@ -702,6 +703,29 @@ Value *VPInstruction::generate(VPTransformState &State) {
                                    {PredTy, ScalarTC->getType()},
                                    {VIVElem0, ScalarTC}, nullptr, Name);
   }
+  // Count the number of bits set in each lane and reduce the result to a 
scalar
+  case VPInstruction::PopCount: {
+    Value *Op = State.get(getOperand(0));
+    Type *VT = Op->getType();
+    Value *Cnt = Op;
+
+    // i1 vectors can just use the add reduction. Bigger elements need a ctpop
+    // first.
+    if (VT->getScalarSizeInBits() > 1)
+      Cnt = Builder.CreateIntrinsic(Intrinsic::ctpop, {VT}, {Cnt});
+
+    auto *VecVT = cast<VectorType>(VT);
+    // Extend to an i8 since i1 is too small to add with
+    if (VecVT->getElementType()->getScalarSizeInBits() < 8) {
+      Cnt = Builder.CreateCast(
+          Instruction::ZExt, Cnt,
+          VectorType::get(Builder.getInt8Ty(), VecVT->getElementCount()));
----------------
sdesmalen-arm wrote:

We can't use a hard-coded i8, as this may not be sufficient to represent the 
number of bits from popcount.

https://github.com/llvm/llvm-project/pull/100579
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to