================
@@ -702,6 +703,29 @@ Value *VPInstruction::generate(VPTransformState &State) {
{PredTy, ScalarTC->getType()},
{VIVElem0, ScalarTC}, nullptr, Name);
}
+ // Count the number of bits set in each lane and reduce the result to a
scalar
+ case VPInstruction::PopCount: {
+ Value *Op = State.get(getOperand(0));
+ Type *VT = Op->getType();
+ Value *Cnt = Op;
+
+ // i1 vectors can just use the add reduction. Bigger elements need a ctpop
+ // first.
+ if (VT->getScalarSizeInBits() > 1)
+ Cnt = Builder.CreateIntrinsic(Intrinsic::ctpop, {VT}, {Cnt});
+
+ auto *VecVT = cast<VectorType>(VT);
+ // Extend to an i8 since i1 is too small to add with
+ if (VecVT->getElementType()->getScalarSizeInBits() < 8) {
+ Cnt = Builder.CreateCast(
+ Instruction::ZExt, Cnt,
+ VectorType::get(Builder.getInt8Ty(), VecVT->getElementCount()));
----------------
sdesmalen-arm wrote:
We can't use a hard-coded i8, as this may not be sufficient to represent the
number of bits from popcount.
https://github.com/llvm/llvm-project/pull/100579
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits