================
@@ -512,23 +543,69 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_insertf64x2_256:
case X86::BI__builtin_ia32_inserti64x2_256:
case X86::BI__builtin_ia32_insertf64x2_512:
- case X86::BI__builtin_ia32_inserti64x2_512:
+ case X86::BI__builtin_ia32_inserti64x2_512: {
+ unsigned dstNumElts = cast<cir::VectorType>(ops[0].getType()).getSize();
+ unsigned srcNumElts = cast<cir::VectorType>(ops[1].getType()).getSize();
+ unsigned subVectors = dstNumElts / srcNumElts;
+ assert(llvm::isPowerOf2_32(subVectors) && "Expected power of 2 subvectors");
+
+ uint64_t index = getZExtIntValueFromConstOp(ops[2]);
+ index &= subVectors - 1; // Remove any extra bits.
+ index *= srcNumElts;
+
+ int64_t indices[16];
+ for (unsigned i = 0; i != dstNumElts; ++i)
+ indices[i] = (i >= srcNumElts) ? srcNumElts + (i % srcNumElts) : i;
+
+ mlir::Value op1 = builder.createVecShuffle(
+ getLoc(expr->getExprLoc()), ops[1], ArrayRef(indices, dstNumElts));
+
+ for (unsigned i = 0; i != dstNumElts; ++i) {
+ if (i >= index && i < (index + srcNumElts))
+ indices[i] = (i - index) + dstNumElts;
+ else
+ indices[i] = i;
+ }
+
+ return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], op1,
+ ArrayRef(indices, dstNumElts));
+ }
case X86::BI__builtin_ia32_pmovqd512_mask:
case X86::BI__builtin_ia32_pmovwb512_mask:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
case X86::BI__builtin_ia32_pblendw128:
case X86::BI__builtin_ia32_blendpd:
case X86::BI__builtin_ia32_blendps:
case X86::BI__builtin_ia32_blendpd256:
case X86::BI__builtin_ia32_blendps256:
case X86::BI__builtin_ia32_pblendw256:
case X86::BI__builtin_ia32_pblendd128:
- case X86::BI__builtin_ia32_pblendd256:
+ case X86::BI__builtin_ia32_pblendd256: {
+ uint32_t imm = getZExtIntValueFromConstOp(ops[2]);
+ unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
+
+ int64_t indices[16];
+ // If there are more than 8 elements, the immediate is used twice so make
+ // sure we handle that.
+ for (unsigned i = 0; i != numElts; ++i)
+ indices[i] = ((imm >> (i % 8)) & 0x1) ? numElts + i : i;
+
+ return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1],
+ ArrayRef(indices, numElts));
+ }
case X86::BI__builtin_ia32_pshuflw:
case X86::BI__builtin_ia32_pshuflw256:
- case X86::BI__builtin_ia32_pshuflw512:
+ case X86::BI__builtin_ia32_pshuflw512: {
+ return emitPshufW(*this, builder, ops, expr, true);
+ }
case X86::BI__builtin_ia32_pshufhw:
case X86::BI__builtin_ia32_pshufhw256:
- case X86::BI__builtin_ia32_pshufhw512:
+ case X86::BI__builtin_ia32_pshufhw512: {
+ return emitPshufW(*this, builder, ops, expr, false);
+ }
case X86::BI__builtin_ia32_pshufd:
case X86::BI__builtin_ia32_pshufd256:
case X86::BI__builtin_ia32_pshufd512:
----------------
HendrikHuebner wrote:
This is missing an NYI (or you could implement it along with `pshufd`).
https://github.com/llvm/llvm-project/pull/169178
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits