================
@@ -3128,6 +3128,52 @@ static bool interp__builtin_ia32_vpconflict(InterpState
&S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_ia32_shuf(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
+ assert(Call->getNumArgs() == 3);
+
+ unsigned ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue();
+ QualType Arg0Type = Call->getArg(0)->getType();
+ const auto *VecT = Arg0Type->castAs<VectorType>();
+ PrimType ElemT = *S.getContext().classify(VecT->getElementType());
+ unsigned NumElems = VecT->getNumElements();
+ unsigned LaneWidth = S.getContext().getBitWidth(VecT->getElementType());
+ unsigned NumLanes = LaneWidth * NumElems / 128;
+ unsigned NumElemPerLane = 128 / LaneWidth;
+
+ const Pointer &B = S.Stk.pop<Pointer>();
+ const Pointer &A = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ unsigned NumSelectableElems = NumElemPerLane / 2;
+ unsigned BitsPerElem = NumSelectableElems == 1 ? 1 : 2;
+ unsigned IndexMask = BitsPerElem == 2 ? 0x3 : 0x1;
+ unsigned MaskBits = 8;
+
+ TYPE_SWITCH(ElemT, {
----------------
tbaederr wrote:
Try to minimize what's in the `TYPE_SWITCH`. AFAICS, it's only one assignment
per loop, so two small type switches would be better.
https://github.com/llvm/llvm-project/pull/164078
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits