================
@@ -3128,6 +3128,52 @@ static bool interp__builtin_ia32_vpconflict(InterpState 
&S, CodePtr OpPC,
   return true;
 }
 
+static bool interp__builtin_ia32_shuf(InterpState &S, CodePtr OpPC,
+                                      const CallExpr *Call) {
+  assert(Call->getNumArgs() == 3);
+
+  unsigned ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue();
+  QualType Arg0Type = Call->getArg(0)->getType();
+  const auto *VecT = Arg0Type->castAs<VectorType>();
+  PrimType ElemT = *S.getContext().classify(VecT->getElementType());
+  unsigned NumElems = VecT->getNumElements();
+  unsigned LaneWidth = S.getContext().getBitWidth(VecT->getElementType());
+  unsigned NumLanes = LaneWidth * NumElems / 128;
+  unsigned NumElemPerLane = 128 / LaneWidth;
+
+  const Pointer &B = S.Stk.pop<Pointer>();
+  const Pointer &A = S.Stk.pop<Pointer>();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  unsigned NumSelectableElems = NumElemPerLane / 2;
+  unsigned BitsPerElem = NumSelectableElems == 1 ? 1 : 2;
+  unsigned IndexMask = BitsPerElem == 2 ? 0x3 : 0x1;
+  unsigned MaskBits = 8;
+
+  TYPE_SWITCH(ElemT, {
----------------
tbaederr wrote:

Try to minimize what's in the `TYPE_SWITCH` afaics, it's only one assignment 
per loop, so two small type switches would be better.

https://github.com/llvm/llvm-project/pull/164078
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to