================
@@ -5101,6 +5101,29 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, 
const CallExpr *Call,
           unsigned SrcIdx = (ShuffleMask >> 6) & 0x1;
           return std::pair<unsigned, int>{SrcIdx, Offset};
         });
+  case X86::BI__builtin_ia32_vperm2f128_pd256:
+  case X86::BI__builtin_ia32_vperm2f128_ps256:
+  case X86::BI__builtin_ia32_vperm2f128_si256:
+  case X86::BI__builtin_ia32_permti256:
+    return interp__builtin_ia32_shuffle_generic(
+        S, OpPC, Call,
+        [BuiltinID, Call](unsigned DstIdx, unsigned ShuffleMask) {
+          unsigned NumElements =
+              
Call->getArg(0)->getType()->getAs<VectorType>()->getNumElements();
+          unsigned PreservedBitsCnt = NumElements >> 2;
+          unsigned ControlBitsCnt = DstIdx >> PreservedBitsCnt << 2;
+          unsigned ControlBits = ShuffleMask >> ControlBitsCnt;
+
+          if (BuiltinID == X86::BI__builtin_ia32_permti256 &&
+              (ControlBits & 0b1000))
+            return std::make_pair(0u, -1);
----------------
RKSimon wrote:

There is no difference in the zero-masking handling between these builtins - 
they all zero out the 128-bit sublane if the relevant `ControlBits & 0b1000` 
bit is set.

https://github.com/llvm/llvm-project/pull/172149
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to