================
@@ -2230,12 +2223,53 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned 
builtinID, const CallExpr *expr,
   case NEON::BI__builtin_neon_vfma_lane_v:
   case NEON::BI__builtin_neon_vfmaq_lane_v:
   case NEON::BI__builtin_neon_vfma_laneq_v:
-  case NEON::BI__builtin_neon_vfmaq_laneq_v:
+  case NEON::BI__builtin_neon_vfmaq_laneq_v: {
+    // Keep the NEON vector type local to each vector-only builtin block.
+    cir::VectorType ty = getNeonType(this, type, loc);
+    if (!ty)
+      return nullptr;
+    mlir::Value addend = ops[0];
+    mlir::Value multiplicand = ops[1];
+    mlir::Value laneSource = ops[2];
+    auto vecTy = mlir::cast<cir::VectorType>(ty);
+    auto elemTy = vecTy.getElementType();
+    auto numElts = vecTy.getSize();
+
+    if (addend.getType() != ty)
+      addend = builder.createBitcast(loc, addend, ty);
+    if (multiplicand.getType() != ty)
+      multiplicand = builder.createBitcast(loc, multiplicand, ty);
+
+    cir::VectorType sourceTy = ty;
+    if (builtinID == NEON::BI__builtin_neon_vfmaq_lane_v)
+      sourceTy = cir::VectorType::get(elemTy, numElts / 2);
+    else if (builtinID == NEON::BI__builtin_neon_vfma_laneq_v)
+      sourceTy = cir::VectorType::get(elemTy, numElts * 2);
+
+    if (laneSource.getType() != sourceTy)
+      laneSource = builder.createBitcast(loc, laneSource, sourceTy);
+
+    int64_t lane =
+        expr->getArg(3)->EvaluateKnownConstInt(getContext()).getSExtValue();
+    llvm::SmallVector<int64_t> mask(numElts, lane);
+    mlir::Value splat = builder.createVecShuffle(loc, laneSource, mask);
+
+    llvm::SmallVector<mlir::Value> fmaOps = {multiplicand, splat, addend};
+    return emitCallMaybeConstrainedBuiltin(builder, loc, "fma", ty, fmaOps);
+  }
----------------
banach-space wrote:

This block is very involved compared to 
http://github.com/llvm/llvm-project/blob/8d7823ea8f40cf5df1c623018bf9c0a308fa4a36/clang/lib/CodeGen/TargetBuiltins/ARM.cpp?plain=1#L6153-L6162
 and 
https://github.com/llvm/clangir/blob/main/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp#L4246-L4264

Why not follow the pre-existing logic that is much shorter?

https://github.com/llvm/llvm-project/pull/188190
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to