Hi,
While applying some review comments for the AArch64 code, I noticed that the
arguments are marshalled in the wrong order when a NEON vfma intrinsic is
converted into a call to the LLVM fma intrinsic.
The NEON version expects the modified (accumulator) argument to come first;
however, the LLVM version follows libm's semantics and puts the accumulator
last.
This patch should fix the issue. Ok to commit?
Cheers.
Tim.
commit 3a0ccbde4d110e0bbf4ad508bff0fedf7837803c
Author: Tim Northover <[email protected]>
Date: Mon Jan 14 09:58:10 2013 +0000
Correct order of operands forwarding NEON vfma to LLVM fma
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 1a53f62..23be9e5 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -2112,7 +2112,9 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- return Builder.CreateCall3(F, Ops[0], Ops[1], Ops[2]);
+
+ // NEON intrinsic puts accumulator first, unlike the LLVM fma.
+ return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
}
case ARM::BI__builtin_neon_vpadal_v:
case ARM::BI__builtin_neon_vpadalq_v: {
diff --git a/test/CodeGen/arm-neon-fma.c b/test/CodeGen/arm-neon-fma.c
new file mode 100644
index 0000000..7511fe1
--- /dev/null
+++ b/test/CodeGen/arm-neon-fma.c
@@ -0,0 +1,16 @@
+// REQUIRES: arm-registered-target
+// RUN: %clang -target thumbv7-none-linux-gnueabihf \
+// RUN: -mcpu=cortex-a8 -mfloat-abi=hard \
+// RUN: -O3 -S -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_neon.h>
+
+float32x2_t test_fma_order(float32x2_t accum, float32x2_t lhs, float32x2_t rhs) {
+ return vfma_f32(accum, lhs, rhs);
+// CHECK: call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %rhs, <2 x float> %accum)
+}
+
+float32x4_t test_fmaq_order(float32x4_t accum, float32x4_t lhs, float32x4_t rhs) {
+ return vfmaq_f32(accum, lhs, rhs);
+// CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %rhs, <4 x float> %accum)
+}
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits