Hi t.p.northover,
Hi Tim and reviewers,
Here are two patches, one for LLVM and one for Clang, that add the missing ACLE
intrinsics which map to the general arithmetic operations of the VFP instructions.
Please review.
Thanks,
-Hao
http://llvm-reviews.chandlerc.com/D2298
Files:
lib/Target/AArch64/AArch64InstrNEON.td
test/CodeGen/AArch64/neon-add-sub.ll
tools/clang/include/clang/Basic/arm_neon.td
tools/clang/test/CodeGen/aarch64-neon-intrinsics.c
Index: lib/Target/AArch64/AArch64InstrNEON.td
===================================================================
--- lib/Target/AArch64/AArch64InstrNEON.td
+++ lib/Target/AArch64/AArch64InstrNEON.td
@@ -5788,6 +5788,37 @@
defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>;
defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>;
+// Scalar general arithmetic operations
+class Neon_Scalar_GeneralMath2D_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
+
+class Neon_Scalar_GeneralMath3D_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (INST FPR64:$Rn, FPR64:$Rm)>;
+
+class Neon_Scalar_GeneralMath4D_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm),
+ (v1f64 FPR64:$Ra))),
+ (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
+
+def : Neon_Scalar_GeneralMath3D_pattern<fadd, FADDddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<fmul, FMULddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<fsub, FSUBddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<fdiv, FDIVddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vabds, FABDddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmaxs, FMAXddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmins, FMINddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vmaxnm, FMAXNMddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vminnm, FMINNMddd>;
+
+def : Neon_Scalar_GeneralMath2D_pattern<fabs, FABSdd>;
+def : Neon_Scalar_GeneralMath2D_pattern<fneg, FNEGdd>;
+
+def : Neon_Scalar_GeneralMath4D_pattern<fma, FMADDdddd>;
+def : Neon_Scalar_GeneralMath4D_pattern<fmsub, FMSUBdddd>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
Index: test/CodeGen/AArch64/neon-add-sub.ll
===================================================================
--- test/CodeGen/AArch64/neon-add-sub.ll
+++ test/CodeGen/AArch64/neon-add-sub.ll
@@ -118,3 +118,120 @@
ret <2 x double> %tmp3
}
+define <1 x double> @test_vadd_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vadd_f64
+; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = fadd <1 x double> %a, %b
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vmul_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmul_f64
+; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = fmul <1 x double> %a, %b
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vdiv_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vdiv_f64
+; CHECK: fdiv d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = fdiv <1 x double> %a, %b
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vmla_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vmla_f64
+; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = fmul <1 x double> %b, %c
+ %2 = fadd <1 x double> %1, %a
+ ret <1 x double> %2
+}
+
+define <1 x double> @test_vmls_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vmls_f64
+; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = fmul <1 x double> %b, %c
+ %2 = fsub <1 x double> %a, %1
+ ret <1 x double> %2
+}
+
+define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vfms_f64
+; CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = fsub <1 x double> <double -0.000000e+00>, %b
+ %2 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %1, <1 x double> %c, <1 x double> %a)
+ ret <1 x double> %2
+}
+
+define <1 x double> @test_vfma_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vfma_f64
+; CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vsub_f64
+; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = fsub <1 x double> %a, %b
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vabd_f64
+; CHECK: fabd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double> %a, <1 x double> %b)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmax_f64
+; CHECK: fmax d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double> %a, <1 x double> %b)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmin_f64
+; CHECK: fmin d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double> %a, <1 x double> %b)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmaxnm_f64
+; CHECK: fmaxnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double> %a, <1 x double> %b)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vminnm_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vminnm_f64
+; CHECK: fminnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.aarch64.neon.vminnm.v1f64(<1 x double> %a, <1 x double> %b)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vabs_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vabs_f64
+; CHECK: fabs d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.fabs.v1f64(<1 x double> %a)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vneg_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vneg_f64
+; CHECK: fneg d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = fsub <1 x double> <double -0.000000e+00>, %a
+ ret <1 x double> %1
+}
+
+declare <1 x double> @llvm.fabs.v1f64(<1 x double>)
+declare <1 x double> @llvm.aarch64.neon.vminnm.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.fma.v1f64(<1 x double>, <1 x double>, <1 x double>)
\ No newline at end of file
Index: tools/clang/include/clang/Basic/arm_neon.td
===================================================================
--- tools/clang/include/clang/Basic/arm_neon.td
+++ tools/clang/include/clang/Basic/arm_neon.td
@@ -595,20 +595,22 @@
////////////////////////////////////////////////////////////////////////////////
// Addition
-// With additional Qd type.
-def ADD : IOpInst<"vadd", "ddd", "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd", OP_ADD>;
+// With additional d, Qd type.
+def ADD : IOpInst<"vadd", "ddd", "csilfdUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd",
+ OP_ADD>;
////////////////////////////////////////////////////////////////////////////////
// Subtraction
// With additional Qd type.
-def SUB : IOpInst<"vsub", "ddd", "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd", OP_SUB>;
+def SUB : IOpInst<"vsub", "ddd", "csildfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd",
+ OP_SUB>;
////////////////////////////////////////////////////////////////////////////////
// Multiplication
// With additional Qd type.
-def MUL : IOpInst<"vmul", "ddd", "csifUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MUL>;
-def MLA : IOpInst<"vmla", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLA>;
-def MLS : IOpInst<"vmls", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLS>;
+def MUL : IOpInst<"vmul", "ddd", "csifdUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MUL>;
+def MLA : IOpInst<"vmla", "dddd", "csifdUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLA>;
+def MLS : IOpInst<"vmls", "dddd", "csifdUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLS>;
////////////////////////////////////////////////////////////////////////////////
// Multiplication Extended
@@ -616,13 +618,13 @@
////////////////////////////////////////////////////////////////////////////////
// Division
-def FDIV : IOpInst<"vdiv", "ddd", "fQfQd", OP_DIV>;
+def FDIV : IOpInst<"vdiv", "ddd", "fdQfQd", OP_DIV>;
////////////////////////////////////////////////////////////////////////////////
// Vector fused multiply-add operations
-// With additional Qd type.
-def FMLA : SInst<"vfma", "dddd", "fQfQd">;
-def FMLS : SInst<"vfms", "dddd", "fQfQd">;
+// With additional d, Qd type.
+def FMLA : SInst<"vfma", "dddd", "fdQfQd">;
+def FMLS : SInst<"vfms", "dddd", "fdQfQd">;
////////////////////////////////////////////////////////////////////////////////
// Logical operations
@@ -632,15 +634,15 @@
////////////////////////////////////////////////////////////////////////////////
// Absolute Difference
-// With additional Qd type.
-def ABD : SInst<"vabd", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
+// With additional d, Qd type.
+def ABD : SInst<"vabd", "ddd", "csiUcUsUifdQcQsQiQUcQUsQUiQfQd">;
////////////////////////////////////////////////////////////////////////////////
// saturating absolute/negate
// With additional Qd/Ql type.
-def ABS : SInst<"vabs", "dd", "csifQcQsQiQfQlQd">;
+def ABS : SInst<"vabs", "dd", "csifdQcQsQiQfQlQd">;
def QABS : SInst<"vqabs", "dd", "csiQcQsQiQl">;
-def NEG : SOpInst<"vneg", "dd", "csifQcQsQiQfQdQl", OP_NEG>;
+def NEG : SOpInst<"vneg", "dd", "csifdQcQsQiQfQdQl", OP_NEG>;
def QNEG : SInst<"vqneg", "dd", "csiQcQsQiQl">;
////////////////////////////////////////////////////////////////////////////////
@@ -742,13 +744,13 @@
////////////////////////////////////////////////////////////////////////////////
// Max/Min Integer
// With additional Qd type.
-def MAX : SInst<"vmax", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
-def MIN : SInst<"vmin", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
+def MAX : SInst<"vmax", "ddd", "csiUcUsUifdQcQsQiQUcQUsQUiQfQd">;
+def MIN : SInst<"vmin", "ddd", "csiUcUsUifdQcQsQiQUcQUsQUiQfQd">;
////////////////////////////////////////////////////////////////////////////////
// MaxNum/MinNum Floating Point
-def FMAXNM : SInst<"vmaxnm", "ddd", "fQfQd">;
-def FMINNM : SInst<"vminnm", "ddd", "fQfQd">;
+def FMAXNM : SInst<"vmaxnm", "ddd", "fdQfQd">;
+def FMINNM : SInst<"vminnm", "ddd", "fdQfQd">;
////////////////////////////////////////////////////////////////////////////////
// Pairwise Max/Min
Index: tools/clang/test/CodeGen/aarch64-neon-intrinsics.c
===================================================================
--- tools/clang/test/CodeGen/aarch64-neon-intrinsics.c
+++ tools/clang/test/CodeGen/aarch64-neon-intrinsics.c
@@ -11218,3 +11218,93 @@
// CHECK: fabd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
return vabdd_f64(a, b);
}
+
+float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) {
+ // CHECK-LABEL: test_vadd_f64
+ return vadd_f64(a, b);
+ // CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) {
+ // CHECK-LABEL: test_vmul_f64
+ return vmul_f64(a, b);
+ // CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) {
+ // CHECK-LABEL: test_vdiv_f64
+ return vdiv_f64(a, b);
+ // CHECK: fdiv d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
+ // CHECK-LABEL: test_vmla_f64
+ return vmla_f64(a, b, c);
+ // CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
+ // CHECK-LABEL: test_vmls_f64
+ return vmls_f64(a, b, c);
+ // CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
+ // CHECK-LABEL: test_vfma_f64
+ return vfma_f64(a, b, c);
+ // CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
+ // CHECK-LABEL: test_vfms_f64
+ return vfms_f64(a, b, c);
+ // CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) {
+ // CHECK-LABEL: test_vsub_f64
+ return vsub_f64(a, b);
+ // CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) {
+ // CHECK-LABEL: test_vabd_f64
+ return vabd_f64(a, b);
+ // CHECK: fabd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) {
+// CHECK-LABEL: test_vmax_f64
+ return vmax_f64(a, b);
+// CHECK: fmax d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) {
+// CHECK-LABEL: test_vmin_f64
+ return vmin_f64(a, b);
+// CHECK: fmin d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) {
+// CHECK-LABEL: test_vmaxnm_f64
+ return vmaxnm_f64(a, b);
+// CHECK: fmaxnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) {
+// CHECK-LABEL: test_vminnm_f64
+ return vminnm_f64(a, b);
+// CHECK: fminnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vabs_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vabs_f64
+ return vabs_f64(a);
+ // CHECK: fabs d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vneg_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vneg_f64
+ return vneg_f64(a);
+ // CHECK: fneg d{{[0-9]+}}, d{{[0-9]+}}
+}
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits