[PATCH][AArch64] Add missing ACLE intrinsics mapping to general arithmetic operations from VFP instructions

Hao Liu Mon, 02 Dec 2013 01:48:07 -0800

Hi t.p.northover,

Hi Tim and reviewers,


These two patches, one for LLVM and one for Clang, add the missing ACLE
intrinsics that map to the general arithmetic operations provided by VFP
instructions.

Please review.

Thanks,
-Hao

http://llvm-reviews.chandlerc.com/D2298

Files:
  lib/Target/AArch64/AArch64InstrNEON.td
  test/CodeGen/AArch64/neon-add-sub.ll
  tools/clang/include/clang/Basic/arm_neon.td
  tools/clang/test/CodeGen/aarch64-neon-intrinsics.c

Index: lib/Target/AArch64/AArch64InstrNEON.td
===================================================================
--- lib/Target/AArch64/AArch64InstrNEON.td
+++ lib/Target/AArch64/AArch64InstrNEON.td
@@ -5788,6 +5788,37 @@
 defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>;
 defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>;
 
+// Scalar general arithmetic operations
+class Neon_Scalar_GeneralMath2D_pattern<SDPatternOperator opnode,
+                                        Instruction INST> 
+    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
+
+class Neon_Scalar_GeneralMath3D_pattern<SDPatternOperator opnode,
+                                        Instruction INST> 
+    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+          (INST FPR64:$Rn, FPR64:$Rm)>;
+
+class Neon_Scalar_GeneralMath4D_pattern<SDPatternOperator opnode,
+                                        Instruction INST> 
+    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm),
+              (v1f64 FPR64:$Ra))),
+          (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
+
+def : Neon_Scalar_GeneralMath3D_pattern<fadd, FADDddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<fmul, FMULddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<fsub, FSUBddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<fdiv, FDIVddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vabds, FABDddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmaxs, FMAXddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmins, FMINddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vmaxnm, FMAXNMddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vminnm, FMINNMddd>;
+
+def : Neon_Scalar_GeneralMath2D_pattern<fabs, FABSdd>;
+def : Neon_Scalar_GeneralMath2D_pattern<fneg, FNEGdd>;
+
+def : Neon_Scalar_GeneralMath4D_pattern<fma, FMADDdddd>;
+def : Neon_Scalar_GeneralMath4D_pattern<fmsub, FMSUBdddd>;
 
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
Index: test/CodeGen/AArch64/neon-add-sub.ll
===================================================================
--- test/CodeGen/AArch64/neon-add-sub.ll
+++ test/CodeGen/AArch64/neon-add-sub.ll
@@ -118,3 +118,120 @@
 	ret <2 x double> %tmp3
 }
 
+define <1 x double> @test_vadd_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vadd_f64
+; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fadd <1 x double> %a, %b
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vmul_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmul_f64
+; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fmul <1 x double> %a, %b
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vdiv_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vdiv_f64
+; CHECK: fdiv d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fdiv <1 x double> %a, %b
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vmla_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vmla_f64
+; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fmul <1 x double> %b, %c
+  %2 = fadd <1 x double> %1, %a
+  ret <1 x double> %2
+}
+
+define <1 x double> @test_vmls_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vmls_f64
+; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fmul <1 x double> %b, %c
+  %2 = fsub <1 x double> %a, %1
+  ret <1 x double> %2
+}
+
+define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vfms_f64
+; CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fsub <1 x double> <double -0.000000e+00>, %b
+  %2 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %1, <1 x double> %c, <1 x double> %a)
+  ret <1 x double> %2
+}
+
+define <1 x double> @test_vfma_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vfma_f64
+; CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vsub_f64
+; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fsub <1 x double> %a, %b
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vabd_f64
+; CHECK: fabd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmax_f64
+; CHECK: fmax d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmin_f64
+; CHECK: fmin d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmaxnm_f64
+; CHECK: fmaxnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vminnm_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vminnm_f64
+; CHECK: fminnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.aarch64.neon.vminnm.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vabs_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vabs_f64
+; CHECK: fabs d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.fabs.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vneg_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vneg_f64
+; CHECK: fneg d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fsub <1 x double> <double -0.000000e+00>, %a
+  ret <1 x double> %1
+}
+
+declare <1 x double> @llvm.fabs.v1f64(<1 x double>)
+declare <1 x double> @llvm.aarch64.neon.vminnm.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.fma.v1f64(<1 x double>, <1 x double>, <1 x double>)
\ No newline at end of file
Index: tools/clang/include/clang/Basic/arm_neon.td
===================================================================
--- tools/clang/include/clang/Basic/arm_neon.td
+++ tools/clang/include/clang/Basic/arm_neon.td
@@ -595,20 +595,22 @@
 
 ////////////////////////////////////////////////////////////////////////////////
 // Addition
-// With additional Qd type.
-def ADD : IOpInst<"vadd", "ddd", "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd", OP_ADD>;
+// With additional d, Qd type.
+def ADD : IOpInst<"vadd", "ddd", "csilfdUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd",
+                  OP_ADD>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Subtraction
-// With additional Qd type.
+// With additional d, Qd type.
-def SUB : IOpInst<"vsub", "ddd", "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd", OP_SUB>;
+def SUB : IOpInst<"vsub", "ddd", "csildfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd",
+                  OP_SUB>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Multiplication
-// With additional Qd type.
+// With additional d, Qd type.
-def MUL     : IOpInst<"vmul", "ddd", "csifUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MUL>;
-def MLA     : IOpInst<"vmla", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLA>;
-def MLS     : IOpInst<"vmls", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLS>;
+def MUL     : IOpInst<"vmul", "ddd", "csifdUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MUL>;
+def MLA     : IOpInst<"vmla", "dddd", "csifdUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLA>;
+def MLS     : IOpInst<"vmls", "dddd", "csifdUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLS>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Multiplication Extended
@@ -616,13 +618,13 @@
 
 ////////////////////////////////////////////////////////////////////////////////
 // Division
-def FDIV : IOpInst<"vdiv", "ddd",  "fQfQd", OP_DIV>;
+def FDIV : IOpInst<"vdiv", "ddd",  "fdQfQd", OP_DIV>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Vector fused multiply-add operations
-// With additional Qd type.
-def FMLA : SInst<"vfma", "dddd", "fQfQd">;
-def FMLS : SInst<"vfms", "dddd", "fQfQd">;
+// With additional d, Qd type.
+def FMLA : SInst<"vfma", "dddd", "fdQfQd">;
+def FMLS : SInst<"vfms", "dddd", "fdQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Logical operations
@@ -632,15 +634,15 @@
 
 ////////////////////////////////////////////////////////////////////////////////
 // Absolute Difference
-// With additional Qd type.
-def ABD  : SInst<"vabd", "ddd",  "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
+// With additional d, Qd type.
+def ABD  : SInst<"vabd", "ddd",  "csiUcUsUifdQcQsQiQUcQUsQUiQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // saturating absolute/negate
 // With additional Qd/Ql type.
-def ABS    : SInst<"vabs", "dd", "csifQcQsQiQfQlQd">;
+def ABS    : SInst<"vabs", "dd", "csifdQcQsQiQfQlQd">;
 def QABS   : SInst<"vqabs", "dd", "csiQcQsQiQl">;
-def NEG    : SOpInst<"vneg", "dd", "csifQcQsQiQfQdQl", OP_NEG>;
+def NEG    : SOpInst<"vneg", "dd", "csifdQcQsQiQfQdQl", OP_NEG>;
 def QNEG   : SInst<"vqneg", "dd", "csiQcQsQiQl">;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -742,13 +744,13 @@
 ////////////////////////////////////////////////////////////////////////////////
 // Max/Min Integer
 // With additional Qd type.
-def MAX : SInst<"vmax", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
-def MIN : SInst<"vmin", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
+def MAX : SInst<"vmax", "ddd", "csiUcUsUifdQcQsQiQUcQUsQUiQfQd">;
+def MIN : SInst<"vmin", "ddd", "csiUcUsUifdQcQsQiQUcQUsQUiQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // MaxNum/MinNum Floating Point
-def FMAXNM : SInst<"vmaxnm", "ddd", "fQfQd">;
-def FMINNM : SInst<"vminnm", "ddd", "fQfQd">;
+def FMAXNM : SInst<"vmaxnm", "ddd", "fdQfQd">;
+def FMINNM : SInst<"vminnm", "ddd", "fdQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Pairwise Max/Min
Index: tools/clang/test/CodeGen/aarch64-neon-intrinsics.c
===================================================================
--- tools/clang/test/CodeGen/aarch64-neon-intrinsics.c
+++ tools/clang/test/CodeGen/aarch64-neon-intrinsics.c
@@ -11218,3 +11218,93 @@
 // CHECK: fabd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
   return vabdd_f64(a, b);
 }
+
+float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) {
+  // CHECK-LABEL: test_vadd_f64
+  return vadd_f64(a, b);
+  // CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) {
+  // CHECK-LABEL: test_vmul_f64
+  return vmul_f64(a, b);
+  // CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) {
+  // CHECK-LABEL: test_vdiv_f64
+  return vdiv_f64(a, b);
+  // CHECK: fdiv d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
+  // CHECK-LABEL: test_vmla_f64
+  return vmla_f64(a, b, c);
+  // CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
+  // CHECK-LABEL: test_vmls_f64
+  return vmls_f64(a, b, c);
+  // CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
+  // CHECK-LABEL: test_vfma_f64
+  return vfma_f64(a, b, c);
+  // CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
+  // CHECK-LABEL: test_vfms_f64
+  return vfms_f64(a, b, c);
+  // CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) {
+  // CHECK-LABEL: test_vsub_f64
+  return vsub_f64(a, b);
+  // CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) {
+  // CHECK-LABEL: test_vabd_f64
+  return vabd_f64(a, b);
+  // CHECK: fabd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) {
+// CHECK-LABEL: test_vmax_f64
+  return vmax_f64(a, b);
+// CHECK: fmax d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) {
+// CHECK-LABEL: test_vmin_f64
+  return vmin_f64(a, b);
+// CHECK: fmin d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) {
+// CHECK-LABEL: test_vmaxnm_f64
+  return vmaxnm_f64(a, b);
+// CHECK: fmaxnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) {
+// CHECK-LABEL: test_vminnm_f64
+  return vminnm_f64(a, b);
+// CHECK: fminnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vabs_f64(float64x1_t a) {
+  // CHECK-LABEL: test_vabs_f64
+  return vabs_f64(a);
+  // CHECK: fabs d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vneg_f64(float64x1_t a) {
+  // CHECK-LABEL: test_vneg_f64
+  return vneg_f64(a);
+  // CHECK: fneg d{{[0-9]+}}, d{{[0-9]+}}
+}

_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

[PATCH][AArch64] Add missing ACLE intrinsics mapping to general arithmetic operations from VFP instructions

Reply via email to