[PATCH] D45483: [NEON] Support vfma_n and vfms_n intrinsics

2018-04-11 Thread Ivan Kosarev via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL329814: [NEON] Support vfma_n and vfms_n intrinsics 
(authored by kosarev, committed by ).
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D45483?vs=141828=142008#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D45483

Files:
  cfe/trunk/include/clang/Basic/arm_neon.td
  cfe/trunk/test/CodeGen/aarch64-neon-2velem.c


Index: cfe/trunk/test/CodeGen/aarch64-neon-2velem.c
===
--- cfe/trunk/test/CodeGen/aarch64-neon-2velem.c
+++ cfe/trunk/test/CodeGen/aarch64-neon-2velem.c
@@ -3083,6 +3083,17 @@
   return vfma_n_f32(a, b, n);
 }
 
+// CHECK-LABEL: @test_vfma_n_f64(
+// CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, 
i32 0
+// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8>
+// CHECK:   [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, 
<1 x double> [[VECINIT_I]], <1 x double> %a)
+// CHECK:   ret <1 x double> [[TMP3]]
+float64x1_t test_vfma_n_f64(float64x1_t a, float64x1_t b, float64_t n) {
+  return vfma_n_f64(a, b, n);
+}
+
 // CHECK-LABEL: @test_vfmaq_n_f32(
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0
 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], 
float %n, i32 1
@@ -3110,6 +3121,18 @@
   return vfms_n_f32(a, b, n);
 }
 
+// CHECK-LABEL: @test_vfms_n_f64(
+// CHECK:   [[SUB_I:%.*]] = fsub <1 x double> , %b
+// CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, 
i32 0
+// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
+// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8>
+// CHECK:   [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> 
[[SUB_I]], <1 x double> [[VECINIT_I]], <1 x double> %a)
+// CHECK:   ret <1 x double> [[TMP3]]
+float64x1_t test_vfms_n_f64(float64x1_t a, float64x1_t b, float64_t n) {
+  return vfms_n_f64(a, b, n);
+}
+
 // CHECK-LABEL: @test_vfmsq_n_f32(
 // CHECK:   [[SUB_I:%.*]] = fsub <4 x float> , %b
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0
Index: cfe/trunk/include/clang/Basic/arm_neon.td
===
--- cfe/trunk/include/clang/Basic/arm_neon.td
+++ cfe/trunk/include/clang/Basic/arm_neon.td
@@ -621,8 +621,8 @@
 // MUL, MLA, MLS, FMA, FMS definitions with scalar argument
 def VMUL_N_A64 : IOpInst<"vmul_n", "dds", "Qd", OP_MUL_N>;
 
-def FMLA_N : SOpInst<"vfma_n", "ddds", "fQfQd", OP_FMLA_N>;
-def FMLS_N : SOpInst<"vfms_n", "ddds", "fQfQd", OP_FMLS_N>;
+def FMLA_N : SOpInst<"vfma_n", "ddds", "fdQfQd", OP_FMLA_N>;
+def FMLS_N : SOpInst<"vfms_n", "ddds", "fdQfQd", OP_FMLS_N>;
 
 def MLA_N : SOpInst<"vmla_n", "ddds", "Qd", OP_MLA_N>;
 def MLS_N : SOpInst<"vmls_n", "ddds", "Qd", OP_MLS_N>;


Index: cfe/trunk/test/CodeGen/aarch64-neon-2velem.c
===
--- cfe/trunk/test/CodeGen/aarch64-neon-2velem.c
+++ cfe/trunk/test/CodeGen/aarch64-neon-2velem.c
@@ -3083,6 +3083,17 @@
   return vfma_n_f32(a, b, n);
 }
 
+// CHECK-LABEL: @test_vfma_n_f64(
+// CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, i32 0
+// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8>
+// CHECK:   [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> [[VECINIT_I]], <1 x double> %a)
+// CHECK:   ret <1 x double> [[TMP3]]
+float64x1_t test_vfma_n_f64(float64x1_t a, float64x1_t b, float64_t n) {
+  return vfma_n_f64(a, b, n);
+}
+
 // CHECK-LABEL: @test_vfmaq_n_f32(
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0
 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1
@@ -3110,6 +3121,18 @@
   return vfms_n_f32(a, b, n);
 }
 
+// CHECK-LABEL: @test_vfms_n_f64(
+// CHECK:   [[SUB_I:%.*]] = fsub <1 x double> , %b
+// CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, i32 0
+// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
+// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8>
+// CHECK:   [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> [[VECINIT_I]], <1 x double> %a)
+// CHECK:   ret <1 x double> [[TMP3]]
+float64x1_t test_vfms_n_f64(float64x1_t a, float64x1_t b, float64_t n) {
+  return vfms_n_f64(a, b, n);
+}
+
 // CHECK-LABEL: @test_vfmsq_n_f32(
 // 

[PATCH] D45483: [NEON] Support vfma_n and vfms_n intrinsics

2018-04-11 Thread Ivan Kosarev via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rC329814: [NEON] Support vfma_n and vfms_n intrinsics 
(authored by kosarev, committed by ).

Repository:
  rL LLVM

https://reviews.llvm.org/D45483

Files:
  include/clang/Basic/arm_neon.td
  test/CodeGen/aarch64-neon-2velem.c


Index: include/clang/Basic/arm_neon.td
===
--- include/clang/Basic/arm_neon.td
+++ include/clang/Basic/arm_neon.td
@@ -621,8 +621,8 @@
 // MUL, MLA, MLS, FMA, FMS definitions with scalar argument
 def VMUL_N_A64 : IOpInst<"vmul_n", "dds", "Qd", OP_MUL_N>;
 
-def FMLA_N : SOpInst<"vfma_n", "ddds", "fQfQd", OP_FMLA_N>;
-def FMLS_N : SOpInst<"vfms_n", "ddds", "fQfQd", OP_FMLS_N>;
+def FMLA_N : SOpInst<"vfma_n", "ddds", "fdQfQd", OP_FMLA_N>;
+def FMLS_N : SOpInst<"vfms_n", "ddds", "fdQfQd", OP_FMLS_N>;
 
 def MLA_N : SOpInst<"vmla_n", "ddds", "Qd", OP_MLA_N>;
 def MLS_N : SOpInst<"vmls_n", "ddds", "Qd", OP_MLS_N>;
Index: test/CodeGen/aarch64-neon-2velem.c
===
--- test/CodeGen/aarch64-neon-2velem.c
+++ test/CodeGen/aarch64-neon-2velem.c
@@ -3083,6 +3083,17 @@
   return vfma_n_f32(a, b, n);
 }
 
+// CHECK-LABEL: @test_vfma_n_f64(
+// CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, 
i32 0
+// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8>
+// CHECK:   [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, 
<1 x double> [[VECINIT_I]], <1 x double> %a)
+// CHECK:   ret <1 x double> [[TMP3]]
+float64x1_t test_vfma_n_f64(float64x1_t a, float64x1_t b, float64_t n) {
+  return vfma_n_f64(a, b, n);
+}
+
 // CHECK-LABEL: @test_vfmaq_n_f32(
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0
 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], 
float %n, i32 1
@@ -3110,6 +3121,18 @@
   return vfms_n_f32(a, b, n);
 }
 
+// CHECK-LABEL: @test_vfms_n_f64(
+// CHECK:   [[SUB_I:%.*]] = fsub <1 x double> , %b
+// CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, 
i32 0
+// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
+// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8>
+// CHECK:   [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> 
[[SUB_I]], <1 x double> [[VECINIT_I]], <1 x double> %a)
+// CHECK:   ret <1 x double> [[TMP3]]
+float64x1_t test_vfms_n_f64(float64x1_t a, float64x1_t b, float64_t n) {
+  return vfms_n_f64(a, b, n);
+}
+
 // CHECK-LABEL: @test_vfmsq_n_f32(
 // CHECK:   [[SUB_I:%.*]] = fsub <4 x float> , %b
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0


Index: include/clang/Basic/arm_neon.td
===
--- include/clang/Basic/arm_neon.td
+++ include/clang/Basic/arm_neon.td
@@ -621,8 +621,8 @@
 // MUL, MLA, MLS, FMA, FMS definitions with scalar argument
 def VMUL_N_A64 : IOpInst<"vmul_n", "dds", "Qd", OP_MUL_N>;
 
-def FMLA_N : SOpInst<"vfma_n", "ddds", "fQfQd", OP_FMLA_N>;
-def FMLS_N : SOpInst<"vfms_n", "ddds", "fQfQd", OP_FMLS_N>;
+def FMLA_N : SOpInst<"vfma_n", "ddds", "fdQfQd", OP_FMLA_N>;
+def FMLS_N : SOpInst<"vfms_n", "ddds", "fdQfQd", OP_FMLS_N>;
 
 def MLA_N : SOpInst<"vmla_n", "ddds", "Qd", OP_MLA_N>;
 def MLS_N : SOpInst<"vmls_n", "ddds", "Qd", OP_MLS_N>;
Index: test/CodeGen/aarch64-neon-2velem.c
===
--- test/CodeGen/aarch64-neon-2velem.c
+++ test/CodeGen/aarch64-neon-2velem.c
@@ -3083,6 +3083,17 @@
   return vfma_n_f32(a, b, n);
 }
 
+// CHECK-LABEL: @test_vfma_n_f64(
+// CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, i32 0
+// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8>
+// CHECK:   [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> [[VECINIT_I]], <1 x double> %a)
+// CHECK:   ret <1 x double> [[TMP3]]
+float64x1_t test_vfma_n_f64(float64x1_t a, float64x1_t b, float64_t n) {
+  return vfma_n_f64(a, b, n);
+}
+
 // CHECK-LABEL: @test_vfmaq_n_f32(
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0
 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1
@@ -3110,6 +3121,18 @@
   return vfms_n_f32(a, b, n);
 }
 
+// CHECK-LABEL: @test_vfms_n_f64(
+// CHECK:   [[SUB_I:%.*]] = fsub <1 x double> , %b
+// CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, i32 0
+// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// CHECK:   

[PATCH] D45483: [NEON] Support vfma_n and vfms_n intrinsics

2018-04-10 Thread Tim Northover via Phabricator via cfe-commits
t.p.northover accepted this revision.
t.p.northover added a comment.
This revision is now accepted and ready to land.

Looks fine to me.


https://reviews.llvm.org/D45483



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D45483: [NEON] Support vfma_n and vfms_n intrinsics

2018-04-10 Thread Ivan Kosarev via Phabricator via cfe-commits
kosarev created this revision.
kosarev added reviewers: az, sbaranga, t.p.northover.
kosarev added a project: clang.
Herald added subscribers: javed.absar, rengolin.

https://reviews.llvm.org/D45483

Files:
  include/clang/Basic/arm_neon.td
  test/CodeGen/aarch64-neon-2velem.c


Index: test/CodeGen/aarch64-neon-2velem.c
===
--- test/CodeGen/aarch64-neon-2velem.c
+++ test/CodeGen/aarch64-neon-2velem.c
@@ -3083,6 +3083,17 @@
   return vfma_n_f32(a, b, n);
 }
 
+// CHECK-LABEL: @test_vfma_n_f64(
+// CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, 
i32 0
+// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8>
+// CHECK:   [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, 
<1 x double> [[VECINIT_I]], <1 x double> %a)
+// CHECK:   ret <1 x double> [[TMP3]]
+float64x1_t test_vfma_n_f64(float64x1_t a, float64x1_t b, float64_t n) {
+  return vfma_n_f64(a, b, n);
+}
+
 // CHECK-LABEL: @test_vfmaq_n_f32(
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0
 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], 
float %n, i32 1
@@ -3110,6 +3121,18 @@
   return vfms_n_f32(a, b, n);
 }
 
+// CHECK-LABEL: @test_vfms_n_f64(
+// CHECK:   [[SUB_I:%.*]] = fsub <1 x double> , %b
+// CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, 
i32 0
+// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
+// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8>
+// CHECK:   [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> 
[[SUB_I]], <1 x double> [[VECINIT_I]], <1 x double> %a)
+// CHECK:   ret <1 x double> [[TMP3]]
+float64x1_t test_vfms_n_f64(float64x1_t a, float64x1_t b, float64_t n) {
+  return vfms_n_f64(a, b, n);
+}
+
 // CHECK-LABEL: @test_vfmsq_n_f32(
 // CHECK:   [[SUB_I:%.*]] = fsub <4 x float> , %b
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0
Index: include/clang/Basic/arm_neon.td
===
--- include/clang/Basic/arm_neon.td
+++ include/clang/Basic/arm_neon.td
@@ -621,8 +621,8 @@
 // MUL, MLA, MLS, FMA, FMS definitions with scalar argument
 def VMUL_N_A64 : IOpInst<"vmul_n", "dds", "Qd", OP_MUL_N>;
 
-def FMLA_N : SOpInst<"vfma_n", "ddds", "fQfQd", OP_FMLA_N>;
-def FMLS_N : SOpInst<"vfms_n", "ddds", "fQfQd", OP_FMLS_N>;
+def FMLA_N : SOpInst<"vfma_n", "ddds", "fdQfQd", OP_FMLA_N>;
+def FMLS_N : SOpInst<"vfms_n", "ddds", "fdQfQd", OP_FMLS_N>;
 
 def MLA_N : SOpInst<"vmla_n", "ddds", "Qd", OP_MLA_N>;
 def MLS_N : SOpInst<"vmls_n", "ddds", "Qd", OP_MLS_N>;


Index: test/CodeGen/aarch64-neon-2velem.c
===
--- test/CodeGen/aarch64-neon-2velem.c
+++ test/CodeGen/aarch64-neon-2velem.c
@@ -3083,6 +3083,17 @@
   return vfma_n_f32(a, b, n);
 }
 
+// CHECK-LABEL: @test_vfma_n_f64(
+// CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, i32 0
+// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8>
+// CHECK:   [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> [[VECINIT_I]], <1 x double> %a)
+// CHECK:   ret <1 x double> [[TMP3]]
+float64x1_t test_vfma_n_f64(float64x1_t a, float64x1_t b, float64_t n) {
+  return vfma_n_f64(a, b, n);
+}
+
 // CHECK-LABEL: @test_vfmaq_n_f32(
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0
 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1
@@ -3110,6 +3121,18 @@
   return vfms_n_f32(a, b, n);
 }
 
+// CHECK-LABEL: @test_vfms_n_f64(
+// CHECK:   [[SUB_I:%.*]] = fsub <1 x double> , %b
+// CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, i32 0
+// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
+// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8>
+// CHECK:   [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> [[VECINIT_I]], <1 x double> %a)
+// CHECK:   ret <1 x double> [[TMP3]]
+float64x1_t test_vfms_n_f64(float64x1_t a, float64x1_t b, float64_t n) {
+  return vfms_n_f64(a, b, n);
+}
+
 // CHECK-LABEL: @test_vfmsq_n_f32(
 // CHECK:   [[SUB_I:%.*]] = fsub <4 x float> , %b
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0
Index: include/clang/Basic/arm_neon.td
===
--- include/clang/Basic/arm_neon.td
+++