[PATCH] D45483: [NEON] Support vfma_n and vfms_n intrinsics
This revision was automatically updated to reflect the committed changes. Closed by commit rL329814: [NEON] Support vfma_n and vfms_n intrinsics (authored by kosarev, committed by ). Herald added a subscriber: llvm-commits. Changed prior to commit: https://reviews.llvm.org/D45483?vs=141828&id=142008#toc Repository: rL LLVM https://reviews.llvm.org/D45483 Files: cfe/trunk/include/clang/Basic/arm_neon.td cfe/trunk/test/CodeGen/aarch64-neon-2velem.c Index: cfe/trunk/test/CodeGen/aarch64-neon-2velem.c === --- cfe/trunk/test/CodeGen/aarch64-neon-2velem.c +++ cfe/trunk/test/CodeGen/aarch64-neon-2velem.c @@ -3083,6 +3083,17 @@ return vfma_n_f32(a, b, n); } +// CHECK-LABEL: @test_vfma_n_f64( +// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, i32 0 +// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8> +// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> [[VECINIT_I]], <1 x double> %a) +// CHECK: ret <1 x double> [[TMP3]] +float64x1_t test_vfma_n_f64(float64x1_t a, float64x1_t b, float64_t n) { + return vfma_n_f64(a, b, n); +} + // CHECK-LABEL: @test_vfmaq_n_f32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1 @@ -3110,6 +3121,18 @@ return vfms_n_f32(a, b, n); } +// CHECK-LABEL: @test_vfms_n_f64( +// CHECK: [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b +// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, i32 0 +// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8> +// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> [[VECINIT_I]], <1 x double> %a) +// 
CHECK: ret <1 x double> [[TMP3]] +float64x1_t test_vfms_n_f64(float64x1_t a, float64x1_t b, float64_t n) { + return vfms_n_f64(a, b, n); +} + // CHECK-LABEL: @test_vfmsq_n_f32( // CHECK: [[SUB_I:%.*]] = fsub <4 x float> , %b // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0 Index: cfe/trunk/include/clang/Basic/arm_neon.td === --- cfe/trunk/include/clang/Basic/arm_neon.td +++ cfe/trunk/include/clang/Basic/arm_neon.td @@ -621,8 +621,8 @@ // MUL, MLA, MLS, FMA, FMS definitions with scalar argument def VMUL_N_A64 : IOpInst<"vmul_n", "dds", "Qd", OP_MUL_N>; -def FMLA_N : SOpInst<"vfma_n", "ddds", "fQfQd", OP_FMLA_N>; -def FMLS_N : SOpInst<"vfms_n", "ddds", "fQfQd", OP_FMLS_N>; +def FMLA_N : SOpInst<"vfma_n", "ddds", "fdQfQd", OP_FMLA_N>; +def FMLS_N : SOpInst<"vfms_n", "ddds", "fdQfQd", OP_FMLS_N>; def MLA_N : SOpInst<"vmla_n", "ddds", "Qd", OP_MLA_N>; def MLS_N : SOpInst<"vmls_n", "ddds", "Qd", OP_MLS_N>; Index: cfe/trunk/test/CodeGen/aarch64-neon-2velem.c === --- cfe/trunk/test/CodeGen/aarch64-neon-2velem.c +++ cfe/trunk/test/CodeGen/aarch64-neon-2velem.c @@ -3083,6 +3083,17 @@ return vfma_n_f32(a, b, n); } +// CHECK-LABEL: @test_vfma_n_f64( +// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, i32 0 +// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8> +// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> [[VECINIT_I]], <1 x double> %a) +// CHECK: ret <1 x double> [[TMP3]] +float64x1_t test_vfma_n_f64(float64x1_t a, float64x1_t b, float64_t n) { + return vfma_n_f64(a, b, n); +} + // CHECK-LABEL: @test_vfmaq_n_f32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1 @@ -3110,6 +3121,18 @@ return vfms_n_f32(a, b, n); } +// 
CHECK-LABEL: @test_vfms_n_f64( +// CHECK: [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b +// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, i32 0 +// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8> +// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> [[VECINIT_I]], <1 x double> %a) +// CHECK: ret <1 x double> [[TMP3]] +float64x1_t test_vfms_n_f64(float64x1_t a, float64x1_t b, float64_t n) { + return vfms_n_f64(a, b, n); +} + // CHECK-LABEL: @test_vfmsq_n_f32( //
[PATCH] D45483: [NEON] Support vfma_n and vfms_n intrinsics
This revision was automatically updated to reflect the committed changes. Closed by commit rC329814: [NEON] Support vfma_n and vfms_n intrinsics (authored by kosarev, committed by ). Repository: rL LLVM https://reviews.llvm.org/D45483 Files: include/clang/Basic/arm_neon.td test/CodeGen/aarch64-neon-2velem.c Index: include/clang/Basic/arm_neon.td === --- include/clang/Basic/arm_neon.td +++ include/clang/Basic/arm_neon.td @@ -621,8 +621,8 @@ // MUL, MLA, MLS, FMA, FMS definitions with scalar argument def VMUL_N_A64 : IOpInst<"vmul_n", "dds", "Qd", OP_MUL_N>; -def FMLA_N : SOpInst<"vfma_n", "ddds", "fQfQd", OP_FMLA_N>; -def FMLS_N : SOpInst<"vfms_n", "ddds", "fQfQd", OP_FMLS_N>; +def FMLA_N : SOpInst<"vfma_n", "ddds", "fdQfQd", OP_FMLA_N>; +def FMLS_N : SOpInst<"vfms_n", "ddds", "fdQfQd", OP_FMLS_N>; def MLA_N : SOpInst<"vmla_n", "ddds", "Qd", OP_MLA_N>; def MLS_N : SOpInst<"vmls_n", "ddds", "Qd", OP_MLS_N>; Index: test/CodeGen/aarch64-neon-2velem.c === --- test/CodeGen/aarch64-neon-2velem.c +++ test/CodeGen/aarch64-neon-2velem.c @@ -3083,6 +3083,17 @@ return vfma_n_f32(a, b, n); } +// CHECK-LABEL: @test_vfma_n_f64( +// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, i32 0 +// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8> +// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> [[VECINIT_I]], <1 x double> %a) +// CHECK: ret <1 x double> [[TMP3]] +float64x1_t test_vfma_n_f64(float64x1_t a, float64x1_t b, float64_t n) { + return vfma_n_f64(a, b, n); +} + // CHECK-LABEL: @test_vfmaq_n_f32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1 @@ -3110,6 +3121,18 @@ return vfms_n_f32(a, b, n); } +// CHECK-LABEL: @test_vfms_n_f64( +// CHECK: [[SUB_I:%.*]] = fsub 
<1 x double> , %b +// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, i32 0 +// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8> +// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> [[VECINIT_I]], <1 x double> %a) +// CHECK: ret <1 x double> [[TMP3]] +float64x1_t test_vfms_n_f64(float64x1_t a, float64x1_t b, float64_t n) { + return vfms_n_f64(a, b, n); +} + // CHECK-LABEL: @test_vfmsq_n_f32( // CHECK: [[SUB_I:%.*]] = fsub <4 x float> , %b // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0 Index: include/clang/Basic/arm_neon.td === --- include/clang/Basic/arm_neon.td +++ include/clang/Basic/arm_neon.td @@ -621,8 +621,8 @@ // MUL, MLA, MLS, FMA, FMS definitions with scalar argument def VMUL_N_A64 : IOpInst<"vmul_n", "dds", "Qd", OP_MUL_N>; -def FMLA_N : SOpInst<"vfma_n", "ddds", "fQfQd", OP_FMLA_N>; -def FMLS_N : SOpInst<"vfms_n", "ddds", "fQfQd", OP_FMLS_N>; +def FMLA_N : SOpInst<"vfma_n", "ddds", "fdQfQd", OP_FMLA_N>; +def FMLS_N : SOpInst<"vfms_n", "ddds", "fdQfQd", OP_FMLS_N>; def MLA_N : SOpInst<"vmla_n", "ddds", "Qd", OP_MLA_N>; def MLS_N : SOpInst<"vmls_n", "ddds", "Qd", OP_MLS_N>; Index: test/CodeGen/aarch64-neon-2velem.c === --- test/CodeGen/aarch64-neon-2velem.c +++ test/CodeGen/aarch64-neon-2velem.c @@ -3083,6 +3083,17 @@ return vfma_n_f32(a, b, n); } +// CHECK-LABEL: @test_vfma_n_f64( +// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, i32 0 +// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8> +// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> [[VECINIT_I]], <1 x double> %a) +// CHECK: ret <1 x double> 
[[TMP3]] +float64x1_t test_vfma_n_f64(float64x1_t a, float64x1_t b, float64_t n) { + return vfma_n_f64(a, b, n); +} + // CHECK-LABEL: @test_vfmaq_n_f32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1 @@ -3110,6 +3121,18 @@ return vfms_n_f32(a, b, n); } +// CHECK-LABEL: @test_vfms_n_f64( +// CHECK: [[SUB_I:%.*]] = fsub <1 x double> , %b +// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, i32 0 +// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// CHECK:
[PATCH] D45483: [NEON] Support vfma_n and vfms_n intrinsics
t.p.northover accepted this revision. t.p.northover added a comment. This revision is now accepted and ready to land. Looks fine to me. https://reviews.llvm.org/D45483 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D45483: [NEON] Support vfma_n and vfms_n intrinsics
kosarev created this revision. kosarev added reviewers: az, sbaranga, t.p.northover. kosarev added a project: clang. Herald added subscribers: javed.absar, rengolin. https://reviews.llvm.org/D45483 Files: include/clang/Basic/arm_neon.td test/CodeGen/aarch64-neon-2velem.c Index: test/CodeGen/aarch64-neon-2velem.c === --- test/CodeGen/aarch64-neon-2velem.c +++ test/CodeGen/aarch64-neon-2velem.c @@ -3083,6 +3083,17 @@ return vfma_n_f32(a, b, n); } +// CHECK-LABEL: @test_vfma_n_f64( +// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, i32 0 +// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8> +// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> [[VECINIT_I]], <1 x double> %a) +// CHECK: ret <1 x double> [[TMP3]] +float64x1_t test_vfma_n_f64(float64x1_t a, float64x1_t b, float64_t n) { + return vfma_n_f64(a, b, n); +} + // CHECK-LABEL: @test_vfmaq_n_f32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1 @@ -3110,6 +3121,18 @@ return vfms_n_f32(a, b, n); } +// CHECK-LABEL: @test_vfms_n_f64( +// CHECK: [[SUB_I:%.*]] = fsub <1 x double> , %b +// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, i32 0 +// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8> +// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> [[VECINIT_I]], <1 x double> %a) +// CHECK: ret <1 x double> [[TMP3]] +float64x1_t test_vfms_n_f64(float64x1_t a, float64x1_t b, float64_t n) { + return vfms_n_f64(a, b, n); +} + // CHECK-LABEL: @test_vfmsq_n_f32( // CHECK: [[SUB_I:%.*]] = fsub 
<4 x float> , %b // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0 Index: include/clang/Basic/arm_neon.td === --- include/clang/Basic/arm_neon.td +++ include/clang/Basic/arm_neon.td @@ -621,8 +621,8 @@ // MUL, MLA, MLS, FMA, FMS definitions with scalar argument def VMUL_N_A64 : IOpInst<"vmul_n", "dds", "Qd", OP_MUL_N>; -def FMLA_N : SOpInst<"vfma_n", "ddds", "fQfQd", OP_FMLA_N>; -def FMLS_N : SOpInst<"vfms_n", "ddds", "fQfQd", OP_FMLS_N>; +def FMLA_N : SOpInst<"vfma_n", "ddds", "fdQfQd", OP_FMLA_N>; +def FMLS_N : SOpInst<"vfms_n", "ddds", "fdQfQd", OP_FMLS_N>; def MLA_N : SOpInst<"vmla_n", "ddds", "Qd", OP_MLA_N>; def MLS_N : SOpInst<"vmls_n", "ddds", "Qd", OP_MLS_N>; Index: test/CodeGen/aarch64-neon-2velem.c === --- test/CodeGen/aarch64-neon-2velem.c +++ test/CodeGen/aarch64-neon-2velem.c @@ -3083,6 +3083,17 @@ return vfma_n_f32(a, b, n); } +// CHECK-LABEL: @test_vfma_n_f64( +// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, i32 0 +// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8> +// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> [[VECINIT_I]], <1 x double> %a) +// CHECK: ret <1 x double> [[TMP3]] +float64x1_t test_vfma_n_f64(float64x1_t a, float64x1_t b, float64_t n) { + return vfma_n_f64(a, b, n); +} + // CHECK-LABEL: @test_vfmaq_n_f32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1 @@ -3110,6 +3121,18 @@ return vfms_n_f32(a, b, n); } +// CHECK-LABEL: @test_vfms_n_f64( +// CHECK: [[SUB_I:%.*]] = fsub <1 x double> , %b +// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, i32 0 +// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <1 x double> 
[[SUB_I]] to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8> +// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> [[VECINIT_I]], <1 x double> %a) +// CHECK: ret <1 x double> [[TMP3]] +float64x1_t test_vfms_n_f64(float64x1_t a, float64x1_t b, float64_t n) { + return vfms_n_f64(a, b, n); +} + // CHECK-LABEL: @test_vfmsq_n_f32( // CHECK: [[SUB_I:%.*]] = fsub <4 x float> , %b // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0 Index: include/clang/Basic/arm_neon.td === --- include/clang/Basic/arm_neon.td +++