Hi t.p.northover,
Hi Tim and reviewers,
Attached are patches for LLVM and Clang that implement the missing
floating-point convert, round and misc intrinsics for the v1f64 type.
Also, since I found that there is already an intrinsic function called
llvm.sqrt, I replaced llvm.aarch64.neon.fsqrt with it.
Please review.
Thanks,
-Hao
http://llvm-reviews.chandlerc.com/D2297
Files:
lib/Target/AArch64/AArch64ISelLowering.cpp
lib/Target/AArch64/AArch64InstrNEON.td
test/CodeGen/AArch64/neon-misc.ll
test/CodeGen/AArch64/neon-simd-shift.ll
tools/clang/include/clang/Basic/arm_neon.td
tools/clang/lib/CodeGen/CGBuiltin.cpp
tools/clang/test/CodeGen/aarch64-neon-intrinsics.c
tools/clang/utils/TableGen/NeonEmitter.cpp
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -341,26 +341,32 @@
setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v1f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f32, Legal);
setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v1f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v1f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::FRINT, MVT::v2f32, Legal);
setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::v1f64, Legal);
setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::v2f32, Legal);
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::v1f64, Legal);
setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
}
}
Index: lib/Target/AArch64/AArch64InstrNEON.td
===================================================================
--- lib/Target/AArch64/AArch64InstrNEON.td
+++ lib/Target/AArch64/AArch64InstrNEON.td
@@ -4745,6 +4745,29 @@
int_aarch64_neon_vcvtd_n_u64_f64,
FCVTZU_Nssi, FCVTZU_Nddi>;
+// Patterns For Convert Instructions Between v1f64 and v1i64
+class Neon_ScalarShiftImm_cvtf_v1f64_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
+ (INST FPR64:$Rn, imm:$Imm)>;
+
+class Neon_ScalarShiftImm_fcvt_v1f64_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
+ (INST FPR64:$Rn, imm:$Imm)>;
+
+def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxs2fp,
+ SCVTF_Nddi>;
+
+def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxu2fp,
+ UCVTF_Nddi>;
+
+def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxs,
+ FCVTZS_Nddi>;
+
+def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxu,
+ FCVTZU_Nddi>;
+
// Scalar Integer Add
let isCommutable = 1 in {
def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
@@ -4814,6 +4837,8 @@
defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrsqrts, FRSQRTSsss,
FRSQRTSddd>;
+def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>;
+
// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Floating-point Multiply Extended,
multiclass Neon_Scalar3Same_MULX_SD_size_patterns<SDPatternOperator opnode,
@@ -4960,6 +4985,21 @@
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzu,
FCVTZUss, FCVTZUdd>;
+// Patterns For Convert Instructions Between v1f64 and v1i64
+class Neon_Scalar2SameMisc_cvtf_v1f64_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn))), (INST FPR64:$Rn)>;
+
+class Neon_Scalar2SameMisc_fcvt_v1f64_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
+
+def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<sint_to_fp, SCVTFdd>;
+def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<uint_to_fp, UCVTFdd>;
+
+def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_sint, FCVTZSdd>;
+def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_uint, FCVTZUdd>;
+
// Scalar Floating-point Reciprocal Estimate
defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrecpe,
@@ -4975,6 +5015,18 @@
defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrsqrte,
FRSQRTEss, FRSQRTEdd>;
+// Scalar Floating-point Round
+class Neon_ScalarFloatRound_pattern<SDPatternOperator opnode, Instruction INST>
+ : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
+
+def : Neon_ScalarFloatRound_pattern<fceil, FRINTPdd>;
+def : Neon_ScalarFloatRound_pattern<ffloor, FRINTMdd>;
+def : Neon_ScalarFloatRound_pattern<ftrunc, FRINTZdd>;
+def : Neon_ScalarFloatRound_pattern<frint, FRINTXdd>;
+def : Neon_ScalarFloatRound_pattern<fnearbyint, FRINTIdd>;
+def : Neon_ScalarFloatRound_pattern<frnd, FRINTAdd>;
+def : Neon_ScalarFloatRound_pattern<int_aarch64_neon_frintn, FRINTNdd>;
+
// Scalar Integer Compare
// Scalar Compare Bitwise Equal
@@ -8117,8 +8169,7 @@
int_arm_neon_vrecpe>;
defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101,
int_arm_neon_vrsqrte>;
-defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111,
- int_aarch64_neon_fsqrt>;
+defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>;
multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U,
bits<5> opcode, SDPatternOperator Neon_Op> {
Index: test/CodeGen/AArch64/neon-misc.ll
===================================================================
--- test/CodeGen/AArch64/neon-misc.ll
+++ test/CodeGen/AArch64/neon-misc.ll
@@ -1274,19 +1274,19 @@
define <2 x float> @test_vsqrt_f32(<2 x float> %a) #0 {
; CHECK: fsqrt v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vsqrt1.i = tail call <2 x float> @llvm.aarch64.neon.fsqrt.v2f32(<2 x float> %a) #4
+ %vsqrt1.i = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #4
ret <2 x float> %vsqrt1.i
}
define <4 x float> @test_vsqrtq_f32(<4 x float> %a) #0 {
; CHECK: fsqrt v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vsqrt1.i = tail call <4 x float> @llvm.aarch64.neon.fsqrt.v4f32(<4 x float> %a) #4
+ %vsqrt1.i = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #4
ret <4 x float> %vsqrt1.i
}
define <2 x double> @test_vsqrtq_f64(<2 x double> %a) #0 {
; CHECK: fsqrt v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vsqrt1.i = tail call <2 x double> @llvm.aarch64.neon.fsqrt.v2f64(<2 x double> %a) #4
+ %vsqrt1.i = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #4
ret <2 x double> %vsqrt1.i
}
@@ -1326,11 +1326,11 @@
ret <2 x double> %vcvt.i
}
-declare <2 x double> @llvm.aarch64.neon.fsqrt.v2f64(<2 x double>) #2
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #2
-declare <4 x float> @llvm.aarch64.neon.fsqrt.v4f32(<4 x float>) #2
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #2
-declare <2 x float> @llvm.aarch64.neon.fsqrt.v2f32(<2 x float>) #2
+declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #2
declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) #2
@@ -1607,3 +1607,193 @@
declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) #2
+define <1 x i64> @test_vcvt_s64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvt_s64_f64
+; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = fptosi <1 x double> %a to <1 x i64>
+ ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvt_u64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvt_u64_f64
+; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = fptoui <1 x double> %a to <1 x i64>
+ ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvtn_s64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvtn_s64_f64
+; CHECK: fcvtns d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %a)
+ ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvtn_u64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvtn_u64_f64
+; CHECK: fcvtnu d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a)
+ ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvtp_s64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvtp_s64_f64
+; CHECK: fcvtps d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %a)
+ ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvtp_u64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvtp_u64_f64
+; CHECK: fcvtpu d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a)
+ ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvtm_s64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvtm_s64_f64
+; CHECK: fcvtms d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %a)
+ ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvtm_u64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvtm_u64_f64
+; CHECK: fcvtmu d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a)
+ ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvta_s64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvta_s64_f64
+; CHECK: fcvtas d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %a)
+ ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvta_u64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvta_u64_f64
+; CHECK: fcvtau d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %a)
+ ret <1 x i64> %1
+}
+
+define <1 x double> @test_vcvt_f64_s64(<1 x i64> %a) {
+; CHECK-LABEL: test_vcvt_f64_s64
+; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = sitofp <1 x i64> %a to <1 x double>
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vcvt_f64_u64(<1 x i64> %a) {
+; CHECK-LABEL: test_vcvt_f64_u64
+; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = uitofp <1 x i64> %a to <1 x double>
+ ret <1 x double> %1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double>)
+
+define <1 x double> @test_vrndn_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vrndn_f64
+; CHECK: frintn d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vrnda_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vrnda_f64
+; CHECK: frinta d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.round.v1f64(<1 x double> %a)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vrndp_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vrndp_f64
+; CHECK: frintp d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.ceil.v1f64(<1 x double> %a)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vrndm_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vrndm_f64
+; CHECK: frintm d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.floor.v1f64(<1 x double> %a)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vrndx_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vrndx_f64
+; CHECK: frintx d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.rint.v1f64(<1 x double> %a)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vrnd_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vrnd_f64
+; CHECK: frintz d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.trunc.v1f64(<1 x double> %a)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vrndi_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vrndi_f64
+; CHECK: frinti d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a)
+ ret <1 x double> %1
+}
+
+declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>)
+declare <1 x double> @llvm.trunc.v1f64(<1 x double>)
+declare <1 x double> @llvm.rint.v1f64(<1 x double>)
+declare <1 x double> @llvm.floor.v1f64(<1 x double>)
+declare <1 x double> @llvm.ceil.v1f64(<1 x double>)
+declare <1 x double> @llvm.round.v1f64(<1 x double>)
+declare <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double>)
+
+define <1 x double> @test_vrsqrte_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vrsqrte_f64
+; CHECK: frsqrte d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double> %a)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vrecpe_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vrecpe_f64
+; CHECK: frecpe d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double> %a)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vsqrt_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vsqrt_f64
+; CHECK: fsqrt d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vrecps_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vrecps_f64
+; CHECK: frecps d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double> %a, <1 x double> %b)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vrsqrts_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vrsqrts_f64
+; CHECK: frsqrts d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double> %a, <1 x double> %b)
+ ret <1 x double> %1
+}
+
+declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.sqrt.v1f64(<1 x double>)
+declare <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double>)
+declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>)
\ No newline at end of file
Index: test/CodeGen/AArch64/neon-simd-shift.ll
===================================================================
--- test/CodeGen/AArch64/neon-simd-shift.ll
+++ test/CodeGen/AArch64/neon-simd-shift.ll
@@ -1522,3 +1522,35 @@
declare <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32)
+define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvt_n_s64_f64
+; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64
+ %1 = tail call <1 x i64> @llvm.arm.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64)
+ ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvt_n_u64_f64
+; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64
+ %1 = tail call <1 x i64> @llvm.arm.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64)
+ ret <1 x i64> %1
+}
+
+define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) {
+; CHECK-LABEL: test_vcvt_n_f64_s64
+; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
+ %1 = tail call <1 x double> @llvm.arm.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) {
+; CHECK-LABEL: test_vcvt_n_f64_u64
+; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
+ %1 = tail call <1 x double> @llvm.arm.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
+ ret <1 x double> %1
+}
+
+declare <1 x i64> @llvm.arm.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32)
+declare <1 x i64> @llvm.arm.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32)
+declare <1 x double> @llvm.arm.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32)
+declare <1 x double> @llvm.arm.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32)
\ No newline at end of file
Index: tools/clang/include/clang/Basic/arm_neon.td
===================================================================
--- tools/clang/include/clang/Basic/arm_neon.td
+++ tools/clang/include/clang/Basic/arm_neon.td
@@ -653,9 +653,9 @@
////////////////////////////////////////////////////////////////////////////////
// Reciprocal/Sqrt
-// With additional Qd type.
-def FRECPS : IInst<"vrecps", "ddd", "fQfQd">;
-def FRSQRTS : IInst<"vrsqrts", "ddd", "fQfQd">;
+// With additional d, Qd type.
+def FRECPS : IInst<"vrecps", "ddd", "fdQfQd">;
+def FRSQRTS : IInst<"vrsqrts", "ddd", "fdQfQd">;
////////////////////////////////////////////////////////////////////////////////
// bitwise reverse
@@ -679,39 +679,39 @@
def VCVT_HIGH_F32_F16 : SOpInst<"vcvt_high_f32", "wk", "h", OP_VCVT_EX_HI>;
def VCVT_F32_F64 : SInst<"vcvt_f32_f64", "fj", "d">;
def VCVT_HIGH_F32_F64 : SOpInst<"vcvt_high_f32", "qfj", "d", OP_VCVT_NA_HI>;
-def VCVT_F64_F32 : SInst<"vcvt_f64", "wd", "f">;
-def VCVT_F64 : SInst<"vcvt_f64", "Fd", "QlQUl">;
+def VCVT_F64_F32 : SInst<"vcvt_f64_f32", "wd", "f">;
+def VCVT_F64 : SInst<"vcvt_f64", "Fd", "lUlQlQUl">;
def VCVT_HIGH_F64_F32 : SOpInst<"vcvt_high_f64", "wj", "f", OP_VCVT_EX_HI>;
def VCVTX_F32_F64 : SInst<"vcvtx_f32", "fj", "d">;
def VCVTX_HIGH_F32_F64 : SOpInst<"vcvtx_high_f32", "qfj", "d", OP_VCVTX_HI>;
-def FRINTN : SInst<"vrndn", "dd", "fQfQd">;
-def FRINTA : SInst<"vrnda", "dd", "fQfQd">;
-def FRINTP : SInst<"vrndp", "dd", "fQfQd">;
-def FRINTM : SInst<"vrndm", "dd", "fQfQd">;
-def FRINTX : SInst<"vrndx", "dd", "fQfQd">;
-def FRINTZ : SInst<"vrnd", "dd", "fQfQd">;
-def FRINTI : SInst<"vrndi", "dd", "fQfQd">;
-def VCVT_S64 : SInst<"vcvt_s64", "xd", "Qd">;
-def VCVT_U64 : SInst<"vcvt_u64", "ud", "Qd">;
+def FRINTN : SInst<"vrndn", "dd", "fdQfQd">;
+def FRINTA : SInst<"vrnda", "dd", "fdQfQd">;
+def FRINTP : SInst<"vrndp", "dd", "fdQfQd">;
+def FRINTM : SInst<"vrndm", "dd", "fdQfQd">;
+def FRINTX : SInst<"vrndx", "dd", "fdQfQd">;
+def FRINTZ : SInst<"vrnd", "dd", "fdQfQd">;
+def FRINTI : SInst<"vrndi", "dd", "fdQfQd">;
+def VCVT_S64 : SInst<"vcvt_s64", "xd", "dQd">;
+def VCVT_U64 : SInst<"vcvt_u64", "ud", "dQd">;
def FCVTNS_S32 : SInst<"vcvtn_s32", "xd", "fQf">;
-def FCVTNS_S64 : SInst<"vcvtn_s64", "xd", "Qd">;
+def FCVTNS_S64 : SInst<"vcvtn_s64", "xd", "dQd">;
def FCVTNU_S32 : SInst<"vcvtn_u32", "ud", "fQf">;
-def FCVTNU_S64 : SInst<"vcvtn_u64", "ud", "Qd">;
+def FCVTNU_S64 : SInst<"vcvtn_u64", "ud", "dQd">;
def FCVTPS_S32 : SInst<"vcvtp_s32", "xd", "fQf">;
-def FCVTPS_S64 : SInst<"vcvtp_s64", "xd", "Qd">;
+def FCVTPS_S64 : SInst<"vcvtp_s64", "xd", "dQd">;
def FCVTPU_S32 : SInst<"vcvtp_u32", "ud", "fQf">;
-def FCVTPU_S64 : SInst<"vcvtp_u64", "ud", "Qd">;
+def FCVTPU_S64 : SInst<"vcvtp_u64", "ud", "dQd">;
def FCVTMS_S32 : SInst<"vcvtm_s32", "xd", "fQf">;
-def FCVTMS_S64 : SInst<"vcvtm_s64", "xd", "Qd">;
+def FCVTMS_S64 : SInst<"vcvtm_s64", "xd", "dQd">;
def FCVTMU_S32 : SInst<"vcvtm_u32", "ud", "fQf">;
-def FCVTMU_S64 : SInst<"vcvtm_u64", "ud", "Qd">;
+def FCVTMU_S64 : SInst<"vcvtm_u64", "ud", "dQd">;
def FCVTAS_S32 : SInst<"vcvta_s32", "xd", "fQf">;
-def FCVTAS_S64 : SInst<"vcvta_s64", "xd", "Qd">;
+def FCVTAS_S64 : SInst<"vcvta_s64", "xd", "dQd">;
def FCVTAU_S32 : SInst<"vcvta_u32", "ud", "fQf">;
-def FCVTAU_S64 : SInst<"vcvta_u64", "ud", "Qd">;
-def FRECPE : SInst<"vrecpe", "dd", "fUiQfQUiQd">;
-def FRSQRTE : SInst<"vrsqrte", "dd", "fUiQfQUiQd">;
-def FSQRT : SInst<"vsqrt", "dd", "fQfQd">;
+def FCVTAU_S64 : SInst<"vcvta_u64", "ud", "dQd">;
+def FRECPE : SInst<"vrecpe", "dd", "fdUiQfQUiQd">;
+def FRSQRTE : SInst<"vrsqrte", "dd", "fdUiQfQUiQd">;
+def FSQRT : SInst<"vsqrt", "dd", "fdQfQd">;
////////////////////////////////////////////////////////////////////////////////
// Comparison
@@ -726,7 +726,7 @@
def CMTST : WInst<"vtst", "udd",
"csiUcUsUiPcPsQcQsQiQUcQUsQUiQPcQPslUlQlQUlPlQPl">;
def CFMEQ : SOpInst<"vceq", "udd",
- "csifUcUsUiPcQcQsQiQlQfQUcQUsQUiQUlQPcQd", OP_EQ>;
+ "csifUcUsUiPcQcQsQiQlQfQUcQUsQUiQUlQPcQd", OP_EQ>;
def CFMGE : SOpInst<"vcge", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_GE>;
def CFMLE : SOpInst<"vcle", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_LE>;
def CFMGT : SOpInst<"vcgt", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_GT>;
@@ -800,9 +800,9 @@
def VMOVL_HIGH : SOpInst<"vmovl_high", "nd", "HcHsHiHUcHUsHUi", OP_MOVL_HI>;
let isVCVT_N = 1 in {
-def CVTF_N_F64 : SInst<"vcvt_n_f64", "Fdi", "QlQUl">;
-def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "xdi", "Qd">;
-def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "udi", "Qd">;
+def CVTF_N_F64 : SInst<"vcvt_n_f64", "Fdi", "lUlQlQUl">;
+def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "xdi", "dQd">;
+def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "udi", "dQd">;
}
////////////////////////////////////////////////////////////////////////////////
Index: tools/clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- tools/clang/lib/CodeGen/CGBuiltin.cpp
+++ tools/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2938,6 +2938,7 @@
// Determine the type of this overloaded NEON intrinsic.
NeonTypeFlags Type(Result.getZExtValue());
bool usgn = Type.isUnsigned();
+ bool quad = Type.isQuad();
llvm::VectorType *VTy = GetNeonType(this, Type);
llvm::Type *Ty = VTy;
@@ -3198,9 +3199,10 @@
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_f32_v, E);
case AArch64::BI__builtin_neon_vcvtq_n_f32_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_f32_v, E);
+ case AArch64::BI__builtin_neon_vcvt_n_f64_v:
case AArch64::BI__builtin_neon_vcvtq_n_f64_v: {
llvm::Type *FloatTy =
- GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
+ GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
llvm::Type *Tys[2] = { FloatTy, Ty };
Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp
: Intrinsic::arm_neon_vcvtfxs2fp;
@@ -3215,10 +3217,12 @@
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_u32_v, E);
case AArch64::BI__builtin_neon_vcvtq_n_u32_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_u32_v, E);
+ case AArch64::BI__builtin_neon_vcvt_n_s64_v:
+ case AArch64::BI__builtin_neon_vcvt_n_u64_v:
case AArch64::BI__builtin_neon_vcvtq_n_s64_v:
case AArch64::BI__builtin_neon_vcvtq_n_u64_v: {
llvm::Type *FloatTy =
- GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
+ GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
llvm::Type *Tys[2] = { Ty, FloatTy };
Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu
: Intrinsic::arm_neon_vcvtfp2fxs;
@@ -3463,7 +3467,7 @@
Quad = true;
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
llvm::Type *VTy = GetNeonType(this,
- NeonTypeFlags(NeonTypeFlags::Float64, false, Quad ? true : false));
+ NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
@@ -3657,15 +3661,16 @@
Int = Intrinsic::aarch64_neon_fcvtxn;
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtx_f32_f64");
}
- case AArch64::BI__builtin_neon_vcvt_f64_v: {
+ case AArch64::BI__builtin_neon_vcvt_f64_f32: {
llvm::Type *OpTy =
GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, false));
Ops[0] = Builder.CreateBitCast(Ops[0], OpTy);
return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
}
+ case AArch64::BI__builtin_neon_vcvt_f64_v:
case AArch64::BI__builtin_neon_vcvtq_f64_v: {
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
+ Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
}
@@ -3709,10 +3714,12 @@
case AArch64::BI__builtin_neon_vcvtq_s32_v:
case AArch64::BI__builtin_neon_vcvtq_u32_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_u32_v, E);
+ case AArch64::BI__builtin_neon_vcvt_s64_v:
+ case AArch64::BI__builtin_neon_vcvt_u64_v:
case AArch64::BI__builtin_neon_vcvtq_s64_v:
case AArch64::BI__builtin_neon_vcvtq_u64_v: {
llvm::Type *DoubleTy =
- GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
+ GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
: Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
@@ -3724,6 +3731,7 @@
Int = Intrinsic::aarch64_neon_fcvtns;
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtns_f32");
}
+ case AArch64::BI__builtin_neon_vcvtn_s64_v:
case AArch64::BI__builtin_neon_vcvtnq_s64_v: {
llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
llvm::Type *Tys[2] = { Ty, OpTy };
@@ -3737,6 +3745,7 @@
Int = Intrinsic::aarch64_neon_fcvtnu;
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtnu_f32");
}
+ case AArch64::BI__builtin_neon_vcvtn_u64_v:
case AArch64::BI__builtin_neon_vcvtnq_u64_v: {
llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
llvm::Type *Tys[2] = { Ty, OpTy };
@@ -3750,6 +3759,7 @@
Int = Intrinsic::aarch64_neon_fcvtps;
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtps_f32");
}
+ case AArch64::BI__builtin_neon_vcvtp_s64_v:
case AArch64::BI__builtin_neon_vcvtpq_s64_v: {
llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
llvm::Type *Tys[2] = { Ty, OpTy };
@@ -3763,6 +3773,7 @@
Int = Intrinsic::aarch64_neon_fcvtpu;
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtpu_f32");
}
+ case AArch64::BI__builtin_neon_vcvtp_u64_v:
case AArch64::BI__builtin_neon_vcvtpq_u64_v: {
llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
llvm::Type *Tys[2] = { Ty, OpTy };
@@ -3776,6 +3787,7 @@
Int = Intrinsic::aarch64_neon_fcvtms;
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtms_f32");
}
+ case AArch64::BI__builtin_neon_vcvtm_s64_v:
case AArch64::BI__builtin_neon_vcvtmq_s64_v: {
llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
llvm::Type *Tys[2] = { Ty, OpTy };
@@ -3789,6 +3801,7 @@
Int = Intrinsic::aarch64_neon_fcvtmu;
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtmu_f32");
}
+ case AArch64::BI__builtin_neon_vcvtm_u64_v:
case AArch64::BI__builtin_neon_vcvtmq_u64_v: {
llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
llvm::Type *Tys[2] = { Ty, OpTy };
@@ -3802,6 +3815,7 @@
Int = Intrinsic::aarch64_neon_fcvtas;
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtas_f32");
}
+ case AArch64::BI__builtin_neon_vcvta_s64_v:
case AArch64::BI__builtin_neon_vcvtaq_s64_v: {
llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
llvm::Type *Tys[2] = { Ty, OpTy };
@@ -3815,6 +3829,7 @@
Int = Intrinsic::aarch64_neon_fcvtau;
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtau_f32");
}
+ case AArch64::BI__builtin_neon_vcvta_u64_v:
case AArch64::BI__builtin_neon_vcvtaq_u64_v: {
llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
llvm::Type *Tys[2] = { Ty, OpTy };
@@ -3829,7 +3844,7 @@
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrte_v, E);
case AArch64::BI__builtin_neon_vsqrt_v:
case AArch64::BI__builtin_neon_vsqrtq_v: {
- Int = Intrinsic::aarch64_neon_fsqrt;
+ Int = Intrinsic::sqrt;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
}
case AArch64::BI__builtin_neon_vcvt_f32_v:
Index: tools/clang/test/CodeGen/aarch64-neon-intrinsics.c
===================================================================
--- tools/clang/test/CodeGen/aarch64-neon-intrinsics.c
+++ tools/clang/test/CodeGen/aarch64-neon-intrinsics.c
@@ -11218,3 +11218,171 @@
// CHECK: fabd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
return vabdd_f64(a, b);
}
+
+int64x1_t test_vcvt_s64_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vcvt_s64_f64
+ return vcvt_s64_f64(a);
+ // CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vcvt_u64_f64
+ return vcvt_u64_f64(a);
+ // CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+int64x1_t test_vcvtn_s64_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vcvtn_s64_f64
+ return vcvtn_s64_f64(a);
+ // CHECK: fcvtns d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+uint64x1_t test_vcvtn_u64_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vcvtn_u64_f64
+ return vcvtn_u64_f64(a);
+ // CHECK: fcvtnu d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+int64x1_t test_vcvtp_s64_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vcvtp_s64_f64
+ return vcvtp_s64_f64(a);
+ // CHECK: fcvtps d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+uint64x1_t test_vcvtp_u64_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vcvtp_u64_f64
+ return vcvtp_u64_f64(a);
+ // CHECK: fcvtpu d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+int64x1_t test_vcvtm_s64_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vcvtm_s64_f64
+ return vcvtm_s64_f64(a);
+ // CHECK: fcvtms d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+uint64x1_t test_vcvtm_u64_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vcvtm_u64_f64
+ return vcvtm_u64_f64(a);
+ // CHECK: fcvtmu d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+int64x1_t test_vcvta_s64_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vcvta_s64_f64
+ return vcvta_s64_f64(a);
+ // CHECK: fcvtas d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+uint64x1_t test_vcvta_u64_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vcvta_u64_f64
+ return vcvta_u64_f64(a);
+ // CHECK: fcvtau d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vcvt_f64_s64(int64x1_t a) {
+ // CHECK-LABEL: test_vcvt_f64_s64
+ return vcvt_f64_s64(a);
+ // CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vcvt_f64_u64(uint64x1_t a) {
+ // CHECK-LABEL: test_vcvt_f64_u64
+ return vcvt_f64_u64(a);
+ // CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+int64x1_t test_vcvt_n_s64_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vcvt_n_s64_f64
+ return vcvt_n_s64_f64(a, 64);
+ // CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64
+}
+
+uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vcvt_n_u64_f64
+ return vcvt_n_u64_f64(a, 64);
+ // CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64
+}
+
+float64x1_t test_vcvt_n_f64_s64(int64x1_t a) {
+ // CHECK-LABEL: test_vcvt_n_f64_s64
+ return vcvt_n_f64_s64(a, 64);
+ // CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
+}
+
+float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) {
+ // CHECK-LABEL: test_vcvt_n_f64_u64
+ return vcvt_n_f64_u64(a, 64);
+ // CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
+}
+
+float64x1_t test_vrndn_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vrndn_f64
+ return vrndn_f64(a);
+ // CHECK: frintn d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vrnda_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vrnda_f64
+ return vrnda_f64(a);
+ // CHECK: frinta d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vrndp_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vrndp_f64
+ return vrndp_f64(a);
+ // CHECK: frintp d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vrndm_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vrndm_f64
+ return vrndm_f64(a);
+ // CHECK: frintm d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vrndx_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vrndx_f64
+ return vrndx_f64(a);
+ // CHECK: frintx d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vrnd_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vrnd_f64
+ return vrnd_f64(a);
+ // CHECK: frintz d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vrndi_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vrndi_f64
+ return vrndi_f64(a);
+ // CHECK: frinti d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vrsqrte_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vrsqrte_f64
+ return vrsqrte_f64(a);
+ // CHECK: frsqrte d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vrecpe_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vrecpe_f64
+ return vrecpe_f64(a);
+ // CHECK: frecpe d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vsqrt_f64(float64x1_t a) {
+ // CHECK-LABEL: test_vsqrt_f64
+ return vsqrt_f64(a);
+ // CHECK: fsqrt d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) {
+ // CHECK-LABEL: test_vrecps_f64
+ return vrecps_f64(a, b);
+ // CHECK: frecps d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) {
+ // CHECK-LABEL: test_vrsqrts_f64
+ return vrsqrts_f64(a, b);
+ // CHECK: frsqrts d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
Index: tools/clang/utils/TableGen/NeonEmitter.cpp
===================================================================
--- tools/clang/utils/TableGen/NeonEmitter.cpp
+++ tools/clang/utils/TableGen/NeonEmitter.cpp
@@ -914,7 +914,8 @@
/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
static std::string MangleName(const std::string &name, StringRef typestr,
ClassKind ck) {
- if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64")
+ if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64" ||
+ name == "vcvt_f64_f32")
return name;
bool quad = false;
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits