Committed as r196362.
2013/12/3 Kevin Qin <[email protected]> > kevin.qin added you to the CC list for the revision "[AArch64 NEON] > Implement intrinsic vceqz_f64 -llvm". > > Hi, > Please review, thanks. > > http://llvm-reviews.chandlerc.com/D2314 > > Files: > lib/Target/AArch64/AArch64InstrNEON.td > test/CodeGen/AArch64/neon-scalar-fp-compare.ll > > Index: lib/Target/AArch64/AArch64InstrNEON.td > =================================================================== > --- lib/Target/AArch64/AArch64InstrNEON.td > +++ lib/Target/AArch64/AArch64InstrNEON.td > @@ -4323,7 +4323,7 @@ > [], > NoItinerary>; > def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode, > - (outs FPR64:$Rd), (ins FPR64:$Rn, > fpz64movi:$FPImm), > + (outs FPR64:$Rd), (ins FPR64:$Rn, > fpz32:$FPImm), > !strconcat(asmop, "\t$Rd, $Rn, $FPImm"), > [], > NoItinerary>; > @@ -4339,11 +4339,11 @@ > Instruction INSTS, > Instruction INSTD> { > def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn), > - (v1f32 (scalar_to_vector (f32 > fpimm:$FPImm))))), > - (INSTS FPR32:$Rn, fpimm:$FPImm)>; > + (v1f32 (scalar_to_vector (f32 > fpz32:$FPImm))))), > + (INSTS FPR32:$Rn, fpz32:$FPImm)>; > def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), > - (v1f64 (bitconvert (v8i8 Neon_AllZero))))), > - (INSTD FPR64:$Rn, 0)>; > + (v1f32 (scalar_to_vector (f32 > fpz32:$FPImm))))), > + (INSTD FPR64:$Rn, fpz32:$FPImm)>; > } > > multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode, > @@ -5046,6 +5046,8 @@ > defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">; > defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vceq, > FCMEQZssi, FCMEQZddi>; > +def : Pat<(v1i64 (Neon_cmpz (v1f64 FPR64:$Rn), (f32 fpz32:$FPImm), > SETEQ)), > + (FCMEQZddi FPR64:$Rn, fpz32:$FPImm)>; > > // Scalar Floating-point Compare Mask Greater Than Or Equal > defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">; > Index: test/CodeGen/AArch64/neon-scalar-fp-compare.ll > =================================================================== > --- test/CodeGen/AArch64/neon-scalar-fp-compare.ll > +++ test/CodeGen/AArch64/neon-scalar-fp-compare.ll > @@ -24,6 +24,15 @@ > ret i64 %0 > } > > +define <1 x i64> @test_vceqz_f64(<1 x double> %a) #0 { > +; CHECK: test_vceqz_f64 > +; CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, #0.0 > +entry: > + %0 = fcmp oeq <1 x double> %a, zeroinitializer > + %vceqz.i = zext <1 x i1> %0 to <1 x i64> > + ret <1 x i64> %vceqz.i > +} > + > define i32 @test_vceqzs_f32(float %a) { > ; CHECK: test_vceqzs_f32 > ; CHECK: fcmeq {{s[0-9]}}, {{s[0-9]}}, #0.0 > @@ -39,7 +48,7 @@ > ; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, #0.0 > entry: > %vceq.i = insertelement <1 x double> undef, double %a, i32 0 > - %vceq1.i = call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1f64.v1f64(<1 > x double> %vceq.i, <1 x double> zeroinitializer) > + %vceq1.i = tail call <1 x i64> > @llvm.aarch64.neon.vceq.v1i64.v1f64.v1f32(<1 x double> %vceq.i, <1 x float> > zeroinitializer) #5 > %0 = extractelement <1 x i64> %vceq1.i, i32 0 > ret i64 %0 > } > @@ -81,7 +90,7 @@ > ; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, #0.0 > entry: > %vcge.i = insertelement <1 x double> undef, double %a, i32 0 > - %vcge1.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f64(<1 > x double> %vcge.i, <1 x double> zeroinitializer) > + %vcge1.i = tail call <1 x i64> > @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f32(<1 x double> %vcge.i, <1 x float> > zeroinitializer) #5 > %0 = extractelement <1 x i64> %vcge1.i, i32 0 > ret i64 %0 > } > @@ -123,7 +132,7 @@ > ; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, #0.0 > entry: > %vcgt.i = insertelement <1 x double> undef, double %a, i32 0 > - %vcgt1.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f64(<1 > x double> %vcgt.i, <1 x double> zeroinitializer) > + %vcgt1.i = tail call <1 x i64> > @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f32(<1 x double> %vcgt.i, <1 x float> > zeroinitializer) #5 > %0 = extractelement <1 x i64> %vcgt1.i, i32 0 > ret i64 %0 > } > @@ -165,7 +174,7 @@ > ; CHECK: fcmle {{d[0-9]}}, {{d[0-9]}}, #0.0 > entry: > %vcle.i = insertelement <1 x double> undef, double %a, i32 0 > - %vcle1.i = call <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1f64.v1f64(<1 > x double> %vcle.i, <1 x double> zeroinitializer) > + %vcle1.i = tail call <1 x i64> > @llvm.aarch64.neon.vclez.v1i64.v1f64.v1f32(<1 x double> %vcle.i, <1 x > float> zeroinitializer) #5 > %0 = extractelement <1 x i64> %vcle1.i, i32 0 > ret i64 %0 > } > @@ -207,7 +216,7 @@ > ; CHECK: fcmlt {{d[0-9]}}, {{d[0-9]}}, #0.0 > entry: > %vclt.i = insertelement <1 x double> undef, double %a, i32 0 > - %vclt1.i = call <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1f64.v1f64(<1 > x double> %vclt.i, <1 x double> zeroinitializer) > + %vclt1.i = tail call <1 x i64> > @llvm.aarch64.neon.vcltz.v1i64.v1f64.v1f32(<1 x double> %vclt.i, <1 x > float> zeroinitializer) #5 > %0 = extractelement <1 x i64> %vclt1.i, i32 0 > ret i64 %0 > } > @@ -301,15 +310,18 @@ > } > > declare <1 x i32> @llvm.aarch64.neon.vceq.v1i32.v1f32.v1f32(<1 x float>, > <1 x float>) > +declare <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1f64.v1f32(<1 x double>, > <1 x float>) > declare <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1f64.v1f64(<1 x double>, > <1 x double>) > declare <1 x i32> @llvm.aarch64.neon.vcge.v1i32.v1f32.v1f32(<1 x float>, > <1 x float>) > +declare <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f32(<1 x double>, > <1 x float>) > declare <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f64(<1 x double>, > <1 x double>) > declare <1 x i32> @llvm.aarch64.neon.vclez.v1i32.v1f32.v1f32(<1 x float>, > <1 x float>) > -declare <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1f64.v1f64(<1 x > double>, <1 x double>) > +declare <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1f64.v1f32(<1 x > double>, <1 x float>) > declare <1 x i32> @llvm.aarch64.neon.vcgt.v1i32.v1f32.v1f32(<1 x float>, > <1 x float>) > +declare <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f32(<1 x double>, > <1 x float>) > declare <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f64(<1 x double>, > <1 x double>) > declare <1 x i32> @llvm.aarch64.neon.vcltz.v1i32.v1f32.v1f32(<1 x float>, > <1 x float>) > -declare <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1f64.v1f64(<1 x > double>, <1 x double>) > +declare <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1f64.v1f32(<1 x > double>, <1 x float>) > declare <1 x i32> @llvm.aarch64.neon.vcage.v1i32.v1f32.v1f32(<1 x float>, > <1 x float>) > declare <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x > double>, <1 x double>) > declare <1 x i32> @llvm.aarch64.neon.vcagt.v1i32.v1f32.v1f32(<1 x float>, > <1 x float>) > > _______________________________________________ > cfe-commits mailing list > [email protected] > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits > > -- Best Regards, Kevin Qin
_______________________________________________ cfe-commits mailing list [email protected] http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
