Author: Esme-Yi
Date: 2020-10-04T16:24:20Z
New Revision: e3475f5b91c8dc3142b90b2bb4a1884d6e8d8c2c
URL: https://github.com/llvm/llvm-project/commit/e3475f5b91c8dc3142b90b2bb4a1884d6e8d8c2c DIFF: https://github.com/llvm/llvm-project/commit/e3475f5b91c8dc3142b90b2bb4a1884d6e8d8c2c.diff LOG: [PowerPC] Add builtins for xvtdiv(dp|sp) and xvtsqrt(dp|sp). Summary: This patch implements the builtins for xvtdivdp, xvtdivsp, xvtsqrtdp, xvtsqrtsp. The instructions correspond to the following builtins: int vec_test_swdiv(vector double v1, vector double v2); int vec_test_swdivs(vector float v1, vector float v2); int vec_test_swsqrt(vector double v1); int vec_test_swsqrts(vector float v1); This patch depends on D88274, which fixes the bug in copying from CRRC to GPRC/G8RC. Reviewed By: steven.zhang, amyk Differential Revision: https://reviews.llvm.org/D88278 Added: Modified: clang/include/clang/Basic/BuiltinsPPC.def clang/lib/Headers/altivec.h clang/test/CodeGen/builtins-ppc-vsx.c llvm/include/llvm/IR/IntrinsicsPowerPC.td llvm/lib/Target/PowerPC/PPCInstrVSX.td llvm/test/CodeGen/PowerPC/vsx_builtins.ll Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index 29bce799c8f4..015411abc508 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -558,6 +558,11 @@ BUILTIN(__builtin_vsx_xxeval, "V2ULLiV2ULLiV2ULLiV2ULLiIi", "") BUILTIN(__builtin_vsx_xvtlsbb, "iV16UcUi", "") +BUILTIN(__builtin_vsx_xvtdivdp, "iV2dV2d", "") +BUILTIN(__builtin_vsx_xvtdivsp, "iV4fV4f", "") +BUILTIN(__builtin_vsx_xvtsqrtdp, "iV2d", "") +BUILTIN(__builtin_vsx_xvtsqrtsp, "iV4f", "") + // P10 Vector Permute Extended built-in. 
BUILTIN(__builtin_vsx_xxpermx, "V16UcV16UcV16UcV16UcIi", "") diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index 572b8863dd1a..1d7bc201d330 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -3504,6 +3504,20 @@ vec_div(vector signed __int128 __a, vector signed __int128 __b) { } #endif __POWER10_VECTOR__ +/* vec_xvtdiv */ + +#ifdef __VSX__ +static __inline__ int __ATTRS_o_ai vec_test_swdiv(vector double __a, + vector double __b) { + return __builtin_vsx_xvtdivdp(__a, __b); +} + +static __inline__ int __ATTRS_o_ai vec_test_swdivs(vector float __a, + vector float __b) { + return __builtin_vsx_xvtdivsp(__a, __b); +} +#endif + /* vec_dss */ #define vec_dss __builtin_altivec_dss @@ -8057,6 +8071,18 @@ vec_vrsqrtefp(vector float __a) { return __builtin_altivec_vrsqrtefp(__a); } +/* vec_xvtsqrt */ + +#ifdef __VSX__ +static __inline__ int __ATTRS_o_ai vec_test_swsqrt(vector double __a) { + return __builtin_vsx_xvtsqrtdp(__a); +} + +static __inline__ int __ATTRS_o_ai vec_test_swsqrts(vector float __a) { + return __builtin_vsx_xvtsqrtsp(__a); +} +#endif + /* vec_sel */ #define __builtin_altivec_vsel_4si vec_sel diff --git a/clang/test/CodeGen/builtins-ppc-vsx.c b/clang/test/CodeGen/builtins-ppc-vsx.c index 2542b30590bf..d99b0c1e8f41 100644 --- a/clang/test/CodeGen/builtins-ppc-vsx.c +++ b/clang/test/CodeGen/builtins-ppc-vsx.c @@ -52,6 +52,7 @@ vector unsigned long long res_vull; vector signed __int128 res_vslll; double res_d; +int res_i; float res_af[4]; double res_ad[2]; signed char res_asc[16]; @@ -878,6 +879,23 @@ void test1() { // CHECK: call <2 x double> @llvm.ppc.vsx.xvrsqrtedp(<2 x double> %{{[0-9]+}}) // CHECK-LE: call <2 x double> @llvm.ppc.vsx.xvrsqrtedp(<2 x double> %{{[0-9]+}}) + res_i = vec_test_swsqrt(vd); +// CHECK: call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %{{[0-9]+}}) +// CHECK-LE: call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %{{[0-9]+}}) + + res_i = vec_test_swsqrts(vf); +// CHECK: call i32 
@llvm.ppc.vsx.xvtsqrtsp(<4 x float> %{{[0-9]+}}) +// CHECK-LE: call i32 @llvm.ppc.vsx.xvtsqrtsp(<4 x float> %{{[0-9]+}}) + + res_i = vec_test_swdiv(vd, vd); +// CHECK: call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}) +// CHECK-LE: call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}) + + res_i = vec_test_swdivs(vf, vf); +// CHECK: call i32 @llvm.ppc.vsx.xvtdivsp(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}) +// CHECK-LE: call i32 @llvm.ppc.vsx.xvtdivsp(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}) + + dummy(); // CHECK: call void @dummy() // CHECK-LE: call void @dummy() diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 7b11555296a4..7ab4ee301bb5 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1249,6 +1249,16 @@ def int_ppc_vsx_xxinsertw : def int_ppc_vsx_xvtlsbb : PowerPC_VSX_Intrinsic<"xvtlsbb", [llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_ppc_vsx_xvtdivdp : + PowerPC_VSX_Intrinsic<"xvtdivdp", [llvm_i32_ty], + [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_ppc_vsx_xvtdivsp : + PowerPC_VSX_Intrinsic<"xvtdivsp", [llvm_i32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_ppc_vsx_xvtsqrtdp : + PowerPC_VSX_Intrinsic<"xvtsqrtdp", [llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>; +def int_ppc_vsx_xvtsqrtsp : + PowerPC_VSX_Intrinsic<"xvtsqrtsp", [llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_ppc_vsx_xxeval : PowerPC_VSX_Intrinsic<"xxeval", [llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index f4612b9dfd31..18ed2cca0f02 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -2591,6 +2591,16 @@ def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B), def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B), (XVDIVDP $A, 
$B)>; +// Vector test for software divide and sqrt. +def : Pat<(i32 (int_ppc_vsx_xvtdivdp v2f64:$A, v2f64:$B)), + (COPY_TO_REGCLASS (XVTDIVDP $A, $B), GPRC)>; +def : Pat<(i32 (int_ppc_vsx_xvtdivsp v4f32:$A, v4f32:$B)), + (COPY_TO_REGCLASS (XVTDIVSP $A, $B), GPRC)>; +def : Pat<(i32 (int_ppc_vsx_xvtsqrtdp v2f64:$A)), + (COPY_TO_REGCLASS (XVTSQRTDP $A), GPRC)>; +def : Pat<(i32 (int_ppc_vsx_xvtsqrtsp v4f32:$A)), + (COPY_TO_REGCLASS (XVTSQRTSP $A), GPRC)>; + // Reciprocal estimate def : Pat<(int_ppc_vsx_xvresp v4f32:$A), (XVRESP $A)>; diff --git a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll index b386565500f6..2ab747384b69 100644 --- a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll +++ b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll @@ -54,3 +54,55 @@ define void @test4(<2 x double> %a, i8* %b) { } ; Function Attrs: nounwind readnone declare void @llvm.ppc.vsx.stxvd2x.be(<2 x double>, i8*) + +define i32 @test_vec_test_swdiv(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: test_vec_test_swdiv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvtdivdp cr0, v2, v3 +; CHECK-NEXT: mfocrf r3, 128 +; CHECK-NEXT: srwi r3, r3, 28 +; CHECK-NEXT: blr + entry: + %0 = tail call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %a, <2 x double> %b) + ret i32 %0 +} +declare i32 @llvm.ppc.vsx.xvtdivdp(<2 x double>, <2 x double>) + +define i32 @test_vec_test_swdivs(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vec_test_swdivs: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvtdivsp cr0, v2, v3 +; CHECK-NEXT: mfocrf r3, 128 +; CHECK-NEXT: srwi r3, r3, 28 +; CHECK-NEXT: blr + entry: + %0 = tail call i32 @llvm.ppc.vsx.xvtdivsp(<4 x float> %a, <4 x float> %b) + ret i32 %0 +} +declare i32 @llvm.ppc.vsx.xvtdivsp(<4 x float>, <4 x float>) + +define i32 @test_vec_test_swsqrt(<2 x double> %a) { +; CHECK-LABEL: test_vec_test_swsqrt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvtsqrtdp cr0, v2 +; CHECK-NEXT: mfocrf r3, 128 +; CHECK-NEXT: srwi r3, r3, 28 +; CHECK-NEXT: blr 
+ entry: + %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %a) + ret i32 %0 +} +declare i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double>) + +define i32 @test_vec_test_swsqrts(<4 x float> %a) { +; CHECK-LABEL: test_vec_test_swsqrts: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvtsqrtsp cr0, v2 +; CHECK-NEXT: mfocrf r3, 128 +; CHECK-NEXT: srwi r3, r3, 28 +; CHECK-NEXT: blr + entry: + %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtsp(<4 x float> %a) + ret i32 %0 +} +declare i32 @llvm.ppc.vsx.xvtsqrtsp(<4 x float>) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits