GCC Maintainers: The support for the vec_mulo and vec_mule has yet another bug. For the case of signed/unsigned integer arguments the builtin generates the half word instruction not the word instruction. This patch fixes the issue. The fix has been tested and verified on powerpc64le-unknown-linux-gnu (Power 8 LE)
Is the patch OK for gcc mainline? Carl Love --------------------------------------------- >From 3127a3f9c8480fde428c4a13bc37d6eaefd0edfe Mon Sep 17 00:00:00 2001 From: Carl Love <ca...@us.ibm.com> Date: Fri, 16 Jun 2017 16:10:56 -0500 Subject: [PATCH] vec_mule, vec_mulo fix 2 gcc/ChangeLog: 2017-06-17 Carl Love <c...@us.ibm.com> * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add ALTIVEC_BUILTIN_VMULESW, ALTIVEC_BUILTIN_VMULEUW, ALTIVEC_BUILTIN_VMULOSW, ALTIVEC_BUILTIN_VMULOUW enties. * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin(), builtin_function_type()): Add needed ALTIVEC_BUILTIN_* case statements. * config/rs6000/altivec.md (define_c_enum "unspec", define_expand "vec_widen_umult_even_v4si", define_expand "vec_widen_smult_even_v4si", define_expand "vec_widen_umult_odd_v4si", define_expand "vec_widen_smult_odd_v4si", define_insn "altivec_vmuleuw", define_insn "altivec_vmulesw", define_insn "altivec_vmulouw", define_insn "altivec_vmulosw"): Add support to generate vmuleuw, vmulesw, vmulouw, vmulosw instructions. * config/rs6000/rs6000-builtin.def (VMLEUW, VMULESW, VMULOUW, VMULOSW): Add definitions. --- gcc/config/rs6000/altivec.md | 91 ++++++++++++++++++++++++++++++++++++ gcc/config/rs6000/rs6000-builtin.def | 8 ++++ gcc/config/rs6000/rs6000-c.c | 12 +++-- gcc/config/rs6000/rs6000.c | 6 +++ 4 files changed, 113 insertions(+), 4 deletions(-) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 487b9a4..142300a 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -36,10 +36,14 @@ UNSPEC_VMULESB UNSPEC_VMULEUH UNSPEC_VMULESH + UNSPEC_VMULEUW + UNSPEC_VMULESW UNSPEC_VMULOUB UNSPEC_VMULOSB UNSPEC_VMULOUH UNSPEC_VMULOSH + UNSPEC_VMULOUW + UNSPEC_VMULOSW UNSPEC_VPKPX UNSPEC_VPACK_SIGN_SIGN_SAT UNSPEC_VPACK_SIGN_UNS_SAT @@ -1412,6 +1416,32 @@ DONE; }) +(define_expand "vec_widen_umult_even_v4si" + [(use (match_operand:V2DI 0 "register_operand" "")) + (use (match_operand:V4SI 1 "register_operand" "")) + (use (match_operand:V4SI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmuleuw (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulouw (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_smult_even_v4si" + [(use (match_operand:V2DI 0 "register_operand" "")) + (use (match_operand:V4SI 1 "register_operand" "")) + (use (match_operand:V4SI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulesw (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulosw (operands[0], operands[1], operands[2])); + DONE; +}) + (define_expand "vec_widen_umult_odd_v16qi" [(use (match_operand:V8HI 0 "register_operand" "")) (use (match_operand:V16QI 1 "register_operand" "")) @@ -1464,6 +1494,32 @@ DONE; }) +(define_expand "vec_widen_umult_odd_v4si" + [(use (match_operand:V2DI 0 "register_operand" "")) + (use (match_operand:V4SI 1 "register_operand" "")) + (use (match_operand:V4SI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulouw (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmuleuw (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_smult_odd_v4si" + [(use (match_operand:V2DI 0 "register_operand" "")) + (use (match_operand:V4SI 1 "register_operand" "")) + (use (match_operand:V4SI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulosw (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulesw (operands[0], operands[1], operands[2])); + DONE; +}) + (define_insn "altivec_vmuleub" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") @@ -1536,6 +1592,41 @@ "vmulosh %0,%1,%2" [(set_attr "type" "veccomplex")]) +(define_insn "altivec_vmuleuw" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VMULEUW))] + "TARGET_ALTIVEC" + "vmuleuw %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmulouw" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VMULOUW))] + "TARGET_ALTIVEC" + "vmulouw %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmulesw" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VMULESW))] + "TARGET_ALTIVEC" + "vmulesw %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmulosw" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VMULOSW))] + "TARGET_ALTIVEC" + "vmulosw %0,%1,%2" + [(set_attr "type" "veccomplex")]) ;; Vector pack/unpack (define_insn "altivec_vpkpx" diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 241c439..780d452 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -1031,10 +1031,14 @@ BU_ALTIVEC_2 (VMULEUB, "vmuleub", CONST, vec_widen_umult_even_v16qi) BU_ALTIVEC_2 (VMULESB, "vmulesb", CONST, vec_widen_smult_even_v16qi) BU_ALTIVEC_2 (VMULEUH, "vmuleuh", CONST, vec_widen_umult_even_v8hi) BU_ALTIVEC_2 (VMULESH, "vmulesh", CONST, vec_widen_smult_even_v8hi) +BU_ALTIVEC_2 (VMULEUW, "vmuleuw", CONST, vec_widen_umult_even_v4si) +BU_ALTIVEC_2 (VMULESW, "vmulesw", CONST, vec_widen_smult_even_v4si) BU_ALTIVEC_2 (VMULOUB, "vmuloub", CONST, vec_widen_umult_odd_v16qi) BU_ALTIVEC_2 (VMULOSB, "vmulosb", CONST, vec_widen_smult_odd_v16qi) BU_ALTIVEC_2 (VMULOUH, "vmulouh", CONST, vec_widen_umult_odd_v8hi) BU_ALTIVEC_2 (VMULOSH, "vmulosh", CONST, vec_widen_smult_odd_v8hi) +BU_ALTIVEC_2 (VMULOUW, "vmulouw", CONST, vec_widen_umult_odd_v4si) +BU_ALTIVEC_2 (VMULOSW, "vmulosw", CONST, vec_widen_smult_odd_v4si) BU_ALTIVEC_2 (VNOR, "vnor", CONST, norv4si3) BU_ALTIVEC_2 (VOR, "vor", CONST, iorv4si3) BU_ALTIVEC_2 (VPKUHUM, "vpkuhum", CONST, altivec_vpkuhum) @@ -1346,12 +1350,16 @@ BU_ALTIVEC_OVERLOAD_2 (VMRGLH, "vmrglh") BU_ALTIVEC_OVERLOAD_2 (VMRGLW, "vmrglw") BU_ALTIVEC_OVERLOAD_2 (VMULESB, "vmulesb") BU_ALTIVEC_OVERLOAD_2 (VMULESH, "vmulesh") +BU_ALTIVEC_OVERLOAD_2 (VMULESW, "vmulesw") BU_ALTIVEC_OVERLOAD_2 (VMULEUB, "vmuleub") BU_ALTIVEC_OVERLOAD_2 (VMULEUH, "vmuleuh") +BU_ALTIVEC_OVERLOAD_2 (VMULEUW, "vmuleuw") BU_ALTIVEC_OVERLOAD_2 (VMULOSB, "vmulosb") BU_ALTIVEC_OVERLOAD_2 (VMULOSH, "vmulosh") +BU_ALTIVEC_OVERLOAD_2 (VMULOSW, "vmulosw") BU_ALTIVEC_OVERLOAD_2 (VMULOUB, "vmuloub") BU_ALTIVEC_OVERLOAD_2 (VMULOUH, "vmulouh") +BU_ALTIVEC_OVERLOAD_2 (VMULOUW, "vmulouw") BU_ALTIVEC_OVERLOAD_2 (VPKSHSS, "vpkshss") BU_ALTIVEC_OVERLOAD_2 (VPKSHUS, "vpkshus") BU_ALTIVEC_OVERLOAD_2 (VPKSWSS, "vpkswss") diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index f1e8d3d..af0bafd 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -2207,11 +2207,13 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESH, RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, - { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESH, + + { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESW, RS6000_BTI_V2DI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, - { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUH, + { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUW, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMULEUB, ALTIVEC_BUILTIN_VMULEUB, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_VMULESB, ALTIVEC_BUILTIN_VMULESB, @@ -2226,11 +2228,13 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V8HI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUH, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, - { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSH, + + { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSW, RS6000_BTI_V2DI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, - { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUH, + { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUW, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSH, RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, { ALTIVEC_BUILTIN_VEC_VMULOSH, ALTIVEC_BUILTIN_VMULOSH, diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 6b28658..d1a4937 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -16332,9 +16332,11 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) /* Even element flavors of vec_mul (signed). */ case ALTIVEC_BUILTIN_VMULESB: case ALTIVEC_BUILTIN_VMULESH: + case ALTIVEC_BUILTIN_VMULESW: /* Even element flavors of vec_mul (unsigned). */ case ALTIVEC_BUILTIN_VMULEUB: case ALTIVEC_BUILTIN_VMULEUH: + case ALTIVEC_BUILTIN_VMULEUW: { arg0 = gimple_call_arg (stmt, 0); arg1 = gimple_call_arg (stmt, 1); @@ -16347,9 +16349,11 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) /* Odd element flavors of vec_mul (signed). */ case ALTIVEC_BUILTIN_VMULOSB: case ALTIVEC_BUILTIN_VMULOSH: + case ALTIVEC_BUILTIN_VMULOSW: /* Odd element flavors of vec_mul (unsigned). */ case ALTIVEC_BUILTIN_VMULOUB: case ALTIVEC_BUILTIN_VMULOUH: + case ALTIVEC_BUILTIN_VMULOUW: { arg0 = gimple_call_arg (stmt, 0); arg1 = gimple_call_arg (stmt, 1); @@ -17965,8 +17969,10 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, /* unsigned 2 argument functions. */ case ALTIVEC_BUILTIN_VMULEUB: case ALTIVEC_BUILTIN_VMULEUH: + case ALTIVEC_BUILTIN_VMULEUW: case ALTIVEC_BUILTIN_VMULOUB: case ALTIVEC_BUILTIN_VMULOUH: + case ALTIVEC_BUILTIN_VMULOUW: case CRYPTO_BUILTIN_VCIPHER: case CRYPTO_BUILTIN_VCIPHERLAST: case CRYPTO_BUILTIN_VNCIPHER: -- 1.9.1