https://gcc.gnu.org/g:c73c188527d53e4ee026bf49c5a48263adb868df
commit c73c188527d53e4ee026bf49c5a48263adb868df Author: Michael Meissner <[email protected]> Date: Mon Oct 27 19:14:14 2025 -0400 Add initial _Float16 and __bfloat16 support. 2025-10-27 Michael Meissner <[email protected]> gcc/ * config/rs6000/altivec.md (VM): Add support for the V8HFmode and V8BFmode vector types used for 16-bit floating point. (VM2): Likewise. (VI_char): Likewise. (VI_scalar): Likewise. (VI_unit): Likewise. (VP_small): Likewise. (VP_small_lc): Likewise. (VU_char): Likewise. * config/rs6000/float16.md: New file for 16-bit floating point support. * config/rs6000/predicates.md (fp16_xxspltiw_constant): New predicate. * config/rs6000/rs6000-builtin.cc (rs6000_type_string): Add support for _Float16 and __bfloat16 types. (rs6000_init_builtins): Define the __bfloat16 type if -mbfloat16. * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Define __FLOAT16__ if -mfloat16. Define __BFLOAT16__ if -mbfloat16. * config/rs6000/rs6000-call.cc (init_cumulative_args): Warn if 16-bit floating point types are passed or returned unless -Wno-psabi is used. (rs6000_function_arg): Likewise. * config/rs6000/rs6000-cpus.def (ISA_2_7_MASKS_SERVER): Add a comment for setting -mfloat16 in the future for -mcpu=power8. (POWERPC_MASKS): Likewise. * config/rs6000/rs6000-modes.def (BFmode): Define modes relating to 16-bit floating point. (HFmode): Likewise. (V8BFmode): Likewise. (V8HFmode): Likewise. (V4HFmode): Likewise. (V4BFmode): Likewise. * config/rs6000/rs6000-p8swap.cc (rs6000_gen_stvx): Remove #ifdef for V8HFmode, since we now support it. Add support for V8BFmode. (rs6000_gen_lvx): Likewise. (replace_swapped_load_constant): Likewise. * config/rs6000/rs6000-protos.h (vec_const_128bit_type): Add mode field for recognizing 16-bit floating point constants. * config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached): Add support for the _Float16 and __bfloat16 scalar/vector types. (rs6000_modes_tieable_p): Likewise. (rs6000_debug_reg_global): Likewise. 
(rs6000_setup_reg_addr_masks): Likewise. (rs6000_init_hard_regno_mode_ok): Likewise. (rs6000_option_override_internal): Add consistency checks for -mfloat16 and -mbfloat16. (easy_altivec_constant): Do not allow non-zero 16-bit floating point constants. (xxspltib_constant_p): Add support for V8HFmode and V8BFmode all 0 vector mode constants. (output_vec_const_move): Add 16-bit floating point scalar/vector support. (rs6000_expand_vector_init): Likewise. (reg_offset_addressing_ok_p): Likewise. (rs6000_legitimate_offset_address_p): Likewise. (legitimate_lo_sum_address_p): Likewise. (rs6000_const_vec): Likewise. (rs6000_emit_move): Likewise. (rs6000_secondary_reload_simple_move): Likewise. (rs6000_can_change_mode_class): Likewise. (rs6000_load_constant_and_splat): Likewise. (rs6000_scalar_mode_supported_p): Likewise. (rs6000_floatn_mode): Likewise. (rs6000_opt_masks): Add -mfloat16 and -mbfloat16. (constant_fp_to_128bit_vector): Add 16-bit floating point scalar/vector support. * config/rs6000/rs6000.h (FP16_SCALAR_MODE_P): New macro. (FP16_VECTOR_MODE_P): Likewise. (ALTIVEC_VECTOR_MODE): Add V8HFmode and V8BFmode. * config/rs6000/rs6000.md (FMOVE128_GPR): Add V8HFmode, V8BFmode, HFmode, and BFmode modes. (wd): Likewise. (du_or_d): Likewise. (BOOL_128): Likewise. (BOOL_REGS_OUTPUT): Likewise. (BOOL_REGS_OP1): Likewise. (BOOL_REGS_OP2): Likewise. (BOOL_REGS_UNARY): Likewise. (RELOAD): Likewise. (toplevel): Include float16.md. * config/rs6000/rs6000.opt (-mfloat16): New switch. (-mbfloat16): Likewise. * config/rs6000/vector.md (VEC_L): Add V8HFmode and V8BFmode. (VEC_M): Likewise. (VEC_E): Likewise. (VEC_base): Likewise. (VEC_base_l): Likewise. * config/rs6000/vsx.md (VECTOR_16BIT): New mode iterator. (VSX_L): Add V8HFmode and V8BFmode. (VSX_M): Likewise. (VSX_XXBR): Likewise. (VSm): Likewise. (VSr): Likewise. (VSisa): Likewise. (??r): Likewise. (nW): Likewise. (VSv): Likewise. (VM3): Likewise. (VM3_char): Likewise. 
(vsx_le_perm_load_v8hi): Convert the V8HImode permute insns to also handle V8HFmode and V8BFmode modes. (vsx_le_perm_load_<mode>): Likewise. (vsx_le_perm_store_v8hi): Likewise. (vsx_le_perm_store_<mode>): Likewise. (V8??mode splitter): Likewise. (vsx_ld_elemrev_v8hi): Likewise. (vsx_ld_elemrev_<mode>): Likewise. (vsx_ld_elemrev_v8hi): Likewise. (vsx_ld_elemrev_v8hi_internal): Likewise. (vsx_ld_elemrev_<mode>_interna): Likewise. (vsx_st_elemrev_v8hi): Likewise. (vsx_st_elemrev_<mode): Likewise. (vsx_st_elemrev_v8hi): Likewise. (vsx_st_elemrev_<mode>_interna): Likewise. (xxswapd_v8hi): Likewise. (xxswapd_<mode>): Likewise. (vsx_lxvd2x8_le_V8HI): Likewise. (vsx_lxvd2x8_le_<MODE): Likewise. (vsx_stxvd2x8_le_V8HI): Likewise. (vsx_stxvd2x8_le_<MODE>): Likewise. (vsx_extract_<mode>_store_p9): Likewise. (vsx_extract_<mode>_p8): Likewise. Diff: --- gcc/config/rs6000/altivec.md | 34 +++++- gcc/config/rs6000/float16.md | 124 +++++++++++++++++++ gcc/config/rs6000/predicates.md | 26 ++++ gcc/config/rs6000/rs6000-builtin.cc | 16 +++ gcc/config/rs6000/rs6000-c.cc | 5 + gcc/config/rs6000/rs6000-call.cc | 20 +++ gcc/config/rs6000/rs6000-cpus.def | 17 +++ gcc/config/rs6000/rs6000-modes.def | 8 ++ gcc/config/rs6000/rs6000-p8swap.cc | 14 +-- gcc/config/rs6000/rs6000-protos.h | 1 + gcc/config/rs6000/rs6000.cc | 235 ++++++++++++++++++++++++++++++++---- gcc/config/rs6000/rs6000.h | 12 ++ gcc/config/rs6000/rs6000.md | 21 +++- gcc/config/rs6000/rs6000.opt | 8 ++ gcc/config/rs6000/vector.md | 37 +++++- gcc/config/rs6000/vsx.md | 151 +++++++++++++---------- 16 files changed, 623 insertions(+), 106 deletions(-) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index fa3368079ada..d821960cb5f5 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -191,6 +191,8 @@ ;; otherwise handled by altivec (v2df, v2di, ti) (define_mode_iterator VM [V4SI V8HI + V8BF + V8HF V16QI V4SF V2DF @@ -203,6 +205,8 @@ ;; Like VM, except don't do TImode 
(define_mode_iterator VM2 [V4SI V8HI + V8BF + V8HF V16QI V4SF V2DF @@ -222,18 +226,38 @@ V1TI TI]) -(define_mode_attr VI_char [(V2DI "d") (V4SI "w") (V8HI "h") (V16QI "b")]) -(define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") (V16QI "QI")]) +(define_mode_attr VI_char [(V2DI "d") + (V4SI "w") + (V8HI "h") + (V8BF "h") + (V8HF "h") + (V16QI "b")]) +(define_mode_attr VI_scalar [(V2DI "DI") + (V4SI "SI") + (V8HI "HI") + (V8BF "BF") + (V8HF "HF") + (V16QI "QI")]) (define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)") (V8HI "VECTOR_UNIT_ALTIVEC_P (V8HImode)") + (V8BF "VECTOR_UNIT_ALTIVEC_P (V8BFmode)") + (V8HF "VECTOR_UNIT_ALTIVEC_P (V8HFmode)") (V4SI "VECTOR_UNIT_ALTIVEC_P (V4SImode)") (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)")]) ;; Vector pack/unpack (define_mode_iterator VP [V2DI V4SI V8HI]) -(define_mode_attr VP_small [(V2DI "V4SI") (V4SI "V8HI") (V8HI "V16QI")]) -(define_mode_attr VP_small_lc [(V2DI "v4si") (V4SI "v8hi") (V8HI "v16qi")]) -(define_mode_attr VU_char [(V2DI "w") (V4SI "h") (V8HI "b")]) +(define_mode_attr VP_small [(V2DI "V4SI") + (V4SI "V8HI") + (V8HI "V16QI")]) +(define_mode_attr VP_small_lc [(V2DI "v4si") + (V4SI "v8hi") + (V8HI "v16qi")]) +(define_mode_attr VU_char [(V2DI "w") + (V4SI "h") + (V8HI "b") + (V8BF "b") + (V8HF "b")]) ;; Vector negate (define_mode_iterator VNEG [V4SI V2DI]) diff --git a/gcc/config/rs6000/float16.md b/gcc/config/rs6000/float16.md new file mode 100644 index 000000000000..d186d8e7d601 --- /dev/null +++ b/gcc/config/rs6000/float16.md @@ -0,0 +1,124 @@ +;; Machine description for IBM RISC System 6000 (POWER) for GNU C compiler +;; Copyright (C) 1990-2025 Free Software Foundation, Inc. +;; Contributed by Richard Kenner ([email protected]) + +;; This file is part of GCC. 
+ +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; Support for _Float16 (HFmode) and __bfloat16 (BFmode) + +;; Mode iterator for 16-bit floating point modes both as a scalar and +;; as a vector. +(define_mode_iterator FP16 [(BF "TARGET_BFLOAT16") + (HF "TARGET_FLOAT16")]) + +;; Mode attribute giving the vector mode for a 16-bit floating point +;; scalar in both upper and lower case. +(define_mode_attr FP16_VECTOR8 [(BF "V8BF") + (HF "V8HF")]) + +(define_mode_attr fp16_vector8 [(BF "v8bf") + (HF "v8hf")]) + +;; _Float16 and __bfloat16 moves +(define_expand "mov<mode>" + [(set (match_operand:FP16 0 "nonimmediate_operand") + (match_operand:FP16 1 "any_operand"))] + "" +{ + if (MEM_P (operands[0]) && !REG_P (operands[1])) + operands[1] = force_reg (<MODE>mode, operands[1]); +}) + +;; On power10, we can load up HFmode and BFmode constants with xxspltiw +;; or pli. +(define_insn "*mov<mode>_xxspltiw" + [(set (match_operand:FP16 0 "gpc_reg_operand" "=wa,wa,?r,?r") + (match_operand:FP16 1 "fp16_xxspltiw_constant" "j,eP,j,eP"))] + "TARGET_PREFIXED || operands[1] == CONST0_RTX (<MODE>mode)" +{ + rtx op1 = operands[1]; + const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op1); + long real_words[1]; + + if (op1 == CONST0_RTX (<MODE>mode)) + return (!vsx_register_operand (operands[0], <MODE>mode) + ? 
"li %0,0" + : "xxlxor %x0,%x0,%x0"); + + real_to_target (real_words, rtype, <MODE>mode); + operands[2] = GEN_INT (real_words[0]); + return (vsx_register_operand (operands[0], <MODE>mode) + ? "xxspltiw %x0,%2" + : "pli %0,%2"); +} + [(set_attr "type" "veclogical, vecsimple, *, *") + (set_attr "prefixed" "no, yes, no, yes")]) + +(define_insn "*mov<mode>_internal" + [(set (match_operand:FP16 0 "nonimmediate_operand" + "=wa, wa, Z, r, r, + m, r, wa, wa, r") + + (match_operand:FP16 1 "any_operand" + "wa, Z, wa, r, m, + r, wa, r, j, j"))] + "gpc_reg_operand (operands[0], <MODE>mode) + || gpc_reg_operand (operands[1], <MODE>mode)" + "@ + xxlor %x0,%x1,%x1 + lxsihzx %x0,%y1 + stxsihx %x1,%y0 + mr %0,%1 + lhz%U1%X1 %0,%1 + sth%U0%X0 %1,%0 + mfvsrwz %0,%x1 + mtvsrwz %x0,%1 + xxlxor %x0,%x0,%x0 + li %0,0" + [(set_attr "type" "vecsimple, fpload, fpstore, *, load, + store, mfvsr, mtvsr, veclogical, *") + (set_attr "isa" "*, p9v, p9v, *, *, + *, p8v, p8v, p9v, *")]) + +;; Vector duplicate (splat a 16-bit floating point scalar to all 8 elements) +(define_insn "*vecdup<mode>_reg" + [(set (match_operand:<FP16_VECTOR8> 0 "altivec_register_operand" "=v") + (vec_duplicate:<FP16_VECTOR8> + (match_operand:FP16 1 "altivec_register_operand" "v")))] + "" + "vsplth %0,%1,3" + [(set_attr "type" "vecperm")]) + +(define_insn "*vecdup<mode>_const" + [(set (match_operand:<FP16_VECTOR8> 0 "vsx_register_operand" "=wa,wa") + (vec_duplicate:<FP16_VECTOR8> + (match_operand:FP16 1 "fp16_xxspltiw_constant" "j,eP")))] + "TARGET_PREFIXED || operands[1] == CONST0_RTX (<MODE>mode)" +{ + rtx op1 = operands[1]; + if (op1 == CONST0_RTX (<MODE>mode)) + return "xxlxor %x0,%x0,%x0"; + + const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op1); + long real_words[1]; + + real_to_target (real_words, rtype, <MODE>mode); + operands[2] = GEN_INT (real_words[0]); + return "xxspltiw %x0,%2"; +} + [(set_attr "type" "veclogical,vecperm") + (set_attr "prefixed" "*,yes")]) diff --git a/gcc/config/rs6000/predicates.md 
b/gcc/config/rs6000/predicates.md index
647e89afb6a7..e9ddc61e3a8a 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -601,6 +601,11 @@ if (TARGET_VSX && op == CONST0_RTX (mode)) return 1; + /* Power9 needs to load HFmode constants from memory, Power10 can use + XXSPLTIW. */ + if (mode == HFmode && !TARGET_POWER10) + return 0; + /* Constants that can be generated with ISA 3.1 instructions are easy. */ vec_const_128bit_type vsx_const; if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const)) @@ -2166,3 +2171,24 @@ (and (match_code "subreg") (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op))) == SUBREG_BYTE (op)"))) + +;; Return 1 if this is a 16-bit floating point constant that can be +;; loaded with XXSPLTIW or is 0.0 that can be loaded with XXSPLTIB. +(define_predicate "fp16_xxspltiw_constant" + (match_code "const_double") +{ + if (!FP16_SCALAR_MODE_P (mode)) + return false; + + if (op == CONST0_RTX (mode)) + return true; + + if (!TARGET_PREFIXED) + return false; + + vec_const_128bit_type vsx_const; + if (!vec_const_128bit_to_bytes (op, mode, &vsx_const)) + return false; + + return constant_generates_xxspltiw (&vsx_const); +}) diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc index dfbb7d02157b..94a4441e8f9c 100644 --- a/gcc/config/rs6000/rs6000-builtin.cc +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -491,6 +491,10 @@ const char *rs6000_type_string (tree type_node) return "voidc*"; else if (type_node == float128_type_node) return "_Float128"; + else if (type_node == float16_type_node) + return "_Float16"; + else if (TARGET_BFLOAT16 && type_node == bfloat16_type_node) + return "__bfloat16"; else if (type_node == vector_pair_type_node) return "__vector_pair"; else if (type_node == vector_quad_type_node) @@ -756,6 +760,18 @@ rs6000_init_builtins (void) else ieee128_float_type_node = NULL_TREE; + /* __bfloat16 support. 
*/ + if (TARGET_BFLOAT16) + { + bfloat16_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (bfloat16_type_node) = 16; + SET_TYPE_MODE (bfloat16_type_node, BFmode); + layout_type (bfloat16_type_node); + t = build_qualified_type (bfloat16_type_node, TYPE_QUAL_CONST); + lang_hooks.types.register_builtin_type (bfloat16_type_node, + "__bfloat16"); + } + /* Vector pair and vector quad support. */ vector_pair_type_node = make_node (OPAQUE_TYPE); SET_TYPE_MODE (vector_pair_type_node, OOmode); diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc index 70e6d4b1e6db..7ab226885182 100644 --- a/gcc/config/rs6000/rs6000-c.cc +++ b/gcc/config/rs6000/rs6000-c.cc @@ -586,6 +586,11 @@ rs6000_target_modify_macros (bool define_p, if ((flags & OPTION_MASK_FLOAT128_HW) != 0) rs6000_define_or_undefine_macro (define_p, "__FLOAT128_HARDWARE__"); + /* 16-bit floating point support. */ + if ((flags & OPTION_MASK_FLOAT16) != 0) + rs6000_define_or_undefine_macro (define_p, "__FLOAT16__"); + if ((flags & OPTION_MASK_BFLOAT16) != 0) + rs6000_define_or_undefine_macro (define_p, "__BFLOAT16__"); /* Tell the user if we are targeting CELL. */ if (rs6000_cpu == PROCESSOR_CELL) rs6000_define_or_undefine_macro (define_p, "__PPU__"); diff --git a/gcc/config/rs6000/rs6000-call.cc b/gcc/config/rs6000/rs6000-call.cc index 8fe5652442e3..41c0d4f71590 100644 --- a/gcc/config/rs6000/rs6000-call.cc +++ b/gcc/config/rs6000/rs6000-call.cc @@ -684,6 +684,18 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, " altivec instructions are disabled, use %qs" " to enable them", "-maltivec"); } + + /* Warn that __bfloat16 and _Float16 might be returned differently in the + future. The issue is currently 16-bit floating point is returned in + floating point register #1 in 16-bit format. We may or may not want to + return it as a scalar 64-bit value. 
*/ + if (fntype && warn_psabi && !cum->libcall) + { + machine_mode ret_mode = TYPE_MODE (TREE_TYPE (fntype)); + if (ret_mode == BFmode || ret_mode == HFmode) + warning (OPT_Wpsabi, "%s might be returned differently in the future", + ret_mode == BFmode ? "__bfloat16" : "_Float16"); + } } @@ -1641,6 +1653,14 @@ rs6000_function_arg (cumulative_args_t cum_v, const function_arg_info &arg) return NULL_RTX; } + /* Warn that _Float16 and __bfloat16 might be passed differently in the + future. The issue is currently 16-bit floating point values are passed in + floating point registers in the native 16-bit format. We may or may not + want to pass the value it as a scalar 64-bit value. */ + if (warn_psabi && !cum->libcall && (mode == BFmode || mode == HFmode)) + warning (OPT_Wpsabi, "%s might be passed differently in the future", + mode == BFmode ? "__bfloat16" : "_Float16"); + /* Return a marker to indicate whether CR1 needs to set or clear the bit that V.4 uses to say fp args were passed in registers. Assume that we don't need the marker for software floating point, diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index 233f01e9c615..f3b0a8de8348 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -88,6 +88,16 @@ /* For now, don't provide an embedded version of ISA 2.07. Do not set power8 fusion here, instead set it in rs6000.cc if we are tuning for a power8 system. */ + +/* In the future consider adding OPTION_MASK_BFLOAT16 and OPTION_MASK_FLOAT16 + when the 16-bit floating support is less experimental. ISA 2.7 (power8) is + the minimum ISA that can support 16-bit point floating point because we need + direct move to load 16-bit values into floating point/vector registers from + GPRs. ISA 3.0 (power9) adds the ability to load/store 16-bit values + directly to floating point/vector registers, and hardware instructions to + convert between _Float16 and float. 
ISA 3.1 (power10) adds instructions to + convert between __bfloat16 and float vector types. */ + #define ISA_2_7_MASKS_SERVER (ISA_2_6_MASKS_SERVER \ | OPTION_MASK_P8_VECTOR \ | OPTION_MASK_CRYPTO \ @@ -152,7 +162,13 @@ #endif /* Mask of all options to set the default isa flags based on -mcpu=<xxx>. */ + +/* In the future if 16-bit floating point is enabled by default for ISA 2.7 + (power8), add OPTION_MASK_FLOAT16 and OPTION_MASK_BFLOAT16 to + POWERPC_MASKS. */ + #define POWERPC_MASKS (OPTION_MASK_ALTIVEC \ + /* | OPTION_MASK_BFLOAT16 */ \ | OPTION_MASK_CMPB \ | OPTION_MASK_CRYPTO \ | OPTION_MASK_DFP \ @@ -160,6 +176,7 @@ | OPTION_MASK_EFFICIENT_UNALIGNED_VSX \ | OPTION_MASK_FLOAT128_HW \ | OPTION_MASK_FLOAT128_KEYWORD \ + /* | OPTION_MASK_FLOAT16 */ \ | OPTION_MASK_FPRND \ | OPTION_MASK_P10_FUSION \ | OPTION_MASK_HTM \ diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def index f89e4ef403c1..f8b11b2c8576 100644 --- a/gcc/config/rs6000/rs6000-modes.def +++ b/gcc/config/rs6000/rs6000-modes.def @@ -45,6 +45,12 @@ FLOAT_MODE (TF, 16, ieee_quad_format); /* IBM 128-bit floating point. */ FLOAT_MODE (IF, 16, ibm_extended_format); +/* Explicit IEEE 16-bit floating point. */ +FLOAT_MODE (HF, 2, ieee_half_format); + +/* Explicit bfloat16 floating point. */ +FLOAT_MODE (BF, 2, arm_bfloat_half_format); + /* Add any extra modes needed to represent the condition code. For the RS/6000, we need separate modes when unsigned (logical) comparisons @@ -70,6 +76,8 @@ VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */ /* Half VMX/VSX vector (for internal use) */ VECTOR_MODE (FLOAT, SF, 2); /* V2SF */ VECTOR_MODE (INT, SI, 2); /* V2SI */ +VECTOR_MODE (FLOAT, BF, 4); /* V4BF */ +VECTOR_MODE (FLOAT, HF, 4); /* V4HF */ /* Replacement for TImode that only is allowed in GPRs. 
We also use PTImode for quad memory atomic operations to force getting an even/odd register diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc index e92f01031270..7ba50f11a7bf 100644 --- a/gcc/config/rs6000/rs6000-p8swap.cc +++ b/gcc/config/rs6000/rs6000-p8swap.cc @@ -1598,10 +1598,10 @@ rs6000_gen_stvx (enum machine_mode mode, rtx dest_exp, rtx src_exp) stvx = gen_altivec_stvx_v16qi (src_exp, dest_exp); else if (mode == V8HImode) stvx = gen_altivec_stvx_v8hi (src_exp, dest_exp); -#ifdef HAVE_V8HFmode else if (mode == V8HFmode) stvx = gen_altivec_stvx_v8hf (src_exp, dest_exp); -#endif + else if (mode == V8BFmode) + stvx = gen_altivec_stvx_v8bf (src_exp, dest_exp); else if (mode == V4SImode) stvx = gen_altivec_stvx_v4si (src_exp, dest_exp); else if (mode == V4SFmode) @@ -1722,10 +1722,10 @@ rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, rtx src_exp) lvx = gen_altivec_lvx_v16qi (dest_exp, src_exp); else if (mode == V8HImode) lvx = gen_altivec_lvx_v8hi (dest_exp, src_exp); -#ifdef HAVE_V8HFmode else if (mode == V8HFmode) lvx = gen_altivec_lvx_v8hf (dest_exp, src_exp); -#endif + else if (mode == V8BFmode) + lvx = gen_altivec_lvx_v8bf (dest_exp, src_exp); else if (mode == V4SImode) lvx = gen_altivec_lvx_v4si (dest_exp, src_exp); else if (mode == V4SFmode) @@ -1930,11 +1930,7 @@ replace_swapped_load_constant (swap_web_entry *insn_entry, rtx swap_insn) rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); new_mem = force_const_mem (mode, new_const_vector); } - else if ((mode == V8HImode) -#ifdef HAVE_V8HFmode - || (mode == V8HFmode) -#endif - ) + else if (mode == V8HImode || mode == V8HFmode || mode == V8BFmode) { rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (8)); int i; diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 4619142d197b..9bf971370d41 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -250,6 +250,7 @@ typedef struct { bool 
all_words_same; /* Are the words all equal? */ bool all_half_words_same; /* Are the half words all equal? */ bool all_bytes_same; /* Are the bytes all equal? */ + machine_mode mode; /* Original constant mode. */ } vec_const_128bit_type; extern bool vec_const_128bit_to_bytes (rtx, machine_mode, diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 44bcf9664121..f2c39aacc9a3 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -1896,7 +1896,8 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode) if (ALTIVEC_REGNO_P (regno)) { - if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p) + if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p + && !FP16_SCALAR_MODE_P (mode)) return 0; return ALTIVEC_REGNO_P (last_regno); @@ -1986,7 +1987,8 @@ static bool rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2) { if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode - || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode) + || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode + || FP16_SCALAR_MODE_P (mode1) || FP16_SCALAR_MODE_P (mode2)) return mode1 == mode2; if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1)) @@ -2252,6 +2254,8 @@ rs6000_debug_reg_global (void) DImode, TImode, PTImode, + BFmode, + HFmode, SFmode, DFmode, TFmode, @@ -2272,6 +2276,8 @@ rs6000_debug_reg_global (void) V8SImode, V4DImode, V2TImode, + V8BFmode, + V8HFmode, V4SFmode, V2DFmode, V8SFmode, @@ -2630,8 +2636,14 @@ rs6000_setup_reg_addr_masks (void) /* SDmode is special in that we want to access it only via REG+REG addressing on power7 and above, since we want to use the LFIWZX and - STFIWZX instructions to load it. */ - bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK); + STFIWZX instructions to load it. 
+ + Never allow offset addressing for 16-bit floating point modes, since + it is expected that 16-bit floating point should always go into the + vector registers and we only have indexed and indirect 16-bit loads to + VSR registers. */ + bool indexed_only_p = ((m == SDmode && TARGET_NO_SDMODE_STACK) + || FP16_SCALAR_MODE_P (m)); any_addr_mask = 0; for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++) @@ -2680,6 +2692,7 @@ rs6000_setup_reg_addr_masks (void) && !complex_p && (m != E_DFmode || !TARGET_VSX) && (m != E_SFmode || !TARGET_P8_VECTOR) + && !FP16_SCALAR_MODE_P (m) && !small_int_vsx_p) { addr_mask |= RELOAD_REG_PRE_INCDEC; @@ -2896,18 +2909,24 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC; rs6000_vector_align[V4SImode] = align32; rs6000_vector_align[V8HImode] = align32; + rs6000_vector_align[V8HFmode] = align32; + rs6000_vector_align[V8BFmode] = align32; rs6000_vector_align[V16QImode] = align32; if (TARGET_VSX) { rs6000_vector_mem[V4SImode] = VECTOR_VSX; rs6000_vector_mem[V8HImode] = VECTOR_VSX; + rs6000_vector_mem[V8HFmode] = VECTOR_VSX; + rs6000_vector_mem[V8BFmode] = VECTOR_VSX; rs6000_vector_mem[V16QImode] = VECTOR_VSX; } else { rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC; rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC; + rs6000_vector_mem[V8HFmode] = VECTOR_ALTIVEC; + rs6000_vector_mem[V8BFmode] = VECTOR_ALTIVEC; rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC; } } @@ -2927,6 +2946,20 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_vector_align[V1TImode] = 128; } + /* _Float16 support. */ + if (TARGET_FLOAT16) + { + rs6000_vector_mem[HFmode] = VECTOR_VSX; + rs6000_vector_align[HFmode] = 16; + } + + /* _bfloat16 support. */ + if (TARGET_BFLOAT16) + { + rs6000_vector_mem[BFmode] = VECTOR_VSX; + rs6000_vector_align[BFmode] = 16; + } + /* DFmode, see if we want to use the VSX unit. Memory is handled differently, so don't set rs6000_vector_mem. 
*/ if (TARGET_VSX) @@ -3008,6 +3041,10 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load; reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store; reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load; + reg_addr[V8BFmode].reload_store = CODE_FOR_reload_v8bf_di_store; + reg_addr[V8BFmode].reload_load = CODE_FOR_reload_v8bf_di_load; + reg_addr[V8HFmode].reload_store = CODE_FOR_reload_v8hf_di_store; + reg_addr[V8HFmode].reload_load = CODE_FOR_reload_v8hf_di_load; reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store; reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load; reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store; @@ -3037,6 +3074,18 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load; } + if (TARGET_FLOAT16) + { + reg_addr[HFmode].reload_store = CODE_FOR_reload_hf_di_store; + reg_addr[HFmode].reload_load = CODE_FOR_reload_hf_di_load; + } + + if (TARGET_BFLOAT16) + { + reg_addr[BFmode].reload_store = CODE_FOR_reload_bf_di_store; + reg_addr[BFmode].reload_load = CODE_FOR_reload_bf_di_load; + } + /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are available. 
*/ if (TARGET_NO_SDMODE_STACK) @@ -3059,6 +3108,8 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di; reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf; reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si; + reg_addr[V8BFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8bf; + reg_addr[V8HFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hf; reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi; reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi; reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf; @@ -3069,6 +3120,8 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di; reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf; reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si; + reg_addr[V8BFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8bf; + reg_addr[V8HFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hf; reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi; reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi; reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf; @@ -3106,6 +3159,10 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load; reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store; reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load; + reg_addr[V8BFmode].reload_store = CODE_FOR_reload_v8bf_si_store; + reg_addr[V8BFmode].reload_load = CODE_FOR_reload_v8bf_si_load; + reg_addr[V8HFmode].reload_store = CODE_FOR_reload_v8hf_si_store; + reg_addr[V8HFmode].reload_load = CODE_FOR_reload_v8hf_si_load; reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store; reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load; reg_addr[V2DFmode].reload_store = 
CODE_FOR_reload_v2df_si_store; @@ -3129,6 +3186,18 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load; } + if (TARGET_FLOAT16) + { + reg_addr[HFmode].reload_store = CODE_FOR_reload_hf_si_store; + reg_addr[HFmode].reload_load = CODE_FOR_reload_hf_si_load; + } + + if (TARGET_BFLOAT16) + { + reg_addr[BFmode].reload_store = CODE_FOR_reload_bf_si_store; + reg_addr[BFmode].reload_load = CODE_FOR_reload_bf_si_load; + } + /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are available. */ if (TARGET_NO_SDMODE_STACK) @@ -3868,6 +3937,23 @@ rs6000_option_override_internal (bool global_init_p) } } + /* -mfloat16 and -mbfloat16 needs power8 at a minimum in order to load up + 16-bit values into vector registers via loads/stores from GPRs and then + using direct moves. */ + if (TARGET_FLOAT16 && !TARGET_POWER8) + { + rs6000_isa_flags &= ~OPTION_MASK_FLOAT16; + if (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT16) + error ("%qs requires at least %qs", "-mfloat16", "-mcpu=power8"); + } + + if (TARGET_BFLOAT16 && !TARGET_POWER8) + { + rs6000_isa_flags &= ~OPTION_MASK_BFLOAT16; + if (rs6000_isa_flags_explicit & OPTION_MASK_BFLOAT16) + error ("%qs requires at least %qs", "-mbfloat16", "-mcpu=power8"); + } + /* If hard-float/altivec/vsx were explicitly turned off then don't allow the -mcpu setting to enable options that conflict. */ if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX) @@ -6431,9 +6517,12 @@ easy_altivec_constant (rtx op, machine_mode mode) else if (mode != GET_MODE (op)) return 0; - /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy - constants. */ - if (mode == V2DFmode) + /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy constants. + Likewise, don't handle 16-bit floating point constants here, unless they + are 0.0. */ + if (mode == V2DFmode + || FP16_SCALAR_MODE_P (mode) + || FP16_VECTOR_MODE_P (mode)) return zero_constant (op, mode) ? 
8 : 0; else if (mode == V2DImode) @@ -6559,11 +6648,16 @@ xxspltib_constant_p (rtx op, /* Handle (vec_duplicate <constant>). */ if (GET_CODE (op) == VEC_DUPLICATE) { + element = XEXP (op, 0); + + /* For V8BFmode & V8HFmode, the only valid to use xxspltib is 0.0. */ + if (mode == V8BFmode || mode == V8HFmode) + return element == CONST0_RTX (GET_MODE_INNER (mode)); + if (mode != V16QImode && mode != V8HImode && mode != V4SImode && mode != V2DImode) return false; - element = XEXP (op, 0); if (!CONST_INT_P (element)) return false; @@ -6575,6 +6669,20 @@ xxspltib_constant_p (rtx op, /* Handle (const_vector [...]). */ else if (GET_CODE (op) == CONST_VECTOR) { + /* For V8BFmode & V8HFmode, the only valid to use xxspltib is 0.0. */ + if (mode == V8BFmode || mode == V8HFmode) + { + if (op == CONST0_RTX (mode)) + return true; + + rtx zero = CONST0_RTX (GET_MODE_INNER (mode)); + for (i = 0; i < nunits; i++) + if (CONST_VECTOR_ELT (op, i) != zero) + return false; + + return true; + } + if (mode != V16QImode && mode != V8HImode && mode != V4SImode && mode != V2DImode) return false; @@ -6791,6 +6899,8 @@ output_vec_const_move (rtx *operands) return "vspltisw %0,%1"; case E_V8HImode: + case E_V8HFmode: + case E_V8BFmode: return "vspltish %0,%1"; case E_V16QImode: @@ -7019,6 +7129,15 @@ rs6000_expand_vector_init (rtx target, rtx vals) return; } + /* Special case splats of 16-bit floating point. */ + if (all_same && FP16_VECTOR_MODE_P (mode)) + { + rtx op0 = force_reg (GET_MODE_INNER (mode), XVECEXP (vals, 0, 0)); + rtx dup = gen_rtx_VEC_DUPLICATE (mode, op0); + emit_insn (gen_rtx_SET (target, dup)); + return; + } + /* Special case initializing vector short/char that are splats if we are on 64-bit systems with direct move. 
*/ if (all_same && TARGET_DIRECT_MOVE_64BIT @@ -7082,7 +7201,9 @@ rs6000_expand_vector_init (rtx target, rtx vals) return; } - if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode)) + if (TARGET_DIRECT_MOVE + && (mode == V16QImode || mode == V8HImode || mode == V8HFmode + || mode == V8BFmode)) { rtx op[16]; /* Force the values into word_mode registers. */ @@ -8657,6 +8778,8 @@ reg_offset_addressing_ok_p (machine_mode mode) { case E_V16QImode: case E_V8HImode: + case E_V8HFmode: + case E_V8BFmode: case E_V4SFmode: case E_V4SImode: case E_V2DFmode: @@ -8675,6 +8798,13 @@ reg_offset_addressing_ok_p (machine_mode mode) return mode_supports_dq_form (mode); break; + /* For 16-bit floating point types, do not allow offset addressing, since + it is assumed that most of the use will be in vector registers, and we + only have reg+reg addressing for 16-bit modes. */ + case E_BFmode: + case E_HFmode: + return false; + /* The vector pair/quad types support offset addressing if the underlying vectors support offset addressing. */ case E_OOmode: @@ -8965,6 +9095,13 @@ rs6000_legitimate_offset_address_p (machine_mode mode, rtx x, extra = 0; switch (mode) { + /* For 16-bit floating point types, do not allow offset addressing, since + it is assumed that most of the use will be in vector registers, and we + only have reg+reg addressing for 16-bit modes. */ + case E_BFmode: + case E_HFmode: + return false; + case E_DFmode: case E_DDmode: case E_DImode: @@ -9066,6 +9203,11 @@ macho_lo_sum_memory_operand (rtx x, machine_mode mode) static bool legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict) { + /* For 16-bit floating point types, do not allow offset addressing, since + it is assumed that most of the use will be in vector registers, and we + only have reg+reg addressing for 16-bit modes. 
*/ + if (FP16_SCALAR_MODE_P (mode)) + return false; if (GET_CODE (x) != LO_SUM) return false; if (!REG_P (XEXP (x, 0))) @@ -10762,6 +10904,8 @@ rs6000_const_vec (machine_mode mode) subparts = 4; break; case E_V8HImode: + case E_V8HFmode: + case E_V8BFmode: subparts = 8; break; case E_V16QImode: @@ -11217,6 +11361,8 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode) case E_V16QImode: case E_V8HImode: + case E_V8HFmode: + case E_V8BFmode: case E_V4SFmode: case E_V4SImode: case E_V2DFmode: @@ -12658,6 +12804,9 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE))) { + if (FP16_SCALAR_MODE_P (mode)) + return true; + if (TARGET_POWERPC64) { /* ISA 2.07: MTVSRD or MVFVSRD. */ @@ -13445,6 +13594,11 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass) || mode_supports_dq_form (mode)) return rclass; + /* IEEE 16-bit and bfloat16 don't support offset addressing, but they can + go in any floating point/vector register. */ + if (FP16_SCALAR_MODE_P (mode)) + return rclass; + /* If this is a scalar floating point value and we don't have D-form addressing, prefer the traditional floating point registers so that we can use D-form (register+offset) addressing. */ @@ -13674,6 +13828,9 @@ rs6000_can_change_mode_class (machine_mode from, unsigned from_size = GET_MODE_SIZE (from); unsigned to_size = GET_MODE_SIZE (to); + if (FP16_SCALAR_MODE_P (from) || FP16_SCALAR_MODE_P (to)) + return from_size == to_size; + if (from_size != to_size) { enum reg_class xclass = (TARGET_VSX) ? 
VSX_REGS : FLOAT_REGS; @@ -22960,7 +23117,7 @@ rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst) { rtx reg; - if (mode == SFmode || mode == DFmode) + if (mode == SFmode || mode == DFmode || FP16_SCALAR_MODE_P (mode)) { rtx d = const_double_from_real_value (dconst, mode); reg = force_reg (mode, d); @@ -24287,6 +24444,8 @@ rs6000_scalar_mode_supported_p (scalar_mode mode) return default_decimal_float_supported_p (); else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode)) return true; + else if (FP16_SCALAR_MODE_P (mode)) + return true; else return default_scalar_mode_supported_p (mode); } @@ -24338,6 +24497,9 @@ rs6000_floatn_mode (int n, bool extended) { switch (n) { + case 16: + return TARGET_FLOAT16 ? SFmode : opt_scalar_float_mode (); + case 32: return DFmode; @@ -24359,6 +24521,9 @@ rs6000_floatn_mode (int n, bool extended) { switch (n) { + case 16: + return TARGET_FLOAT16 ? HFmode : opt_scalar_float_mode (); + case 32: return SFmode; @@ -24463,6 +24628,7 @@ struct rs6000_opt_mask { static struct rs6000_opt_mask const rs6000_opt_masks[] = { { "altivec", OPTION_MASK_ALTIVEC, false, true }, + { "bfloat16", OPTION_MASK_BFLOAT16, false, true }, { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX, false, true }, { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR, @@ -24478,6 +24644,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] = { "fprnd", OPTION_MASK_FPRND, false, true }, { "hard-dfp", OPTION_MASK_DFP, false, true }, { "htm", OPTION_MASK_HTM, false, true }, + { "float16", OPTION_MASK_FLOAT16, false, true }, { "isel", OPTION_MASK_ISEL, false, true }, { "mfcrf", OPTION_MASK_MFCRF, false, true }, { "mfpgpr", 0, false, true }, @@ -28956,24 +29123,37 @@ constant_fp_to_128bit_vector (rtx op, const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op); long real_words[VECTOR_128BIT_WORDS]; - /* Make sure we don't overflow the real_words array and that it is - filled completely. 
*/ - gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0); - - real_to_target (real_words, rtype, mode); + /* For 16-bit floating point, the constant doesn't fill the whole 32-bit + word. Deal with it here, storing the bytes in big endian fashion. */ + if (FP16_SCALAR_MODE_P (mode)) + { + real_to_target (real_words, rtype, mode); + info->bytes[byte_num] = (unsigned char) (real_words[0] >> 8); + info->bytes[byte_num+1] = (unsigned char) (real_words[0]); + } - /* Iterate over each 32-bit word in the floating point constant. The - real_to_target function puts out words in target endian fashion. We need - to arrange the order so that the bytes are written in big endian order. */ - for (unsigned num = 0; num < num_words; num++) + else { - unsigned endian_num = (BYTES_BIG_ENDIAN - ? num - : num_words - 1 - num); + /* Make sure we don't overflow the real_words array and that it is filled + completely. */ + gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0); + + real_to_target (real_words, rtype, mode); + + /* Iterate over each 32-bit word in the floating point constant. The + real_to_target function puts out words in target endian fashion. We + need to arrange the order so that the bytes are written in big endian + order. */ + for (unsigned num = 0; num < num_words; num++) + { + unsigned endian_num = (BYTES_BIG_ENDIAN + ? num + : num_words - 1 - num); - unsigned uvalue = real_words[endian_num]; - for (int shift = 32 - 8; shift >= 0; shift -= 8) - info->bytes[byte_num++] = (uvalue >> shift) & 0xff; + unsigned uvalue = real_words[endian_num]; + for (int shift = 32 - 8; shift >= 0; shift -= 8) + info->bytes[byte_num++] = (uvalue >> shift) & 0xff; + } } /* Mark that this constant involves floating point. */ @@ -29012,6 +29192,7 @@ vec_const_128bit_to_bytes (rtx op, return false; /* Set up the bits. */ + info->mode = mode; switch (GET_CODE (op)) { /* Integer constants, default to double word. 
*/ @@ -29239,6 +29420,10 @@ constant_generates_xxspltiw (vec_const_128bit_type *vsx_const) if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX) return 0; + /* HFmode/BFmode constants can always use XXSPLTIW. */ + if (FP16_SCALAR_MODE_P (vsx_const->mode)) + return 1; + if (!vsx_const->all_words_same) return 0; diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 643aa2449318..0249219bbbc9 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -343,6 +343,16 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); || ((MODE) == TDmode) \ || (!TARGET_FLOAT128_TYPE && FLOAT128_IEEE_P (MODE))) +/* Is this a valid 16-bit scalar floating point mode? */ +#define FP16_SCALAR_MODE_P(MODE) \ + (((MODE) == HFmode && TARGET_FLOAT16) \ + || ((MODE) == BFmode && TARGET_BFLOAT16)) + +/* Is this a valid 16-bit vector floating point mode? */ +#define FP16_VECTOR_MODE_P(MODE) \ + (((MODE) == V8HFmode && TARGET_FLOAT16) \ + || ((MODE) == V8BFmode && TARGET_BFLOAT16)) + /* Return true for floating point that does not use a vector register. 
*/ #define SCALAR_FLOAT_MODE_NOT_VECTOR_P(MODE) \ (SCALAR_FLOAT_MODE_P (MODE) && !FLOAT128_VECTOR_P (MODE)) @@ -991,6 +1001,8 @@ enum data_align { align_abi, align_opt, align_both }; #define ALTIVEC_VECTOR_MODE(MODE) \ ((MODE) == V16QImode \ || (MODE) == V8HImode \ + || (MODE) == V8HFmode \ + || (MODE) == V8BFmode \ || (MODE) == V4SFmode \ || (MODE) == V4SImode \ || VECTOR_ALIGNMENT_P (MODE)) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index edb624fcc9e7..60df4d5dae6e 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -552,6 +552,8 @@ (define_mode_iterator FMOVE128_GPR [TI V16QI V8HI + V8BF + V8HF V4SI V4SF V2DI @@ -716,6 +718,8 @@ (DI "d") (V16QI "b") (V8HI "h") + (V8BF "h") + (V8HF "h") (V4SI "w") (V2DI "d") (V1TI "q") @@ -728,6 +732,8 @@ (DI "d") (V16QI "du") (V8HI "du") + (V8BF "du") + (V8HF "du") (V4SI "du") (V2DI "d")]) @@ -777,6 +783,8 @@ PTI (V16QI "TARGET_ALTIVEC") (V8HI "TARGET_ALTIVEC") + (V8BF "TARGET_BFLOAT16") + (V8HF "TARGET_FLOAT16") (V4SI "TARGET_ALTIVEC") (V4SF "TARGET_ALTIVEC") (V2DI "TARGET_ALTIVEC") @@ -794,6 +802,8 @@ (PTI "&r,r,r") (V16QI "wa,v,&?r,?r,?r") (V8HI "wa,v,&?r,?r,?r") + (V8BF "wa,v,&?r,?r,?r") + (V8HF "wa,v,&?r,?r,?r") (V4SI "wa,v,&?r,?r,?r") (V4SF "wa,v,&?r,?r,?r") (V2DI "wa,v,&?r,?r,?r") @@ -805,6 +815,8 @@ (PTI "r,0,r") (V16QI "wa,v,r,0,r") (V8HI "wa,v,r,0,r") + (V8BF "wa,v,r,0,r") + (V8HF "wa,v,r,0,r") (V4SI "wa,v,r,0,r") (V4SF "wa,v,r,0,r") (V2DI "wa,v,r,0,r") @@ -816,6 +828,8 @@ (PTI "r,r,0") (V16QI "wa,v,r,r,0") (V8HI "wa,v,r,r,0") + (V8BF "wa,v,r,r,0") + (V8HF "wa,v,r,r,0") (V4SI "wa,v,r,r,0") (V4SF "wa,v,r,r,0") (V2DI "wa,v,r,r,0") @@ -829,6 +843,8 @@ (PTI "r,0,0") (V16QI "wa,v,r,0,0") (V8HI "wa,v,r,0,0") + (V8BF "wa,v,r,0,0") + (V8HF "wa,v,r,0,0") (V4SI "wa,v,r,0,0") (V4SF "wa,v,r,0,0") (V2DI "wa,v,r,0,0") @@ -837,8 +853,8 @@ ;; Reload iterator for creating the function to allocate a base register to ;; supplement addressing modes. 
-(define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI - SF SD SI DF DD DI TI PTI KF IF TF +(define_mode_iterator RELOAD [V16QI V8HI V8BF V8HF V4SI V2DI V4SF V2DF V1TI + SF SD SI DF DD DI TI PTI KF IF TF HF BF OO XO]) ;; Iterate over smin, smax @@ -15875,3 +15891,4 @@ (include "htm.md") (include "fusion.md") (include "pcrel-opt.md") +(include "float16.md") diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 31852e02aa0f..7acc539f42a7 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -638,6 +638,14 @@ mieee128-constant Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save Generate (do not generate) code that uses the LXVKQ instruction. +mfloat16 +Target Mask(FLOAT16) Var(rs6000_isa_flags) +Enable or disable _Float16 support. + +mbfloat16 +Target Mask(BFLOAT16) Var(rs6000_isa_flags) +Enable or disable __bfloat16 support. + ; Documented parameters -param=rs6000-vect-unroll-limit= diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index f5797387ca79..0b9727cca35c 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -50,11 +50,31 @@ (define_mode_iterator VEC_K [V16QI V8HI V4SI V4SF]) ;; Vector logical modes -(define_mode_iterator VEC_L [V16QI V8HI V4SI V2DI V4SF V2DF V1TI TI KF TF]) +(define_mode_iterator VEC_L [V16QI + V8HI + V8BF + V8HF + V4SI + V2DI + V4SF + V2DF + V1TI + TI + KF + TF]) ;; Vector modes for moves. Don't do TImode or TFmode here, since their ;; moves are handled elsewhere. 
-(define_mode_iterator VEC_M [V16QI V8HI V4SI V2DI V4SF V2DF V1TI KF]) +(define_mode_iterator VEC_M [V16QI + V8HI + V4SI + V2DI + V8BF + V8HF + V4SF + V2DF + V1TI + KF]) ;; Vector modes for types that don't need a realignment under VSX (define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF V1TI KF TF]) @@ -63,7 +83,14 @@ (define_mode_iterator VEC_C [V16QI V8HI V4SI V2DI V4SF V2DF V1TI]) ;; Vector init/extract modes -(define_mode_iterator VEC_E [V16QI V8HI V4SI V2DI V4SF V2DF]) +(define_mode_iterator VEC_E [V16QI + V8HI + V4SI + V2DI + V8BF + V8HF + V4SF + V2DF]) ;; Vector modes for 64-bit base types (define_mode_iterator VEC_64 [V2DI V2DF]) @@ -76,6 +103,8 @@ (V8HI "HI") (V4SI "SI") (V2DI "DI") + (V8BF "BF") + (V8HF "HF") (V4SF "SF") (V2DF "DF") (V1TI "TI") @@ -86,6 +115,8 @@ (V8HI "hi") (V4SI "si") (V2DI "di") + (V8BF "bf") + (V8HF "hf") (V4SF "sf") (V2DF "df") (V1TI "ti") diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index dd3573b80868..3198802dabb9 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -46,9 +46,14 @@ ;; Iterator for vector floating point types supported by VSX (define_mode_iterator VSX_F [V4SF V2DF]) +;; Iterator for 8 element vectors +(define_mode_iterator VECTOR_16BIT [V8HI V8BF V8HF]) + ;; Iterator for logical types supported by VSX (define_mode_iterator VSX_L [V16QI V8HI + V8BF + V8HF V4SI V2DI V4SF @@ -61,6 +66,8 @@ ;; Iterator for memory moves. 
(define_mode_iterator VSX_M [V16QI V8HI + V8BF + V8HF V4SI V2DI V4SF @@ -71,6 +78,8 @@ TI]) (define_mode_attr VSX_XXBR [(V8HI "h") + (V8BF "h") + (V8HF "h") (V4SI "w") (V4SF "w") (V2DF "d") @@ -80,6 +89,8 @@ ;; Map into the appropriate load/store name based on the type (define_mode_attr VSm [(V16QI "vw4") (V8HI "vw4") + (V8BF "vw4") + (V8HF "vw4") (V4SI "vw4") (V4SF "vw4") (V2DF "vd2") @@ -93,6 +104,8 @@ ;; Map the register class used (define_mode_attr VSr [(V16QI "v") (V8HI "v") + (V8BF "v") + (V8HF "v") (V4SI "v") (V4SF "wa") (V2DI "wa") @@ -108,6 +121,8 @@ ;; What value we need in the "isa" field, to make the IEEE QP float work. (define_mode_attr VSisa [(V16QI "*") (V8HI "*") + (V8BF "p10") + (V8HF "p9v") (V4SI "*") (V4SF "*") (V2DI "*") @@ -124,6 +139,8 @@ ;; integer modes. (define_mode_attr ??r [(V16QI "??r") (V8HI "??r") + (V8BF "??r") + (V8HF "??r") (V4SI "??r") (V4SF "??r") (V2DI "??r") @@ -136,6 +153,8 @@ ;; A mode attribute used for 128-bit constant values. (define_mode_attr nW [(V16QI "W") (V8HI "W") + (V8BF "W") + (V8HF "W") (V4SI "W") (V4SF "W") (V2DI "W") @@ -163,6 +182,8 @@ ;; operation (define_mode_attr VSv [(V16QI "v") (V8HI "v") + (V8BF "v") + (V8HF "v") (V4SI "v") (V4SF "v") (V2DI "v") @@ -396,6 +417,8 @@ ;; Like VM2 in altivec.md, just do char, short, int, long, float and double (define_mode_iterator VM3 [V4SI V8HI + V8BF + V8HF V16QI V4SF V2DF @@ -407,6 +430,8 @@ (define_mode_attr VM3_char [(V2DI "d") (V4SI "w") (V8HI "h") + (V8BF "h") + (V8HF "h") (V16QI "b") (V2DF "d") (V4SF "w")]) @@ -541,21 +566,21 @@ [(set_attr "type" "vecload") (set_attr "length" "8")]) -(define_insn_and_split "*vsx_le_perm_load_v8hi" - [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") - (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))] +(define_insn_and_split "*vsx_le_perm_load_<mode>" + [(set (match_operand:VECTOR_16BIT 0 "vsx_register_operand" "=wa") + (match_operand:VECTOR_16BIT 1 "indexed_or_indirect_operand" "Z"))] "!BYTES_BIG_ENDIAN && TARGET_VSX 
&& !TARGET_P9_VECTOR" "#" "&& 1" [(set (match_dup 2) - (vec_select:V8HI + (vec_select:VECTOR_16BIT (match_dup 1) (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) (const_int 2) (const_int 3)]))) (set (match_dup 0) - (vec_select:V8HI + (vec_select:VECTOR_16BIT (match_dup 2) (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) @@ -802,27 +827,27 @@ (const_int 0) (const_int 1)])))] "") -(define_insn "*vsx_le_perm_store_v8hi" - [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z") - (match_operand:V8HI 1 "vsx_register_operand" "wa"))] +(define_insn "*vsx_le_perm_store_<mode>" + [(set (match_operand:VECTOR_16BIT 0 "indexed_or_indirect_operand" "=Z") + (match_operand:VECTOR_16BIT 1 "vsx_register_operand" "wa"))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" "#" [(set_attr "type" "vecstore") (set_attr "length" "12")]) (define_split - [(set (match_operand:V8HI 0 "indexed_or_indirect_operand") - (match_operand:V8HI 1 "vsx_register_operand"))] + [(set (match_operand:VECTOR_16BIT 0 "indexed_or_indirect_operand") + (match_operand:VECTOR_16BIT 1 "vsx_register_operand"))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed" [(set (match_dup 2) - (vec_select:V8HI + (vec_select:VECTOR_16BIT (match_dup 1) (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) (const_int 2) (const_int 3)]))) (set (match_dup 0) - (vec_select:V8HI + (vec_select:VECTOR_16BIT (match_dup 2) (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) @@ -861,25 +886,25 @@ ;; The post-reload split requires that we re-permute the source ;; register in case it is still live. 
(define_split - [(set (match_operand:V8HI 0 "indexed_or_indirect_operand") - (match_operand:V8HI 1 "vsx_register_operand"))] + [(set (match_operand:VECTOR_16BIT 0 "indexed_or_indirect_operand") + (match_operand:VECTOR_16BIT 1 "vsx_register_operand"))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed" [(set (match_dup 1) - (vec_select:V8HI + (vec_select:VECTOR_16BIT (match_dup 1) (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) (const_int 2) (const_int 3)]))) (set (match_dup 0) - (vec_select:V8HI + (vec_select:VECTOR_16BIT (match_dup 1) (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) (const_int 2) (const_int 3)]))) (set (match_dup 1) - (vec_select:V8HI + (vec_select:VECTOR_16BIT (match_dup 1) (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) @@ -1434,15 +1459,15 @@ "lxvw4x %x0,%y1" [(set_attr "type" "vecload")]) -(define_expand "vsx_ld_elemrev_v8hi" - [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") - (vec_select:V8HI - (match_operand:V8HI 1 "memory_operand" "Z") +(define_expand "vsx_ld_elemrev_<mode>" + [(set (match_operand:VECTOR_16BIT 0 "vsx_register_operand" "=wa") + (vec_select:VECTOR_16BIT + (match_operand:VECTOR_16BIT 1 "memory_operand" "Z") (parallel [(const_int 7) (const_int 6) (const_int 5) (const_int 4) (const_int 3) (const_int 2) (const_int 1) (const_int 0)])))] - "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN" + "VECTOR_MEM_VSX_P (<MODE>mode) && !BYTES_BIG_ENDIAN" { if (!TARGET_P9_VECTOR) { @@ -1452,9 +1477,9 @@ unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2}; int i; - subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0); + subreg = simplify_gen_subreg (V4SImode, operands[1], <MODE>mode, 0); emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg)); - subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0); + subreg2 = simplify_gen_subreg (<MODE>mode, tmp, V4SImode, 0); for (i = 0; 
i < 16; ++i) perm[i] = GEN_INT (reorder[i]); @@ -1462,21 +1487,21 @@ pcv = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm))); - emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2, - subreg2, pcv)); + emit_insn (gen_altivec_vperm_<mode>_direct (operands[0], subreg2, + subreg2, pcv)); DONE; } }) -(define_insn "*vsx_ld_elemrev_v8hi_internal" - [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") - (vec_select:V8HI - (match_operand:V8HI 1 "memory_operand" "Z") +(define_insn "*vsx_ld_elemrev_<mode>_internal" + [(set (match_operand:VECTOR_16BIT 0 "vsx_register_operand" "=wa") + (vec_select:VECTOR_16BIT + (match_operand:VECTOR_16BIT 1 "memory_operand" "Z") (parallel [(const_int 7) (const_int 6) (const_int 5) (const_int 4) (const_int 3) (const_int 2) (const_int 1) (const_int 0)])))] - "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" + "VECTOR_MEM_VSX_P (<MODE>mode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" "lxvh8x %x0,%y1" [(set_attr "type" "vecload")]) @@ -1584,20 +1609,20 @@ "stxvw4x %x1,%y0" [(set_attr "type" "vecstore")]) -(define_expand "vsx_st_elemrev_v8hi" - [(set (match_operand:V8HI 0 "memory_operand" "=Z") - (vec_select:V8HI - (match_operand:V8HI 1 "vsx_register_operand" "wa") +(define_expand "vsx_st_elemrev_<mode>" + [(set (match_operand:VECTOR_16BIT 0 "memory_operand" "=Z") + (vec_select:VECTOR_16BIT + (match_operand:VECTOR_16BIT 1 "vsx_register_operand" "wa") (parallel [(const_int 7) (const_int 6) (const_int 5) (const_int 4) (const_int 3) (const_int 2) (const_int 1) (const_int 0)])))] - "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN" + "VECTOR_MEM_VSX_P (<MODE>mode) && !BYTES_BIG_ENDIAN" { if (!TARGET_P9_VECTOR) { rtx mem_subreg, subreg, perm[16], pcv; - rtx tmp = gen_reg_rtx (V8HImode); + rtx tmp = gen_reg_rtx (<MODE>mode); /* 2 is leftmost element in register */ unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2}; int i; @@ -1608,10 +1633,10 @@ pcv = force_reg (V16QImode, 
gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm))); - emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1], - operands[1], pcv)); - subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0); - mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0); + emit_insn (gen_altivec_vperm_<mode>_direct (tmp, operands[1], + operands[1], pcv)); + subreg = simplify_gen_subreg (V4SImode, tmp, <MODE>mode, 0); + mem_subreg = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0); emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg)); DONE; } @@ -1626,15 +1651,15 @@ "stxvd2x %x1,%y0" [(set_attr "type" "vecstore")]) -(define_insn "*vsx_st_elemrev_v8hi_internal" - [(set (match_operand:V8HI 0 "memory_operand" "=Z") - (vec_select:V8HI - (match_operand:V8HI 1 "vsx_register_operand" "wa") +(define_insn "*vsx_st_elemrev_<mode>_internal" + [(set (match_operand:VECTOR_16BIT 0 "memory_operand" "=Z") + (vec_select:VECTOR_16BIT + (match_operand:VECTOR_16BIT 1 "vsx_register_operand" "wa") (parallel [(const_int 7) (const_int 6) (const_int 5) (const_int 4) (const_int 3) (const_int 2) (const_int 1) (const_int 0)])))] - "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" + "VECTOR_MEM_VSX_P (<MODE>mode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" "stxvh8x %x1,%y0" [(set_attr "type" "vecstore")]) @@ -3299,10 +3324,10 @@ "xxpermdi %x0,%x1,%x1,2" [(set_attr "type" "vecperm")]) -(define_insn "xxswapd_v8hi" - [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") - (vec_select:V8HI - (match_operand:V8HI 1 "vsx_register_operand" "wa") +(define_insn "xxswapd_<mode>" + [(set (match_operand:VECTOR_16BIT 0 "vsx_register_operand" "=wa") + (vec_select:VECTOR_16BIT + (match_operand:VECTOR_16BIT 1 "vsx_register_operand" "wa") (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) @@ -3402,15 +3427,15 @@ "lxvd2x %x0,%y1" [(set_attr "type" "vecload")]) -(define_insn "*vsx_lxvd2x8_le_V8HI" - [(set (match_operand:V8HI 0 
"vsx_register_operand" "=wa") - (vec_select:V8HI - (match_operand:V8HI 1 "memory_operand" "Z") +(define_insn "*vsx_lxvd2x8_le_<MODE>" + [(set (match_operand:VECTOR_16BIT 0 "vsx_register_operand" "=wa") + (vec_select:VECTOR_16BIT + (match_operand:VECTOR_16BIT 1 "memory_operand" "Z") (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) (const_int 2) (const_int 3)])))] - "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR" + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" "lxvd2x %x0,%y1" [(set_attr "type" "vecload")]) @@ -3478,15 +3503,15 @@ [(set_attr "type" "vecstore") (set_attr "length" "8")]) -(define_insn "*vsx_stxvd2x8_le_V8HI" - [(set (match_operand:V8HI 0 "memory_operand" "=Z") - (vec_select:V8HI - (match_operand:V8HI 1 "vsx_register_operand" "wa") +(define_insn "*vsx_stxvd2x8_le_<MODE>" + [(set (match_operand:VECTOR_16BIT 0 "memory_operand" "=Z") + (vec_select:VECTOR_16BIT + (match_operand:VECTOR_16BIT 1 "vsx_register_operand" "wa") (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) (const_int 2) (const_int 3)])))] - "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR" + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" "stxvd2x %x1,%y0" [(set_attr "type" "vecstore")]) @@ -4060,7 +4085,8 @@ if (which_alternative == 0 && ((<MODE>mode == V16QImode && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 7 : 8)) - || (<MODE>mode == V8HImode + || ((<MODE>mode == V8HImode || <MODE>mode == V8HFmode + || <MODE>mode == V8BFmode) && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 3 : 4)))) { enum machine_mode dest_mode = GET_MODE (operands[0]); @@ -4139,7 +4165,8 @@ else vec_tmp = src; } - else if (<MODE>mode == V8HImode) + else if (<MODE>mode == V8HImode || <MODE>mode == V8HFmode + || <MODE>mode == V8BFmode) { if (value != 3) emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
