https://gcc.gnu.org/g:d764c68321a348a359533d09dfc7ccbde7d398c4
commit d764c68321a348a359533d09dfc7ccbde7d398c4 Author: Michael Meissner <[email protected]> Date: Fri Oct 24 19:54:41 2025 -0400 Add -mbfloat16-pack, -mbfloat16-vector, and -mfloat16-vector. 2025-10-24 Michael Meissner <[email protected]> gcc/ * config/rs6000/float16.cc (fp16_vectorization): Call gen_vec_pack_trunc_v4sf_v8bf instead of gen_vec_pack_trunc_v4sf. * config/rs6000/float16.md (VFP16_HW): Add support for -mbfloat16-pack, -mbfloat16-vector, and -mfloat16-vector. (vec_pack_trunc_v4sf_v8bf): Likewise. * config/rs6000/rs6000.h (TARGET_BFLOAT16_HW_VECTOR): Likewise. (TARGET_FLOAT16_HW_VECTOR): Likewise. * config/rs6000/rs6000.opt (mfloat16-vector): Likewise. (mbfloat16-pack): Likewise. (mbfloat16-vector): Likewise. Diff: --- gcc/config/rs6000/float16.cc | 2 +- gcc/config/rs6000/float16.md | 32 ++++++++++++++++++++++++-------- gcc/config/rs6000/rs6000.h | 8 ++++++++ gcc/config/rs6000/rs6000.opt | 12 ++++++++++++ 4 files changed, 45 insertions(+), 9 deletions(-) diff --git a/gcc/config/rs6000/float16.cc b/gcc/config/rs6000/float16.cc index 3fb61e9e4621..b2a389270136 100644 --- a/gcc/config/rs6000/float16.cc +++ b/gcc/config/rs6000/float16.cc @@ -327,7 +327,7 @@ fp16_vectorization (enum rtx_code icode, emit_insn (gen_vec_pack_trunc_v4sf_v8hf (result, result_hi, result_lo)); else if (result_mode == V8BFmode) - emit_insn (gen_vec_pack_trunc_v4sf (result, result_hi, result_lo)); + emit_insn (gen_vec_pack_trunc_v4sf_v8bf (result, result_hi, result_lo)); else gcc_unreachable (); diff --git a/gcc/config/rs6000/float16.md b/gcc/config/rs6000/float16.md index d719c0dec8af..dff5bfd563b6 100644 --- a/gcc/config/rs6000/float16.md +++ b/gcc/config/rs6000/float16.md @@ -33,8 +33,8 @@ (define_mode_iterator FP16_HW [(BF "TARGET_BFLOAT16_HW") (HF "TARGET_FLOAT16_HW")]) -(define_mode_iterator VFP16_HW [(V8BF "TARGET_BFLOAT16_HW") - (V8HF "TARGET_FLOAT16_HW")]) +(define_mode_iterator VFP16_HW [(V8BF "TARGET_BFLOAT16_HW_VECTOR") + (V8HF "TARGET_FLOAT16_HW_VECTOR")]) ;; Mode iterator for floating point modes other than SF/DFmode that we ;; convert to/from _Float16 (HFmode) via DFmode. @@ -1107,10 +1107,6 @@ ;; Vector Pack support. -;; Unfortunately the machine independent code assumes there is only one -;; 16-bit floating point type. So we have to choose whether to support -;; packing _Float16 or __bfloat16. - (define_expand "vec_pack_trunc_v4sf_v8hf" [(match_operand:V8HF 0 "vfloat_operand") (match_operand:V4SF 1 "vfloat_operand") @@ -1126,7 +1122,7 @@ DONE; }) -(define_expand "vec_pack_trunc_v4sf" +(define_expand "vec_pack_trunc_v4sf_v8bf" [(match_operand:V8BF 0 "vfloat_operand") (match_operand:V4SF 1 "vfloat_operand") (match_operand:V4SF 2 "vfloat_operand")] @@ -1141,6 +1137,26 @@ DONE; }) +;; Unfortunately the machine independent code assumes there is only one +;; 16-bit floating point type. This means we have to choose whether to +;; support packing _Float16 or __bfloat16. It looks like __bfloat16 is +;; more popular, so we choose __bfloat16 to be the default. + +(define_expand "vec_pack_trunc_v4sf" + [(match_operand:V8BF 0 "vfloat_operand") + (match_operand:V4SF 1 "vfloat_operand") + (match_operand:V4SF 2 "vfloat_operand")] + "TARGET_BFLOAT16_HW && TARGET_BFLOAT16_PACK" +{ + rtx r1 = gen_reg_rtx (V8BFmode); + rtx r2 = gen_reg_rtx (V8BFmode); + + emit_insn (gen_xvcvspbf16_v8bf (r1, operands[1])); + emit_insn (gen_xvcvspbf16_v8bf (r2, operands[2])); + rs6000_expand_extract_even (operands[0], r1, r2); + DONE; +}) + ;; Used for vector conversion to _Float16 (define_insn "xvcvsphp_v8hf" [(set (match_operand:V8HF 0 "vsx_register_operand" "=wa") @@ -1150,7 +1166,7 @@ "xvcvsphp %x0,%x1" [(set_attr "type" "vecfloat")]) -;; Used for vector conversion to __bloat16 +;; Used for vector conversion to __bfloat16 (define_insn "xvcvspbf16_v8bf" [(set (match_operand:V8BF 0 "vsx_register_operand" "=wa") (unspec:V8BF [(match_operand:V4SF 1 "vsx_register_operand" "wa")] diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 7850affe6afc..a17aaa785f31 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -347,6 +347,14 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define TARGET_BFLOAT16_HW (TARGET_BFLOAT16 && TARGET_POWER10) #define TARGET_FLOAT16_HW (TARGET_FLOAT16 && TARGET_POWER9) +/* Do we have conversion support in hardware for the 16-bit floating point and + also enable the 16-bit floating point vector optimizations? */ +#define TARGET_BFLOAT16_HW_VECTOR \ + (TARGET_BFLOAT16 && TARGET_POWER10 && TARGET_BFLOAT16_VECTOR) + +#define TARGET_FLOAT16_HW_VECTOR \ + (TARGET_FLOAT16 && TARGET_POWER9 && TARGET_FLOAT16_VECTOR) + /* Is this a valid 16-bit scalar floating point mode? */ #define FP16_SCALAR_MODE_P(MODE) \ (((MODE) == HFmode && TARGET_FLOAT16) \ diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 4849fb35cc54..053183b004c6 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -642,6 +642,10 @@ mfloat16 Target Mask(FLOAT16) Var(rs6000_isa_flags) Enable or disable _Float16 support. +mfloat16-vector +Target Undocumented Var(TARGET_FLOAT16_VECTOR) Init(1) Save +Enable or disable _Float16 vectorization optimizations + mfloat16-gpr-args Target Undocumented Var(TARGET_FLOAT16_GPR_ARGS) Init(0) Save Pass and return _Float16 and __bfloat16 in GPR registers. @@ -654,6 +658,14 @@ mbfloat16-combine Target Undocumented Var(TARGET_BFLOAT16_COMBINE) Init(1) Save Enable or disable __bfloat16 combine optimizations +mbfloat16-pack +Target Undocumented Var(TARGET_BFLOAT16_PACK) Init(1) Save +Enable or disable __bfloat16 vector pack optimizations + +mbfloat16-vector +Target Undocumented Var(TARGET_BFLOAT16_VECTOR) Init(1) Save +Enable or disable __bfloat16 vectorization optimizations + ; Documented parameters -param=rs6000-vect-unroll-limit=
