https://gcc.gnu.org/g:44d6d049fd774f2e9325eae9e3dbebf30e4accb1
commit 44d6d049fd774f2e9325eae9e3dbebf30e4accb1 Author: Michael Meissner <[email protected]> Date: Thu Nov 13 11:00:43 2025 -0500 Revert changes Diff: --- gcc/config/rs6000/float16.cc | 150 ----------------- gcc/config/rs6000/float16.md | 331 -------------------------------------- gcc/config/rs6000/predicates.md | 76 --------- gcc/config/rs6000/rs6000-protos.h | 2 - gcc/config/rs6000/rs6000.opt | 8 - 5 files changed, 567 deletions(-) diff --git a/gcc/config/rs6000/float16.cc b/gcc/config/rs6000/float16.cc index 2c7b6278a16a..5274a0df962f 100644 --- a/gcc/config/rs6000/float16.cc +++ b/gcc/config/rs6000/float16.cc @@ -183,153 +183,3 @@ fp16_vectorization (enum rtx_code icode, return; } - -/* Expand a bfloat16 scalar floating point operation: - - ICODE: Operation to perform. - RESULT: Result of the operation. - OP1: Input operand1. - OP2: Input operand2. - OP3: Input operand3 or NULL_RTX. - SUBTYPE: Describe the operation. - - The operation is done as a V4SFmode vector operation. This is because - converting BFmode from a scalar BFmode to SFmode to do the operation and - back again takes quite a bit of time. GCC will only generate the native - operation if -Ofast is used. The float16.md code that calls this function - adds various combine operations to do the operation in V4SFmode instead of - SFmode. */ - -void -bfloat16_operation_as_v4sf (enum rtx_code icode, - rtx result, - rtx op1, - rtx op2, - rtx op3, - enum fp16_operation subtype) -{ - gcc_assert (can_create_pseudo_p ()); - - rtx result_v4sf = gen_reg_rtx (V4SFmode); - rtx ops_orig[3] = { op1, op2, op3 }; - rtx ops_v4sf[3]; - size_t n_opts; - - switch (subtype) - { - case FP16_BINARY: - n_opts = 2; - gcc_assert (op3 == NULL_RTX); - break; - - case FP16_FMA: - case FP16_FMS: - case FP16_NFMA: - case FP16_NFMS: - gcc_assert (icode == FMA); - n_opts = 3; - break; - - default: - gcc_unreachable (); - } - - for (size_t i = 0; i < n_opts; i++) - { - rtx op = ops_orig[i]; - rtx tmp = ops_v4sf[i] = gen_reg_rtx (V4SFmode); - - gcc_assert (op != NULL_RTX); - - /* Remove truncation/extend added. */ - if (GET_CODE (op) == FLOAT_EXTEND || GET_CODE (op) == FLOAT_TRUNCATE) - op = XEXP (op, 0); - - /* Convert operands to V4SFmode format. We use SPLAT for registers to - get the value into the upper 32-bits. We can use XXSPLTW to splat - words instead of VSPLTIH since the XVCVBF16SPN instruction ignores the - odd half-words, and XXSPLTW can operate on all VSX registers instead - of just the Altivec registers. Using SPLAT instead of a shift also - insure that other bits are not a signalling NaN. If we are using - XXSPLTIW or XXSPLTIB to load the constant the other bits are - duplicated. */ - - if (op == CONST0_RTX (SFmode) || op == CONST0_RTX (BFmode)) - emit_move_insn (tmp, CONST0_RTX (V4SFmode)); - - else if (GET_MODE (op) == BFmode) - { - emit_insn (gen_xxspltw_bf (tmp, force_reg (BFmode, op))); - emit_insn (gen_xvcvbf16spn_bf (tmp, tmp)); - } - - else if (GET_MODE (op) == SFmode) - { - if (GET_CODE (op) == CONST_DOUBLE) - { - rtvec v = rtvec_alloc (4); - - for (size_t i = 0; i < 4; i++) - RTVEC_ELT (v, i) = op; - - emit_insn (gen_rtx_SET (tmp, - gen_rtx_CONST_VECTOR (V4SFmode, v))); - } - - else - emit_insn (gen_vsx_splat_v4sf (tmp, - force_reg (SFmode, op))); - } - - else - gcc_unreachable (); - } - - /* Do the operation in V4SFmode. */ - switch (subtype) - { - case FP16_BINARY: - emit_insn (gen_rtx_SET (result_v4sf, - gen_rtx_fmt_ee (icode, V4SFmode, - ops_v4sf[0], - ops_v4sf[1]))); - break; - - case FP16_FMA: - case FP16_FMS: - case FP16_NFMA: - case FP16_NFMS: - { - rtx op1 = ops_v4sf[0]; - rtx op2 = ops_v4sf[1]; - rtx op3 = ops_v4sf[2]; - - if (subtype == FP16_FMS || subtype == FP16_NFMS) - op3 = gen_rtx_NEG (V4SFmode, op3); - - rtx op_fma = gen_rtx_FMA (V4SFmode, op1, op2, op3); - - if (subtype == FP16_NFMA || subtype == FP16_NFMS) - op_fma = gen_rtx_NEG (V4SFmode, op_fma); - - emit_insn (gen_rtx_SET (result_v4sf, op_fma)); - } - break; - - default: - gcc_unreachable (); - } - - /* Convert V4SF result back to scalar mode. */ - if (GET_MODE (result) == BFmode) - emit_insn (gen_xvcvspbf16_bf (result, result_v4sf)); - - else if (GET_MODE (result) == SFmode) - { - rtx element = GEN_INT (WORDS_BIG_ENDIAN ? 2 : 3); - emit_insn (gen_vsx_extract_v4sf (result, result_v4sf, element)); - } - - else - gcc_unreachable (); -} diff --git a/gcc/config/rs6000/float16.md b/gcc/config/rs6000/float16.md index f2942a27aafb..690b8c2d6610 100644 --- a/gcc/config/rs6000/float16.md +++ b/gcc/config/rs6000/float16.md @@ -963,334 +963,3 @@ "TARGET_BFLOAT16_HW" "xvcvbf16spn %x0,%x1" [(set_attr "type" "vecperm")]) - -;; Optimize __bfloat16 binary operations. Unlike _Float16 where we -;; have instructions to convert between HFmode and SFmode as scalar -;; values, with BFmode, we only have vector conversions. Thus to do: -;; -;; __bfloat16 a, b, c; -;; a = b + c; -;; -;; the GCC compiler would normally generate: -;; -;; lxsihzx 0,4,2 // load __bfloat16 value b -;; lxsihzx 12,5,2 // load __bfloat16 value c -;; xxsldwi 0,0,0,1 // shift b into bits 16..31 -;; xxsldwi 12,12,12,1 // shift c into bits 16..31 -;; xvcvbf16spn 0,0 // vector convert b into V4SFmode -;; xvcvbf16spn 12,12 // vector convert c into V4SFmode -;; xscvspdpn 0,0 // convert b into SFmode scalar -;; xscvspdpn 12,12 // convert c into SFmode scalar -;; fadds 0,0,12 // add b+c -;; xscvdpspn 0,0 // convert b+c into SFmode memory format -;; xvcvspbf16 0,0 // convert b+c into BFmode memory format -;; stxsihx 0,3,2 // store b+c -;; -;; Using the following combiner patterns, the code generated would now -;; be: -;; -;; lxsihzx 12,4,2 // load __bfloat16 value b -;; lxsihzx 0,5,2 // load __bfloat16 value c -;; xxspltw 12,12,1 // shift b into bits 16..31 -;; xxspltw 0,0,1 // shift c into bits 16..31 -;; xvcvbf16spn 12,12 // vector convert b into V4SFmode -;; xvcvbf16spn 0,0 // vector convert c into V4SFmode -;; xvaddsp 0,0,12 // vector b+c in V4SFmode -;; xvcvspbf16 0,0 // convert b+c into BFmode memory format -;; stxsihx 0,3,2 // store b+c -;; -;; We cannot just define insns like 'addbf3' to keep the operation as -;; BFmode because GCC will not generate these patterns unless the user -;; uses -Ofast. Without -Ofast, it will always convert BFmode into -;; SFmode. - -(define_insn_and_split "*bfloat16_binary_op_internal1" - [(set (match_operand:SF 0 "vsx_register_operand") - (match_operator:SF 1 "fp16_binary_operator" - [(match_operand:SF 2 "bfloat16_v4sf_operand") - (match_operand:SF 3 "bfloat16_v4sf_operand")]))] - "TARGET_BFLOAT16_HW && TARGET_BFLOAT16_COMBINE && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[2], SFmode) - || bfloat16_bf_operand (operands[3], SFmode))" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (GET_CODE (operands[1]), operands[0], operands[2], - operands[3], NULL_RTX, FP16_BINARY); - DONE; -}) - -(define_insn_and_split "*bfloat16_binary_op_internal2" - [(set (match_operand:BF 0 "vsx_register_operand") - (float_truncate:BF - (match_operator:SF 1 "fp16_binary_operator" - [(match_operand:SF 2 "bfloat16_v4sf_operand") - (match_operand:SF 3 "bfloat16_v4sf_operand")])))] - "TARGET_BFLOAT16_HW && TARGET_BFLOAT16_COMBINE && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[2], SFmode) - || bfloat16_bf_operand (operands[3], SFmode))" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (GET_CODE (operands[1]), operands[0], operands[2], - operands[3], NULL_RTX, FP16_BINARY); - DONE; -}) - -(define_insn_and_split "*bfloat16_fma_internal1" - [(set (match_operand:SF 0 "vsx_register_operand") - (fma:SF - (match_operand:SF 1 "bfloat16_v4sf_operand") - (match_operand:SF 2 "bfloat16_v4sf_operand") - (match_operand:SF 3 "bfloat16_v4sf_operand")))] - "TARGET_BFLOAT16_HW && TARGET_BFLOAT16_COMBINE && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[1], SFmode) - + bfloat16_bf_operand (operands[2], SFmode) - + bfloat16_bf_operand (operands[3], SFmode) >= 2)" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_FMA); - DONE; -}) - -(define_insn_and_split "*bfloat16_fma_internal2" - [(set (match_operand:BF 0 "vsx_register_operand" "=wa") - (float_truncate:BF - (fma:SF - (match_operand:SF 1 "bfloat16_v4sf_operand") - (match_operand:SF 2 "bfloat16_v4sf_operand") - (match_operand:SF 3 "bfloat16_v4sf_operand"))))] - "TARGET_BFLOAT16_HW && TARGET_BFLOAT16_COMBINE && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[1], SFmode) - + bfloat16_bf_operand (operands[2], SFmode) - + bfloat16_bf_operand (operands[3], SFmode) >= 2)" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_FMA); - DONE; -}) - -(define_insn_and_split "*bfloat16_fms_internal1" - [(set (match_operand:SF 0 "vsx_register_operand") - (fma:SF - (match_operand:SF 1 "bfloat16_v4sf_operand") - (match_operand:SF 2 "bfloat16_v4sf_operand") - (neg:SF - (match_operand:SF 3 "bfloat16_v4sf_operand"))))] - "TARGET_BFLOAT16_HW && TARGET_BFLOAT16_COMBINE && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[1], SFmode) - + bfloat16_bf_operand (operands[2], SFmode) - + bfloat16_bf_operand (operands[3], SFmode) >= 2)" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_FMS); - DONE; -}) - -(define_insn_and_split "*bfloat16_fms_internal2" - [(set (match_operand:BF 0 "vsx_register_operand") - (float_truncate:BF - (fma:SF - (match_operand:SF 1 "bfloat16_v4sf_operand") - (match_operand:SF 2 "bfloat16_v4sf_operand") - (neg:SF - (match_operand:SF 3 "bfloat16_v4sf_operand")))))] - "TARGET_BFLOAT16_HW && TARGET_BFLOAT16_COMBINE && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[1], SFmode) - + bfloat16_bf_operand (operands[2], SFmode) - + bfloat16_bf_operand (operands[3], SFmode) >= 2)" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_FMS); - DONE; -}) - -(define_insn_and_split "*bfloat16_nfma_internal1" - [(set (match_operand:SF 0 "vsx_register_operand") - (neg:SF - (fma:SF - (match_operand:SF 1 "bfloat16_v4sf_operand") - (match_operand:SF 2 "bfloat16_v4sf_operand") - (match_operand:SF 3 "bfloat16_v4sf_operand"))))] - "TARGET_BFLOAT16_HW && TARGET_BFLOAT16_COMBINE && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[1], SFmode) - + bfloat16_bf_operand (operands[2], SFmode) - + bfloat16_bf_operand (operands[3], SFmode) >= 2)" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_NFMA); - DONE; -}) - -(define_insn_and_split "*bfloat16_nfma_internal2" - [(set (match_operand:BF 0 "vsx_register_operand" "=wa") - (float_truncate:BF - (neg:SF - (fma:SF - (match_operand:SF 1 "bfloat16_v4sf_operand") - (match_operand:SF 2 "bfloat16_v4sf_operand") - (match_operand:SF 3 "bfloat16_v4sf_operand")))))] - "TARGET_BFLOAT16_HW && TARGET_BFLOAT16_COMBINE && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[1], SFmode) - + bfloat16_bf_operand (operands[2], SFmode) - + bfloat16_bf_operand (operands[3], SFmode) >= 2)" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_NFMA); - DONE; -}) - -(define_insn_and_split "*bfloat16_nfma_internal3" - [(set (match_operand:BF 0 "vsx_register_operand" "=wa") - (neg:BF - (float_truncate:BF - (fma:SF - (match_operand:SF 1 "bfloat16_v4sf_operand") - (match_operand:SF 2 "bfloat16_v4sf_operand") - (match_operand:SF 3 "bfloat16_v4sf_operand")))))] - "TARGET_BFLOAT16_HW && TARGET_BFLOAT16_COMBINE && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[1], SFmode) - + bfloat16_bf_operand (operands[2], SFmode) - + bfloat16_bf_operand (operands[3], SFmode) >= 2)" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_NFMA); - DONE; -}) - -(define_insn_and_split "*bfloat16_nfms_internal1" - [(set (match_operand:SF 0 "vsx_register_operand") - (neg:SF - (fma:SF - (match_operand:SF 1 "bfloat16_v4sf_operand") - (match_operand:SF 2 "bfloat16_v4sf_operand") - (neg:SF - (match_operand:SF 3 "bfloat16_v4sf_operand")))))] - "TARGET_BFLOAT16_HW && TARGET_BFLOAT16_COMBINE && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[1], SFmode) - + bfloat16_bf_operand (operands[2], SFmode) - + bfloat16_bf_operand (operands[3], SFmode) >= 2)" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_NFMS); - DONE; -}) - -(define_insn_and_split "*bfloat16_nfms_internal2" - [(set (match_operand:BF 0 "vsx_register_operand") - (float_truncate:BF - (neg:SF - (fma:SF - (match_operand:SF 1 "bfloat16_v4sf_operand") - (match_operand:SF 2 "bfloat16_v4sf_operand") - (neg:SF - (match_operand:SF 3 "bfloat16_v4sf_operand"))))))] - "TARGET_BFLOAT16_HW && TARGET_BFLOAT16_COMBINE && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[1], SFmode) - + bfloat16_bf_operand (operands[2], SFmode) - + bfloat16_bf_operand (operands[3], SFmode) >= 2)" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_NFMS); - DONE; -}) - -(define_insn_and_split "*bfloat16_nfms_internal3" - [(set (match_operand:BF 0 "vsx_register_operand") - (neg:BF - (float_truncate:BF - (fma:SF - (match_operand:SF 1 "bfloat16_v4sf_operand") - (match_operand:SF 2 "bfloat16_v4sf_operand") - (neg:SF - (match_operand:SF 3 "bfloat16_v4sf_operand"))))))] - "TARGET_BFLOAT16_HW && TARGET_BFLOAT16_COMBINE && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[1], SFmode) - + bfloat16_bf_operand (operands[2], SFmode) - + bfloat16_bf_operand (operands[3], SFmode) >= 2)" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_NFMS); - DONE; -}) - -;; If we do multiple __bfloat16 operations, between the first and -;; second operation, GCC will want to convert the first operation from -;; V4SFmode to SFmode and then reconvert it back to V4SFmode. On the -;; PowerPC, this is complicated because internally in the vector -;; register, SFmode values are stored as DFmode values. -;; -;; For example, if we have: -;; -;; __bfloat16 a, b, c, d; -;; a = b + c + d; -;; -;; We would generate: -;; -;; lxsihzx 0,4,2 // load b as BFmode -;; lxsihzx 11,5,2 // load c as BFmode -;; lxsihzx 12,6,2 // load d as BFmode -;; xxspltw 0,0,1 // shift b into bits 16..31 -;; xxspltw 11,11,1 // shift c into bits 16..31 -;; xxspltw 12,12,1 // shift d into bits 16..31 -;; xvcvbf16spn 0,0 // convert b into V4SFmode -;; xvcvbf16spn 11,11 // convert c into V4SFmode -;; xvcvbf16spn 12,12 // convert d into V4SFmode -;; xvaddsp 0,0,11 // calculate b+c as V4SFmode -;; xscvspdp 0,0 // convert b+c into DFmode memory format -;; xscvdpspn 0,0 // convert b+c into SFmode memory format -;; xxspltw 0,0,0 // convert b+c into V4SFmode -;; xvaddsp 12,12,0 // calculate b+c+d as V4SFmode -;; xvcvspbf16 12,12 // convert b+c+d into BFmode memory format -;; stxsihx 12,3,2 // store b+c+d -;; -;; With this peephole2, we can eliminate the xscvspdp and xscvdpspn -;; instructions. -;; -;; We keep the xxspltw between the two xvaddsp's in case the user -;; explicitly did a SFmode extract of element 0 and did a splat -;; operation. - -(define_peephole2 - [(set (match_operand:SF 0 "vsx_register_operand") - (unspec:SF - [(match_operand:V4SF 1 "vsx_register_operand")] - UNSPEC_VSX_CVSPDP)) - (set (match_operand:V4SF 2 "vsx_register_operand") - (unspec:V4SF [(match_dup 0)] UNSPEC_VSX_CVDPSPN))] - "TARGET_BFLOAT16_COMBINE && TARGET_BFLOAT16_PEEPHOLE - && (REGNO (operands[1]) == REGNO (operands[2]) - || peep2_reg_dead_p (1, operands[1]))" - [(set (match_dup 2) (match_dup 1))]) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 172991de3662..785d09b94234 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -2276,79 +2276,3 @@ return constant_generates_xxspltiw (&vsx_const); }) - -;; Return 1 if this is a 16-bit floating point operand that can be used -;; in an add, subtract, or multiply operation that uses the vector -;; conversion function. -(define_predicate "fp16_reg_or_constant_operand" - (match_code "reg,subreg,const_double") -{ - if (REG_P (op) || SUBREG_P (op)) - return vsx_register_operand (op, mode); - - if (CONST_DOUBLE_P (op)) - return fp16_xxspltiw_constant (op, mode); - - return false; -}) - -;; Match binary operators where we convert a BFmode operand into a -;; SFmode operand so that we can optimize the BFmode operation to do -;; the operation in vector mode rather than convverting the BFmode to a -;; V8BFmode vector, converting that V8BFmode vector to V4SFmode, and -;; then converting the V4SFmode element to SFmode scalar. -(define_predicate "fp16_binary_operator" - (match_code "plus,minus,mult,smax,smin")) - -;; Match bfloat16/float operands that can be optimized to do the -;; operation in V4SFmode. -(define_predicate "bfloat16_v4sf_operand" - (match_code "reg,subreg,const_double,float_extend,float_truncate") -{ - if (mode != BFmode && mode != SFmode) - return false; - - if (REG_P (op) || SUBREG_P (op)) - return register_operand (op, mode); - - if (CONST_DOUBLE_P (op)) - return true; - - if (GET_CODE (op) == FLOAT_EXTEND) - { - rtx op_arg = XEXP (op, 0); - return (mode == SFmode - && GET_MODE (op_arg) == BFmode - && (REG_P (op_arg) || SUBREG_P (op_arg))); - } - - if (GET_CODE (op) == FLOAT_TRUNCATE) - { - rtx op_arg = XEXP (op, 0); - return (mode == BFmode - && GET_MODE (op_arg) == SFmode - && (REG_P (op_arg) || SUBREG_P (op_arg))); - } - - return false; -}) - -;; Match an operand that originally was an BFmode value to prevent -;; operations involing only SFmode values from being converted to -;; BFmode. -(define_predicate "bfloat16_bf_operand" - (match_code "reg,subreg,const_double,float_extend") -{ - if (mode == BFmode || GET_MODE (op) == BFmode) - return true; - - if (mode != SFmode) - return false; - - if (GET_MODE (op) == SFmode - && GET_CODE (op) == FLOAT_EXTEND - && GET_MODE (XEXP (op, 0)) == BFmode) - return true; - - return false; -}) diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 3665a405cfd2..dd5fcd69e836 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -271,8 +271,6 @@ enum fp16_operation { extern void fp16_vectorization (enum rtx_code, rtx, rtx, rtx, rtx, enum fp16_operation); -extern void bfloat16_operation_as_v4sf (enum rtx_code, rtx, rtx, rtx, rtx, - enum fp16_operation); #endif /* RTX_CODE */ #ifdef TREE_CODE diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 814df3e11b58..588e4739f6bd 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -647,14 +647,6 @@ mfloat16 Target Mask(FLOAT16) Var(rs6000_isa_flags) Enable or disable 16-bit floating point. -mbfloat16-combine -Target Undocumented Var(TARGET_BFLOAT16_COMBINE) Init(0) Save -Enable (disable) __bfloat16 combine operations - -mbfloat16-peephole -Target Undocumented Var(TARGET_BFLOAT16_PEEPHOLE) Init(0) Save -Enable (disable) __bfloat16 peephole combine operations - ; Documented parameters -param=rs6000-vect-unroll-limit=
