> LGTM but you'll need an OK from Richard, > Thanks for working on this!
Thanks Tamar for help and coaching, let's wait for Richard for a while,😊! Pan -----Original Message----- From: Tamar Christina <tamar.christ...@arm.com> Sent: Wednesday, May 15, 2024 5:12 PM To: Li, Pan2 <pan2...@intel.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zh...@rivai.ai; kito.ch...@gmail.com; richard.guent...@gmail.com; Liu, Hongtao <hongtao....@intel.com> Subject: RE: [PATCH v5 1/3] Internal-fn: Support new IFN SAT_ADD for unsigned scalar int Hi Pan, Thanks! > -----Original Message----- > From: pan2...@intel.com <pan2...@intel.com> > Sent: Wednesday, May 15, 2024 3:14 AM > To: gcc-patches@gcc.gnu.org > Cc: juzhe.zh...@rivai.ai; kito.ch...@gmail.com; Tamar Christina > <tamar.christ...@arm.com>; richard.guent...@gmail.com; > hongtao....@intel.com; Pan Li <pan2...@intel.com> > Subject: [PATCH v5 1/3] Internal-fn: Support new IFN SAT_ADD for unsigned > scalar > int > > From: Pan Li <pan2...@intel.com> > > This patch would like to add the middle-end presentation for the > saturation add. Aka set the result of add to the max when overflow. > It will take the pattern similar as below. > > SAT_ADD (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x)) > > Take uint8_t as example, we will have: > > * SAT_ADD (1, 254) => 255. > * SAT_ADD (1, 255) => 255. > * SAT_ADD (2, 255) => 255. > * SAT_ADD (255, 255) => 255. 
> > Given below example for the unsigned scalar integer uint64_t: > > uint64_t sat_add_u64 (uint64_t x, uint64_t y) > { > return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x)); > } > > Before this patch: > uint64_t sat_add_uint64_t (uint64_t x, uint64_t y) > { > long unsigned int _1; > _Bool _2; > long unsigned int _3; > long unsigned int _4; > uint64_t _7; > long unsigned int _10; > __complex__ long unsigned int _11; > > ;; basic block 2, loop depth 0 > ;; pred: ENTRY > _11 = .ADD_OVERFLOW (x_5(D), y_6(D)); > _1 = REALPART_EXPR <_11>; > _10 = IMAGPART_EXPR <_11>; > _2 = _10 != 0; > _3 = (long unsigned int) _2; > _4 = -_3; > _7 = _1 | _4; > return _7; > ;; succ: EXIT > > } > > After this patch: > uint64_t sat_add_uint64_t (uint64_t x, uint64_t y) > { > uint64_t _7; > > ;; basic block 2, loop depth 0 > ;; pred: ENTRY > _7 = .SAT_ADD (x_5(D), y_6(D)); [tail call] > return _7; > ;; succ: EXIT > } > > The below tests are passed for this patch: > 1. The riscv fully regression tests. > 2. The x86 bootstrap tests. > 3. The x86 fully regression tests. > > PR target/51492 > PR target/112600 > > gcc/ChangeLog: > > * internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADD > to the return true switch case(s). > * internal-fn.def (SAT_ADD): Add new signed optab SAT_ADD. > * match.pd: Add unsigned SAT_ADD match(es). > * optabs.def (OPTAB_NL): Remove fixed-point limitation for > us/ssadd. > * tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_add): New > extern func decl generated in match.pd match. > (match_saturation_arith): New func impl to match the saturation arith. > (math_opts_dom_walker::after_dom_children): Try match saturation > arith when IOR expr. > LGTM but you'll need an OK from Richard, Thanks for working on this! 
Tamar > Signed-off-by: Pan Li <pan2...@intel.com> > --- > gcc/internal-fn.cc | 1 + > gcc/internal-fn.def | 2 ++ > gcc/match.pd | 51 +++++++++++++++++++++++++++++++++++++++ > gcc/optabs.def | 4 +-- > gcc/tree-ssa-math-opts.cc | 32 ++++++++++++++++++++++++ > 5 files changed, 88 insertions(+), 2 deletions(-) > > diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc > index 0a7053c2286..73045ca8c8c 100644 > --- a/gcc/internal-fn.cc > +++ b/gcc/internal-fn.cc > @@ -4202,6 +4202,7 @@ commutative_binary_fn_p (internal_fn fn) > case IFN_UBSAN_CHECK_MUL: > case IFN_ADD_OVERFLOW: > case IFN_MUL_OVERFLOW: > + case IFN_SAT_ADD: > case IFN_VEC_WIDEN_PLUS: > case IFN_VEC_WIDEN_PLUS_LO: > case IFN_VEC_WIDEN_PLUS_HI: > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def > index 848bb9dbff3..25badbb86e5 100644 > --- a/gcc/internal-fn.def > +++ b/gcc/internal-fn.def > @@ -275,6 +275,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST > | ECF_NOTHROW, first, > DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, > first, > smulhrs, umulhrs, binary) > > +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, > binary) > + > DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary) > DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary) > DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary) > diff --git a/gcc/match.pd b/gcc/match.pd > index 07e743ae464..0f9c34fa897 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -3043,6 +3043,57 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > || POINTER_TYPE_P (itype)) > && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype)))))) > > +/* Unsigned Saturation Add */ > +(match (usadd_left_part_1 @0 @1) > + (plus:c @0 @1) > + (if (INTEGRAL_TYPE_P (type) > + && TYPE_UNSIGNED (TREE_TYPE (@0)) > + && types_match (type, TREE_TYPE (@0)) > + && types_match (type, TREE_TYPE (@1))))) > + > +(match (usadd_left_part_2 @0 @1) > + (realpart (IFN_ADD_OVERFLOW:c @0 @1)) > + (if (INTEGRAL_TYPE_P (type) > + && TYPE_UNSIGNED (TREE_TYPE (@0)) > + && 
types_match (type, TREE_TYPE (@0)) > + && types_match (type, TREE_TYPE (@1))))) > + > +(match (usadd_right_part_1 @0 @1) > + (negate (convert (lt (plus:c @0 @1) @0))) > + (if (INTEGRAL_TYPE_P (type) > + && TYPE_UNSIGNED (TREE_TYPE (@0)) > + && types_match (type, TREE_TYPE (@0)) > + && types_match (type, TREE_TYPE (@1))))) > + > +(match (usadd_right_part_1 @0 @1) > + (negate (convert (gt @0 (plus:c @0 @1)))) > + (if (INTEGRAL_TYPE_P (type) > + && TYPE_UNSIGNED (TREE_TYPE (@0)) > + && types_match (type, TREE_TYPE (@0)) > + && types_match (type, TREE_TYPE (@1))))) > + > +(match (usadd_right_part_2 @0 @1) > + (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) > integer_zerop))) > + (if (INTEGRAL_TYPE_P (type) > + && TYPE_UNSIGNED (TREE_TYPE (@0)) > + && types_match (type, TREE_TYPE (@0)) > + && types_match (type, TREE_TYPE (@1))))) > + > +/* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2 > + because the sub part of left_part_2 cannot work with right_part_1. > + For example, left_part_2 pattern focus one .ADD_OVERFLOW but the > + right_part_1 has nothing to do with .ADD_OVERFLOW. */ > + > +/* Unsigned saturation add, case 1 (branchless): > + SAT_U_ADD = (X + Y) | - ((X + Y) < X) or > + SAT_U_ADD = (X + Y) | - (X > (X + Y)). */ > +(match (unsigned_integer_sat_add @0 @1) > + (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1))) > + > +/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW). */ > +(match (unsigned_integer_sat_add @0 @1) > + (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1))) > + > /* x > y && x != XXX_MIN --> x > y > x > y && x == XXX_MIN --> false . 
*/ > (for eqne (eq ne) > diff --git a/gcc/optabs.def b/gcc/optabs.def > index ad14f9328b9..3f2cb46aff8 100644 > --- a/gcc/optabs.def > +++ b/gcc/optabs.def > @@ -111,8 +111,8 @@ OPTAB_NX(add_optab, "add$F$a3") > OPTAB_NX(add_optab, "add$Q$a3") > OPTAB_VL(addv_optab, "addv$I$a3", PLUS, "add", '3', gen_intv_fp_libfunc) > OPTAB_VX(addv_optab, "add$F$a3") > -OPTAB_NL(ssadd_optab, "ssadd$Q$a3", SS_PLUS, "ssadd", '3', > gen_signed_fixed_libfunc) > -OPTAB_NL(usadd_optab, "usadd$Q$a3", US_PLUS, "usadd", '3', > gen_unsigned_fixed_libfunc) > +OPTAB_NL(ssadd_optab, "ssadd$a3", SS_PLUS, "ssadd", '3', > gen_signed_fixed_libfunc) > +OPTAB_NL(usadd_optab, "usadd$a3", US_PLUS, "usadd", '3', > gen_unsigned_fixed_libfunc) > OPTAB_NL(sub_optab, "sub$P$a3", MINUS, "sub", '3', gen_int_fp_fixed_libfunc) > OPTAB_NX(sub_optab, "sub$F$a3") > OPTAB_NX(sub_optab, "sub$Q$a3") > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc > index e8c804f09b7..62da1c5ee08 100644 > --- a/gcc/tree-ssa-math-opts.cc > +++ b/gcc/tree-ssa-math-opts.cc > @@ -4086,6 +4086,36 @@ arith_overflow_check_p (gimple *stmt, gimple > *cast_stmt, gimple *&use_stmt, > return 0; > } > > +extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree)); > + > +/* > + * Try to match saturation arith pattern(s). > + * 1. 
SAT_ADD (unsigned) > + * _7 = _4 + _6; > + * _8 = _4 > _7; > + * _9 = (long unsigned int) _8; > + * _10 = -_9; > + * _12 = _7 | _10; > + * => > + * _12 = .SAT_ADD (_4, _6); */ > +static void > +match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt) > +{ > + gcall *call = NULL; > + > + tree ops[2]; > + tree lhs = gimple_assign_lhs (stmt); > + > + if (gimple_unsigned_integer_sat_add (lhs, ops, NULL) > + && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs), > + OPTIMIZE_FOR_BOTH)) > + { > + call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]); > + gimple_call_set_lhs (call, lhs); > + gsi_replace (gsi, call, true); > + } > +} > + > /* Recognize for unsigned x > x = y - z; > if (x > y) > @@ -6048,6 +6078,8 @@ math_opts_dom_walker::after_dom_children > (basic_block bb) > break; > > case BIT_IOR_EXPR: > + match_saturation_arith (&gsi, as_a<gassign *> (stmt)); > + /* fall-through */ > case BIT_XOR_EXPR: > match_uaddc_usubc (&gsi, stmt, code); > break; > -- > 2.34.1