Re: [PATCH v4 2/3] VECT: Support new IFN SAT_ADD for unsigned vector int

2024-05-14 Thread Richard Biener
On Mon, May 6, 2024 at 4:49 PM  wrote:
>
> From: Pan Li 
>
> This patch depends on below scalar enabling patch:
>
> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/650822.html
>
> For vectorize, we leverage the existing vect pattern recog to find
> the pattern similar to scalar and let the vectorizer to perform
> the rest part for standard name usadd3 in vector mode.
> The riscv vector backend have insn "Vector Single-Width Saturating
> Add and Subtract" which can be leveraged when expand the usadd3
> in vector mode.  For example:
>
> void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
> {
>   unsigned i;
>
>   for (i = 0; i < n; i++)
> out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i]));
> }
>
> Before this patch:
> void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
> {
>   ...
>   _80 = .SELECT_VL (ivtmp_78, POLY_INT_CST [2, 2]);
>   ivtmp_58 = _80 * 8;
>   vect__4.7_61 = .MASK_LEN_LOAD (vectp_x.5_59, 64B, { -1, ... }, _80, 0);
>   vect__6.10_65 = .MASK_LEN_LOAD (vectp_y.8_63, 64B, { -1, ... }, _80, 0);
>   vect__7.11_66 = vect__4.7_61 + vect__6.10_65;
>   mask__8.12_67 = vect__4.7_61 > vect__7.11_66;
>   vect__12.15_72 = .VCOND_MASK (mask__8.12_67, { 18446744073709551615, ... }, 
> vect__7.11_66);
>   .MASK_LEN_STORE (vectp_out.16_74, 64B, { -1, ... }, _80, 0, vect__12.15_72);
>   vectp_x.5_60 = vectp_x.5_59 + ivtmp_58;
>   vectp_y.8_64 = vectp_y.8_63 + ivtmp_58;
>   vectp_out.16_75 = vectp_out.16_74 + ivtmp_58;
>   ivtmp_79 = ivtmp_78 - _80;
>   ...
> }
>
> After this patch:
> void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
> {
>   ...
>   _62 = .SELECT_VL (ivtmp_60, POLY_INT_CST [2, 2]);
>   ivtmp_46 = _62 * 8;
>   vect__4.7_49 = .MASK_LEN_LOAD (vectp_x.5_47, 64B, { -1, ... }, _62, 0);
>   vect__6.10_53 = .MASK_LEN_LOAD (vectp_y.8_51, 64B, { -1, ... }, _62, 0);
>   vect__12.11_54 = .SAT_ADD (vect__4.7_49, vect__6.10_53);
>   .MASK_LEN_STORE (vectp_out.12_56, 64B, { -1, ... }, _62, 0, vect__12.11_54);
>   ...
> }
>
> The below test suites are passed for this patch.
> * The riscv fully regression tests.
> * The aarch64 fully regression tests.
> * The x86 bootstrap tests.
> * The x86 fully regression tests.
>
> PR target/51492
> PR target/112600
>
> gcc/ChangeLog:
>
> * tree-vect-patterns.cc (gimple_unsigned_integer_sat_add): New func
> decl generated by match.pd match.
> (vect_recog_sat_add_pattern): New func impl to recog the pattern
> for unsigned SAT_ADD.
>
> Signed-off-by: Pan Li 
> ---
>  gcc/tree-vect-patterns.cc | 51 +++
>  1 file changed, 51 insertions(+)
>
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index 87c2acff386..8ffcaf71d5c 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -4487,6 +4487,56 @@ vect_recog_mult_pattern (vec_info *vinfo,
>return pattern_stmt;
>  }
>
> +extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> +
> +/*
> + * Try to detect saturation add pattern (SAT_ADD), aka below gimple:
> + *   _7 = _4 + _6;
> + *   _8 = _4 > _7;
> + *   _9 = (long unsigned int) _8;
> + *   _10 = -_9;
> + *   _12 = _7 | _10;
> + *
> + * And then simplied to
> + *   _12 = .SAT_ADD (_4, _6);
> + */
> +
> +static gimple *
> +vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
> +   tree *type_out)
> +{
> +  gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
> +
> +  if (!is_gimple_assign (last_stmt))
> +return NULL;
> +
> +  tree res_ops[2];
> +  tree lhs = gimple_assign_lhs (last_stmt);
> +
> +  if (gimple_unsigned_integer_sat_add (lhs, res_ops, NULL))
> +{
> +  tree itype = TREE_TYPE (res_ops[0]);
> +  tree vtype = get_vectype_for_scalar_type (vinfo, itype);
> +
> +  if (vtype != NULL_TREE && direct_internal_fn_supported_p (
> +   IFN_SAT_ADD, vtype, OPTIMIZE_FOR_SPEED))

Please break the line before the && instead, like

  if (vtype != NULL_TREE
  && direct_internal_fn_supported_p (...

Otherwise this is OK once 1/3 is approved.

Thanks,
Richard.

> +   {
> + *type_out = vtype;
> + gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, 
> res_ops[0],
> +   res_ops[1]);
> +
> + gimple_call_set_lhs (call, vect_recog_temp_ssa_var (itype, NULL));
> + gimple_call_set_nothrow (call, /* nothrow_p */ false);
> + gimple_set_location (call, gimple_location (last_stmt));
> +
> + vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt);
> + return call;
> +   }
> +}
> +
> +  return NULL;
> +}
> +
>  /* Detect a signed division by a constant that wouldn't be
> otherwise vectorized:
>
> @@ -6987,6 +7037,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
>{ vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
>{ 

RE: [PATCH v4 2/3] VECT: Support new IFN SAT_ADD for unsigned vector int

2024-05-13 Thread Tamar Christina
Hi Pan,

> -Original Message-
> From: pan2...@intel.com 
> Sent: Monday, May 6, 2024 3:49 PM
> To: gcc-patches@gcc.gnu.org
> Cc: juzhe.zh...@rivai.ai; kito.ch...@gmail.com; Tamar Christina
> ; richard.guent...@gmail.com;
> hongtao@intel.com; Pan Li 
> Subject: [PATCH v4 2/3] VECT: Support new IFN SAT_ADD for unsigned vector int
> 
> From: Pan Li 
> 
> This patch depends on below scalar enabling patch:
> 
> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/650822.html
> 
> For vectorize, we leverage the existing vect pattern recog to find
> the pattern similar to scalar and let the vectorizer to perform
> the rest part for standard name usadd3 in vector mode.
> The riscv vector backend have insn "Vector Single-Width Saturating
> Add and Subtract" which can be leveraged when expand the usadd3
> in vector mode.  For example:
> 
> void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
> {
>   unsigned i;
> 
>   for (i = 0; i < n; i++)
> out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i]));
> }
> 
> Before this patch:
> void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
> {
>   ...
>   _80 = .SELECT_VL (ivtmp_78, POLY_INT_CST [2, 2]);
>   ivtmp_58 = _80 * 8;
>   vect__4.7_61 = .MASK_LEN_LOAD (vectp_x.5_59, 64B, { -1, ... }, _80, 0);
>   vect__6.10_65 = .MASK_LEN_LOAD (vectp_y.8_63, 64B, { -1, ... }, _80, 0);
>   vect__7.11_66 = vect__4.7_61 + vect__6.10_65;
>   mask__8.12_67 = vect__4.7_61 > vect__7.11_66;
>   vect__12.15_72 = .VCOND_MASK (mask__8.12_67, { 18446744073709551615,
> ... }, vect__7.11_66);
>   .MASK_LEN_STORE (vectp_out.16_74, 64B, { -1, ... }, _80, 0, vect__12.15_72);
>   vectp_x.5_60 = vectp_x.5_59 + ivtmp_58;
>   vectp_y.8_64 = vectp_y.8_63 + ivtmp_58;
>   vectp_out.16_75 = vectp_out.16_74 + ivtmp_58;
>   ivtmp_79 = ivtmp_78 - _80;
>   ...
> }
> 
> After this patch:
> void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
> {
>   ...
>   _62 = .SELECT_VL (ivtmp_60, POLY_INT_CST [2, 2]);
>   ivtmp_46 = _62 * 8;
>   vect__4.7_49 = .MASK_LEN_LOAD (vectp_x.5_47, 64B, { -1, ... }, _62, 0);
>   vect__6.10_53 = .MASK_LEN_LOAD (vectp_y.8_51, 64B, { -1, ... }, _62, 0);
>   vect__12.11_54 = .SAT_ADD (vect__4.7_49, vect__6.10_53);
>   .MASK_LEN_STORE (vectp_out.12_56, 64B, { -1, ... }, _62, 0, vect__12.11_54);
>   ...
> }
> 
> The below test suites are passed for this patch.
> * The riscv fully regression tests.
> * The aarch64 fully regression tests.
> * The x86 bootstrap tests.
> * The x86 fully regression tests.
> 
>   PR target/51492
>   PR target/112600
> 
> gcc/ChangeLog:
> 
>   * tree-vect-patterns.cc (gimple_unsigned_integer_sat_add): New func
>   decl generated by match.pd match.
>   (vect_recog_sat_add_pattern): New func impl to recog the pattern
>   for unsigned SAT_ADD.
> 
> Signed-off-by: Pan Li 

Patch looks good to me, but I cannot approve so I'll pass it on to Richi.

Cheers,
Tamar

> ---
>  gcc/tree-vect-patterns.cc | 51 +++
>  1 file changed, 51 insertions(+)
> 
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index 87c2acff386..8ffcaf71d5c 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -4487,6 +4487,56 @@ vect_recog_mult_pattern (vec_info *vinfo,
>return pattern_stmt;
>  }
> 
> +extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> +
> +/*
> + * Try to detect saturation add pattern (SAT_ADD), aka below gimple:
> + *   _7 = _4 + _6;
> + *   _8 = _4 > _7;
> + *   _9 = (long unsigned int) _8;
> + *   _10 = -_9;
> + *   _12 = _7 | _10;
> + *
> + * And then simplied to
> + *   _12 = .SAT_ADD (_4, _6);
> + */
> +
> +static gimple *
> +vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
> + tree *type_out)
> +{
> +  gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
> +
> +  if (!is_gimple_assign (last_stmt))
> +return NULL;
> +
> +  tree res_ops[2];
> +  tree lhs = gimple_assign_lhs (last_stmt);
> +
> +  if (gimple_unsigned_integer_sat_add (lhs, res_ops, NULL))
> +{
> +  tree itype = TREE_TYPE (res_ops[0]);
> +  tree vtype = get_vectype_for_scalar_type (vinfo, itype);
> +
> +  if (vtype != NULL_TREE && direct_internal_fn_supported_p (
> + IFN_SAT_ADD, vtype, OPTIMIZE_FOR_SPEED))
> + {
> +   *type_out = vtype;
> +   gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, res_ops[0],
> + res_ops[1]);
> +
> +   gimple_call_set_lhs (call, vect_recog_temp_ssa_var (itype, NULL));
> +   gimple_call_set_nothrow (call, /* nothrow_p */ false);
> +   gimple_set_location (call, gimple_location (last_stmt));
> +
> +   vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt);
> +   return call;
> + }
> +}
> +
> +  return NULL;
> +}
> +
>  /* Detect a signed division by a constant that wouldn't be
>